Repository: openai/gym Branch: master Commit: dcd185843a62 Files: 219 Total size: 1.1 MB Directory structure: gitextract_3_1zpoik/ ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug.md │ │ ├── proposal.md │ │ └── question.md │ ├── PULL_REQUEST_TEMPLATE.md │ ├── stale.yml │ └── workflows/ │ ├── build.yml │ └── pre-commit.yml ├── .gitignore ├── .pre-commit-config.yaml ├── CODE_OF_CONDUCT.rst ├── CONTRIBUTING.md ├── LICENSE.md ├── README.md ├── bin/ │ └── docker_entrypoint ├── gym/ │ ├── __init__.py │ ├── core.py │ ├── envs/ │ │ ├── __init__.py │ │ ├── box2d/ │ │ │ ├── __init__.py │ │ │ ├── bipedal_walker.py │ │ │ ├── car_dynamics.py │ │ │ ├── car_racing.py │ │ │ └── lunar_lander.py │ │ ├── classic_control/ │ │ │ ├── __init__.py │ │ │ ├── acrobot.py │ │ │ ├── cartpole.py │ │ │ ├── continuous_mountain_car.py │ │ │ ├── mountain_car.py │ │ │ ├── pendulum.py │ │ │ └── utils.py │ │ ├── mujoco/ │ │ │ ├── __init__.py │ │ │ ├── ant.py │ │ │ ├── ant_v3.py │ │ │ ├── ant_v4.py │ │ │ ├── assets/ │ │ │ │ ├── ant.xml │ │ │ │ ├── half_cheetah.xml │ │ │ │ ├── hopper.xml │ │ │ │ ├── humanoid.xml │ │ │ │ ├── humanoidstandup.xml │ │ │ │ ├── inverted_double_pendulum.xml │ │ │ │ ├── inverted_pendulum.xml │ │ │ │ ├── point.xml │ │ │ │ ├── pusher.xml │ │ │ │ ├── reacher.xml │ │ │ │ ├── swimmer.xml │ │ │ │ └── walker2d.xml │ │ │ ├── half_cheetah.py │ │ │ ├── half_cheetah_v3.py │ │ │ ├── half_cheetah_v4.py │ │ │ ├── hopper.py │ │ │ ├── hopper_v3.py │ │ │ ├── hopper_v4.py │ │ │ ├── humanoid.py │ │ │ ├── humanoid_v3.py │ │ │ ├── humanoid_v4.py │ │ │ ├── humanoidstandup.py │ │ │ ├── humanoidstandup_v4.py │ │ │ ├── inverted_double_pendulum.py │ │ │ ├── inverted_double_pendulum_v4.py │ │ │ ├── inverted_pendulum.py │ │ │ ├── inverted_pendulum_v4.py │ │ │ ├── mujoco_env.py │ │ │ ├── mujoco_rendering.py │ │ │ ├── pusher.py │ │ │ ├── pusher_v4.py │ │ │ ├── reacher.py │ │ │ ├── reacher_v4.py │ │ │ ├── swimmer.py │ │ │ ├── swimmer_v3.py │ │ │ ├── swimmer_v4.py │ │ │ ├── walker2d.py │ │ │ ├── walker2d_v3.py 
│ │ │ └── walker2d_v4.py │ │ ├── registration.py │ │ └── toy_text/ │ │ ├── __init__.py │ │ ├── blackjack.py │ │ ├── cliffwalking.py │ │ ├── frozen_lake.py │ │ ├── taxi.py │ │ └── utils.py │ ├── error.py │ ├── logger.py │ ├── py.typed │ ├── spaces/ │ │ ├── __init__.py │ │ ├── box.py │ │ ├── dict.py │ │ ├── discrete.py │ │ ├── graph.py │ │ ├── multi_binary.py │ │ ├── multi_discrete.py │ │ ├── sequence.py │ │ ├── space.py │ │ ├── text.py │ │ ├── tuple.py │ │ └── utils.py │ ├── utils/ │ │ ├── __init__.py │ │ ├── colorize.py │ │ ├── env_checker.py │ │ ├── ezpickle.py │ │ ├── passive_env_checker.py │ │ ├── play.py │ │ ├── save_video.py │ │ ├── seeding.py │ │ └── step_api_compatibility.py │ ├── vector/ │ │ ├── __init__.py │ │ ├── async_vector_env.py │ │ ├── sync_vector_env.py │ │ ├── utils/ │ │ │ ├── __init__.py │ │ │ ├── misc.py │ │ │ ├── numpy_utils.py │ │ │ ├── shared_memory.py │ │ │ └── spaces.py │ │ └── vector_env.py │ ├── version.py │ └── wrappers/ │ ├── README.md │ ├── __init__.py │ ├── atari_preprocessing.py │ ├── autoreset.py │ ├── clip_action.py │ ├── compatibility.py │ ├── env_checker.py │ ├── filter_observation.py │ ├── flatten_observation.py │ ├── frame_stack.py │ ├── gray_scale_observation.py │ ├── human_rendering.py │ ├── monitoring/ │ │ ├── __init__.py │ │ └── video_recorder.py │ ├── normalize.py │ ├── order_enforcing.py │ ├── pixel_observation.py │ ├── record_episode_statistics.py │ ├── record_video.py │ ├── render_collection.py │ ├── rescale_action.py │ ├── resize_observation.py │ ├── step_api_compatibility.py │ ├── time_aware_observation.py │ ├── time_limit.py │ ├── transform_observation.py │ ├── transform_reward.py │ └── vector_list_info.py ├── py.Dockerfile ├── pyproject.toml ├── requirements.txt ├── setup.py ├── test_requirements.txt └── tests/ ├── __init__.py ├── envs/ │ ├── __init__.py │ ├── test_action_dim_check.py │ ├── test_compatibility.py │ ├── test_env_implementation.py │ ├── test_envs.py │ ├── test_make.py │ ├── test_mujoco.py │ ├── 
test_register.py │ ├── test_spec.py │ ├── utils.py │ └── utils_envs.py ├── spaces/ │ ├── __init__.py │ ├── test_box.py │ ├── test_dict.py │ ├── test_discrete.py │ ├── test_graph.py │ ├── test_multibinary.py │ ├── test_multidiscrete.py │ ├── test_sequence.py │ ├── test_space.py │ ├── test_spaces.py │ ├── test_text.py │ ├── test_tuple.py │ ├── test_utils.py │ └── utils.py ├── test_core.py ├── testing_env.py ├── utils/ │ ├── __init__.py │ ├── test_env_checker.py │ ├── test_passive_env_checker.py │ ├── test_play.py │ ├── test_save_video.py │ ├── test_seeding.py │ └── test_step_api_compatibility.py ├── vector/ │ ├── __init__.py │ ├── test_async_vector_env.py │ ├── test_numpy_utils.py │ ├── test_shared_memory.py │ ├── test_spaces.py │ ├── test_sync_vector_env.py │ ├── test_vector_env.py │ ├── test_vector_env_info.py │ ├── test_vector_env_wrapper.py │ ├── test_vector_make.py │ └── utils.py └── wrappers/ ├── __init__.py ├── test_atari_preprocessing.py ├── test_autoreset.py ├── test_clip_action.py ├── test_filter_observation.py ├── test_flatten.py ├── test_flatten_observation.py ├── test_frame_stack.py ├── test_gray_scale_observation.py ├── test_human_rendering.py ├── test_nested_dict.py ├── test_normalize.py ├── test_order_enforcing.py ├── test_passive_env_checker.py ├── test_pixel_observation.py ├── test_record_episode_statistics.py ├── test_record_video.py ├── test_rescale_action.py ├── test_resize_observation.py ├── test_step_compatibility.py ├── test_time_aware_observation.py ├── test_time_limit.py ├── test_transform_observation.py ├── test_transform_reward.py ├── test_vector_list_info.py ├── test_video_recorder.py └── utils.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/ISSUE_TEMPLATE/bug.md ================================================ --- name: Bug Report about: Submit a bug report title: "[Bug Report] Bug title" --- If you are 
submitting a bug report, please fill in the following details and use the tag [bug]. **Describe the bug** A clear and concise description of what the bug is. **Code example** Please try to provide a minimal example to reproduce the bug. Error messages and stack traces are also helpful. **System Info** Describe the characteristics of your environment: * Describe how Gym was installed (pip, docker, source, ...) * What OS/version of Linux you're using. Note that while we will accept PRs to improve Windows support, we do not officially support it. * Python version **Additional context** Add any other context about the problem here. ### Checklist - [ ] I have checked that there is no similar [issue](https://github.com/openai/gym/issues) in the repo (**required**) ================================================ FILE: .github/ISSUE_TEMPLATE/proposal.md ================================================ --- name: Proposal about: Propose changes that are not bug fixes title: "[Proposal] Proposal title" --- ### Proposal A clear and concise description of the proposal. ### Motivation Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., "I'm always frustrated when [...]". If this is related to another GitHub issue, please link here too. ### Pitch A clear and concise description of what you want to happen. ### Alternatives A clear and concise description of any alternative solutions or features you've considered, if any. ### Additional context Add any other context or screenshots about the feature request here. 
### Checklist - [ ] I have checked that there is no similar [issue](https://github.com/openai/gym/issues) in the repo (**required**) ================================================ FILE: .github/ISSUE_TEMPLATE/question.md ================================================ --- name: Question about: Ask a question title: "[Question] Question title" --- ### Question If you're a beginner and have basic questions, please ask on [r/reinforcementlearning](https://www.reddit.com/r/reinforcementlearning/) or in the [RL Discord](https://discord.com/invite/xhfNqQv) (if you're new please use the beginners channel). Basic questions that are not bugs or feature requests will be closed without reply, because GitHub issues are not an appropriate venue for these. Advanced/nontrivial questions, especially in areas where documentation is lacking, are very much welcome. ================================================ FILE: .github/PULL_REQUEST_TEMPLATE.md ================================================ # Description Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change. Fixes # (issue) ## Type of change Please delete options that are not relevant. - [ ] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] This change requires a documentation update ### Screenshots Please attach before and after screenshots of the change if applicable. 
# Checklist: - [ ] I have run the [`pre-commit` checks](https://pre-commit.com/) with `pre-commit run --all-files` (see `CONTRIBUTING.md` instructions to set it up) - [ ] I have commented my code, particularly in hard-to-understand areas - [ ] I have made corresponding changes to the documentation - [ ] My changes generate no new warnings - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] New and existing unit tests pass locally with my changes ================================================ FILE: .github/stale.yml ================================================ # Configuration for probot-stale - https://github.com/probot/stale # Number of days of inactivity before an Issue or Pull Request becomes stale daysUntilStale: 60 # Number of days of inactivity before an Issue or Pull Request with the stale label is closed. # Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. daysUntilClose: 14 # Only issues or pull requests with all of these labels are check if stale. Defaults to `[]` (disabled) onlyLabels: - more-information-needed # Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable exemptLabels: - pinned - security - "[Status] Maybe Later" # Set to true to ignore issues in a project (defaults to false) exemptProjects: true # Set to true to ignore issues in a milestone (defaults to false) exemptMilestones: true # Set to true to ignore issues with an assignee (defaults to false) exemptAssignees: true # Label to use when marking as stale staleLabel: stale # Comment to post when marking as stale. Set to `false` to disable markComment: > This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions. # Comment to post when removing the stale label. # unmarkComment: > # Your comment here. 
# Comment to post when closing a stale Issue or Pull Request. # closeComment: > # Your comment here. # Limit the number of actions per hour, from 1-30. Default is 30 limitPerRun: 30 # Limit to only `issues` or `pulls` only: issues # Optionally, specify configuration settings that are specific to just 'issues' or 'pulls': # pulls: # daysUntilStale: 30 # markComment: > # This pull request has been automatically marked as stale because it has not had # recent activity. It will be closed if no further activity occurs. Thank you # for your contributions. # issues: # exemptLabels: # - confirmed ================================================ FILE: .github/workflows/build.yml ================================================ name: build on: [pull_request, push] permissions: contents: read # to fetch code (actions/checkout) jobs: build: runs-on: ubuntu-latest strategy: matrix: python-version: ['3.6', '3.7', '3.8', '3.9', '3.10'] steps: - uses: actions/checkout@v2 - run: | docker build -f py.Dockerfile \ --build-arg PYTHON_VERSION=${{ matrix.python-version }} \ --tag gym-docker . - name: Run tests run: docker run gym-docker pytest ================================================ FILE: .github/workflows/pre-commit.yml ================================================ # https://pre-commit.com # This GitHub Action assumes that the repo contains a valid .pre-commit-config.yaml file. name: pre-commit on: pull_request: push: branches: [master] permissions: contents: read # to fetch code (actions/checkout) jobs: pre-commit: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 - run: pip install pre-commit - run: pre-commit --version - run: pre-commit install - run: pre-commit run --all-files ================================================ FILE: .gitignore ================================================ *.swp *.pyc *.py~ .DS_Store .cache .pytest_cache/ # Setuptools distribution and build folders. 
/dist/ /build # Virtualenv /env # Python egg metadata, regenerated from source files by setuptools. /*.egg-info *.sublime-project *.sublime-workspace logs/ .ipynb_checkpoints ghostdriver.log junk MUJOCO_LOG.txt rllab_mujoco tutorial/*.html # IDE files .eggs .tox # PyCharm project files .idea vizdoom.ini ================================================ FILE: .pre-commit-config.yaml ================================================ --- repos: - repo: https://github.com/python/black rev: 22.3.0 hooks: - id: black - repo: https://github.com/codespell-project/codespell rev: v2.1.0 hooks: - id: codespell args: - --ignore-words-list=nd,reacher,thist,ths, ure, referenc - repo: https://gitlab.com/PyCQA/flake8 rev: 4.0.1 hooks: - id: flake8 args: - '--per-file-ignores=*/__init__.py:F401 gym/envs/registration.py:E704' - --ignore=E203,W503,E741 - --max-complexity=30 - --max-line-length=456 - --show-source - --statistics - repo: https://github.com/PyCQA/isort rev: 5.10.1 hooks: - id: isort args: ["--profile", "black"] - repo: https://github.com/pycqa/pydocstyle rev: 6.1.1 # pick a git hash / tag to point to hooks: - id: pydocstyle exclude: ^(gym/version.py)|(gym/envs/)|(tests/) args: - --source - --explain - --convention=google additional_dependencies: ["toml"] - repo: https://github.com/asottile/pyupgrade rev: v2.32.0 hooks: - id: pyupgrade # TODO: remove `--keep-runtime-typing` option args: ["--py36-plus", "--keep-runtime-typing"] - repo: local hooks: - id: pyright name: pyright entry: pyright language: node pass_filenames: false types: [python] additional_dependencies: ["pyright"] args: - --project=pyproject.toml ================================================ FILE: CODE_OF_CONDUCT.rst ================================================ OpenAI Gym is dedicated to providing a harassment-free experience for everyone, regardless of gender, gender identity and expression, sexual orientation, disability, physical appearance, body size, age, race, or religion. 
We do not tolerate harassment of participants in any form. This code of conduct applies to all OpenAI Gym spaces (including Gist comments) both online and off. Anyone who violates this code of conduct may be sanctioned or expelled from these spaces at the discretion of the OpenAI team. We may add additional rules over time, which will be made clearly available to participants. Participants are responsible for knowing and abiding by these rules. ================================================ FILE: CONTRIBUTING.md ================================================ # Gym Contribution Guidelines At this time we are accepting the following forms of contributions: - Bug reports (keep in mind that changing environment behavior should be minimized as that requires releasing a new version of the environment and makes results hard to compare across versions) - Pull requests for bug fixes - Documentation improvements Notably, we are not accepting these forms of contributions: - New environments - New features This may change in the future. If you wish to make a Gym environment, follow the instructions in [Creating Environments](https://github.com/openai/gym/blob/master/docs/creating_environments.md). When your environment works, you can make a PR to add it to the bottom of the [List of Environments](https://github.com/openai/gym/blob/master/docs/third_party_environments.md). Edit July 27, 2021: Please see https://github.com/openai/gym/issues/2259 for new contributing standards # Development This section contains technical instructions & hints for the contributors. ## Type checking The project uses `pyright` to check types. To type check locally, install `pyright` per official [instructions](https://github.com/microsoft/pyright#command-line). Its configuration lives within `pyproject.toml`. It includes the list of included and excluded files currently supporting type checks. 
To run `pyright` for the project, run the pre-commit process (`pre-commit run --all-files`) or `pyright --project=pyproject.toml` Alternatively, pyright is a built-in feature of VSCode that will automatically provide type hinting. ### Adding typing to more modules and packages If you would like to add typing to a module in the project, the list of included, excluded and strict files can be found in pyproject.toml (pyproject.toml -> [tool.pyright]). To run `pyright` for the project, run the pre-commit process (`pre-commit run --all-files`) or `pyright` ## Git hooks The CI will run several checks on the new code pushed to the Gym repository. These checks can also be run locally without waiting for the CI by following the steps below: 1. [install `pre-commit`](https://pre-commit.com/#install), 2. Install the Git hooks by running `pre-commit install`. Once those two steps are done, the Git hooks will be run automatically at every new commit. The Git hooks can also be run manually with `pre-commit run --all-files`, and if needed they can be skipped (not recommended) with `git commit --no-verify`. **Note:** you may have to run `pre-commit run --all-files` manually a couple of times to make it pass when you commit, as each formatting tool will first format the code and fail the first time but should pass the second time. Additionally, for pull requests, the project runs a number of tests for the whole project using [pytest](https://docs.pytest.org/en/latest/getting-started.html#install-pytest). These tests can be run locally with `pytest` in the root folder. ## Docstrings Pydocstyle has been added to the pre-commit process such that all new functions follow the [google docstring style](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html). All new functions require either a short docstring, a single line explaining the purpose of a function or a multiline docstring that documents each argument and the return type (if there is one) of the function. 
In addition, new files and classes require top docstrings that should outline the purpose of the file/class. For classes, code block examples can be provided in the top docstring and not the constructor arguments. To check your docstrings are correct, run `pre-commit run --all-files` or `pydocstyle --source --explain --convention=google`. For all docstrings that fail, the source and reason for the failure are provided. ================================================ FILE: LICENSE.md ================================================ The MIT License Copyright (c) 2016 OpenAI (https://openai.com) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # Mujoco models This work is derived from [MuJoCo models](http://www.mujoco.org/forum/index.php?resources/) used under the following license: ``` This file is part of MuJoCo. Copyright 2009-2015 Roboti LLC. 
Mujoco :: Advanced physics simulation engine Source : www.roboti.us Version : 1.31 Released : 23Apr16 Author :: Vikash Kumar Contacts : kumar@roboti.us ``` ================================================ FILE: README.md ================================================ [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white)](https://pre-commit.com/) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) ## Important Notice ### The team that has been maintaining Gym since 2021 has moved all future development to [Gymnasium](https://github.com/Farama-Foundation/Gymnasium), a drop in replacement for Gym (import gymnasium as gym), and Gym will not be receiving any future updates. Please switch over to Gymnasium as soon as you're able to do so. If you'd like to read more about the story behind this switch, please check out [this blog post](https://farama.org/Announcing-The-Farama-Foundation). ## Gym Gym is an open source Python library for developing and comparing reinforcement learning algorithms by providing a standard API to communicate between learning algorithms and environments, as well as a standard set of environments compliant with that API. Since its release, Gym's API has become the field standard for doing this. Gym documentation website is at [https://www.gymlibrary.dev/](https://www.gymlibrary.dev/), and you can propose fixes and changes to it [here](https://github.com/Farama-Foundation/gym-docs). Gym also has a discord server for development purposes that you can join here: https://discord.gg/nHg2JRN489 ## Installation To install the base Gym library, use `pip install gym`. This does not include dependencies for all families of environments (there's a massive number, and some can be problematic to install on certain systems). 
You can install these dependencies for one family like `pip install gym[atari]` or use `pip install gym[all]` to install all dependencies. We support Python 3.7, 3.8, 3.9 and 3.10 on Linux and macOS. We will accept PRs related to Windows, but do not officially support it. ## API The Gym API models environments as simple Python `env` classes. Creating environment instances and interacting with them is very simple — here's an example using the "CartPole-v1" environment: ```python import gym env = gym.make("CartPole-v1") observation, info = env.reset(seed=42) for _ in range(1000): action = env.action_space.sample() observation, reward, terminated, truncated, info = env.step(action) if terminated or truncated: observation, info = env.reset() env.close() ``` ## Notable Related Libraries Please note that this is an incomplete list, and just includes libraries that the maintainers most commonly point newcomers to when asked for recommendations. * [CleanRL](https://github.com/vwxyzjn/cleanrl) is a learning library based on the Gym API. It is designed to cater to newer people in the field and provides very good reference implementations. * [Tianshou](https://github.com/thu-ml/tianshou) is a learning library that's geared towards very experienced users and is designed to allow for ease in complex algorithm modifications. * [RLlib](https://docs.ray.io/en/latest/rllib/index.html) is a learning library that allows for distributed training and inferencing and supports an extraordinarily large number of features throughout the reinforcement learning space. * [PettingZoo](https://github.com/Farama-Foundation/PettingZoo) is like Gym, but for environments with multiple agents. ## Environment Versioning Gym keeps strict versioning for reproducibility reasons. All environments end in a suffix like "\_v0". When changes are made to environments that might impact learning results, the number is increased by one to prevent potential confusion. 
## MuJoCo Environments The latest "\_v4" and future versions of the MuJoCo environments will no longer depend on `mujoco-py`. Instead `mujoco` will be the required dependency for future gym MuJoCo environment versions. Old gym MuJoCo environment versions that depend on `mujoco-py` will still be kept but unmaintained. To install the dependencies for the latest gym MuJoCo environments use `pip install gym[mujoco]`. Dependencies for old MuJoCo environments can still be installed by `pip install gym[mujoco_py]`. ## Citation A whitepaper from when Gym just came out is available https://arxiv.org/pdf/1606.01540, and can be cited with the following bibtex entry: ``` @misc{1606.01540, Author = {Greg Brockman and Vicki Cheung and Ludwig Pettersson and Jonas Schneider and John Schulman and Jie Tang and Wojciech Zaremba}, Title = {OpenAI Gym}, Year = {2016}, Eprint = {arXiv:1606.01540}, } ``` ## Release Notes There used to be release notes for all the new Gym versions here. New release notes are being moved to [releases page](https://github.com/openai/gym/releases) on GitHub, like most other libraries do. Old notes can be viewed [here](https://github.com/openai/gym/blob/31be35ecd460f670f0c4b653a14c9996b7facc6c/README.rst). ================================================ FILE: bin/docker_entrypoint ================================================ #!/bin/bash # This script is the entrypoint for our Docker image. set -ex # Set up display; otherwise rendering will fail Xvfb -screen 0 1024x768x24 & export DISPLAY=:0 # Wait for the file to come up display=0 file="/tmp/.X11-unix/X$display" for i in $(seq 1 10); do if [ -e "$file" ]; then break fi echo "Waiting for $file to be created (try $i/10)" sleep "$i" done if ! 
[ -e "$file" ]; then echo "Timing out: $file was not created" exit 1 fi exec "$@" ================================================ FILE: gym/__init__.py ================================================ """Root __init__ of the gym module setting the __all__ of gym modules.""" # isort: skip_file from gym import error from gym.version import VERSION as __version__ from gym.core import ( Env, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper, ) from gym.spaces import Space from gym.envs import make, spec, register from gym import logger from gym import vector from gym import wrappers import os import sys __all__ = ["Env", "Space", "Wrapper", "make", "spec", "register"] # Initializing pygame initializes audio connections through SDL. SDL uses alsa by default on all Linux systems # SDL connecting to alsa frequently create these giant lists of warnings every time you import an environment using # pygame # DSP is far more benign (and should probably be the default in SDL anyways) if sys.platform.startswith("linux"): os.environ["SDL_AUDIODRIVER"] = "dsp" os.environ["PYGAME_HIDE_SUPPORT_PROMPT"] = "hide" try: import gym_notices.notices as notices # print version warning if necessary notice = notices.notices.get(__version__) if notice: print(notice, file=sys.stderr) except Exception: # nosec pass ================================================ FILE: gym/core.py ================================================ """Core API for Environment, Wrapper, ActionWrapper, RewardWrapper and ObservationWrapper.""" import sys from typing import ( TYPE_CHECKING, Any, Dict, Generic, List, Optional, SupportsFloat, Tuple, TypeVar, Union, ) import numpy as np from gym import spaces from gym.logger import warn from gym.utils import seeding if TYPE_CHECKING: from gym.envs.registration import EnvSpec if sys.version_info[0:2] == (3, 6): warn( "Gym minimally supports python 3.6 as the python foundation not longer supports the version, please update your version to 3.7+" ) ObsType = 
TypeVar("ObsType") ActType = TypeVar("ActType") RenderFrame = TypeVar("RenderFrame") class Env(Generic[ObsType, ActType]): r"""The main OpenAI Gym class. It encapsulates an environment with arbitrary behind-the-scenes dynamics. An environment can be partially or fully observed. The main API methods that users of this class need to know are: - :meth:`step` - Takes a step in the environment using an action returning the next observation, reward, if the environment terminated and observation information. - :meth:`reset` - Resets the environment to an initial state, returning the initial observation and observation information. - :meth:`render` - Renders the environment observation with modes depending on the output - :meth:`close` - Closes the environment, important for rendering where pygame is imported And set the following attributes: - :attr:`action_space` - The Space object corresponding to valid actions - :attr:`observation_space` - The Space object corresponding to valid observations - :attr:`reward_range` - A tuple corresponding to the minimum and maximum possible rewards - :attr:`spec` - An environment spec that contains the information used to initialise the environment from `gym.make` - :attr:`metadata` - The metadata of the environment, i.e. render modes - :attr:`np_random` - The random number generator for the environment Note: a default reward range set to :math:`(-\infty,+\infty)` already exists. Set it if you want a narrower range. 
""" # Set this in SOME subclasses metadata: Dict[str, Any] = {"render_modes": []} # define render_mode if your environment supports rendering render_mode: Optional[str] = None reward_range = (-float("inf"), float("inf")) spec: "EnvSpec" = None # Set these in ALL subclasses action_space: spaces.Space[ActType] observation_space: spaces.Space[ObsType] # Created _np_random: Optional[np.random.Generator] = None @property def np_random(self) -> np.random.Generator: """Returns the environment's internal :attr:`_np_random` that if not set will initialise with a random seed.""" if self._np_random is None: self._np_random, seed = seeding.np_random() return self._np_random @np_random.setter def np_random(self, value: np.random.Generator): self._np_random = value def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, dict]: """Run one timestep of the environment's dynamics. When end of episode is reached, you are responsible for calling :meth:`reset` to reset this environment's state. Accepts an action and returns either a tuple `(observation, reward, terminated, truncated, info)`. Args: action (ActType): an action provided by the agent Returns: observation (object): this will be an element of the environment's :attr:`observation_space`. This may, for instance, be a numpy array containing the positions and velocities of certain objects. reward (float): The amount of reward returned as a result of taking the action. terminated (bool): whether a `terminal state` (as defined under the MDP of the task) is reached. In this case further step() calls could return undefined results. truncated (bool): whether a truncation condition outside the scope of the MDP is satisfied. Typically a timelimit, but could also be used to indicate agent physically going out of bounds. Can be used to end the episode prematurely before a `terminal state` is reached. info (dictionary): `info` contains auxiliary diagnostic information (helpful for debugging, learning, and logging). 
This might, for instance, contain: metrics that describe the agent's performance state, variables that are hidden from observations, or individual reward terms that are combined to produce the total reward. It also can contain information that distinguishes truncation and termination, however this is deprecated in favour of returning two booleans, and will be removed in a future version. (deprecated) done (bool): A boolean value for if the episode has ended, in which case further :meth:`step` calls will return undefined results. A done signal may be emitted for different reasons: Maybe the task underlying the environment was solved successfully, a certain timelimit was exceeded, or the physics simulation has entered an invalid state. """ raise NotImplementedError def reset( self, *, seed: Optional[int] = None, options: Optional[dict] = None, ) -> Tuple[ObsType, dict]: """Resets the environment to an initial state and returns the initial observation. This method can reset the environment's random number generator(s) if ``seed`` is an integer or if the environment has not yet initialized a random number generator. If the environment already has a random number generator and :meth:`reset` is called with ``seed=None``, the RNG should not be reset. Moreover, :meth:`reset` should (in the typical use case) be called with an integer seed right after initialization and then never again. Args: seed (optional int): The seed that is used to initialize the environment's PRNG. If the environment does not already have a PRNG and ``seed=None`` (the default option) is passed, a seed will be chosen from some source of entropy (e.g. timestamp or /dev/urandom). However, if the environment already has a PRNG and ``seed=None`` is passed, the PRNG will *not* be reset. If you pass an integer, the PRNG will be reset even if it already exists. Usually, you want to pass an integer *right after the environment has been initialized and then never again*. 
Please refer to the minimal example above to see this paradigm in action. options (optional dict): Additional information to specify how the environment is reset (optional, depending on the specific environment) Returns: observation (object): Observation of the initial state. This will be an element of :attr:`observation_space` (typically a numpy array) and is analogous to the observation returned by :meth:`step`. info (dictionary): This dictionary contains auxiliary information complementing ``observation``. It should be analogous to the ``info`` returned by :meth:`step`. """ # Initialize the RNG if the seed is manually passed if seed is not None: self._np_random, seed = seeding.np_random(seed) def render(self) -> Optional[Union[RenderFrame, List[RenderFrame]]]: """Compute the render frames as specified by render_mode attribute during initialization of the environment. The set of supported modes varies per environment. (And some third-party environments may not support rendering at all.) By convention, if render_mode is: - None (default): no render is computed. - human: render return None. The environment is continuously rendered in the current display or terminal. Usually for human consumption. - rgb_array: return a single frame representing the current state of the environment. A frame is a numpy.ndarray with shape (x, y, 3) representing RGB values for an x-by-y pixel image. - rgb_array_list: return a list of frames representing the states of the environment since the last reset. Each frame is a numpy.ndarray with shape (x, y, 3), as with `rgb_array`. - ansi: Return a strings (str) or StringIO.StringIO containing a terminal-style text representation for each time step. The text can include newlines and ANSI escape sequences (e.g. for colors). Note: Make sure that your class's metadata 'render_modes' key includes the list of supported modes. It's recommended to call super() in implementations to use the functionality of this method. 
""" raise NotImplementedError def close(self): """Override close in your subclass to perform any necessary cleanup. Environments will automatically :meth:`close()` themselves when garbage collected or when the program exits. """ pass @property def unwrapped(self) -> "Env": """Returns the base non-wrapped environment. Returns: Env: The base non-wrapped gym.Env instance """ return self def __str__(self): """Returns a string of the environment with the spec id if specified.""" if self.spec is None: return f"<{type(self).__name__} instance>" else: return f"<{type(self).__name__}<{self.spec.id}>>" def __enter__(self): """Support with-statement for the environment.""" return self def __exit__(self, *args): """Support with-statement for the environment.""" self.close() # propagate exception return False class Wrapper(Env[ObsType, ActType]): """Wraps an environment to allow a modular transformation of the :meth:`step` and :meth:`reset` methods. This class is the base class for all wrappers. The subclass could override some methods to change the behavior of the original environment without touching the original code. Note: Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`. """ def __init__(self, env: Env): """Wraps an environment to allow a modular transformation of the :meth:`step` and :meth:`reset` methods. 
Args: env: The environment to wrap """ self.env = env self._action_space: Optional[spaces.Space] = None self._observation_space: Optional[spaces.Space] = None self._reward_range: Optional[Tuple[SupportsFloat, SupportsFloat]] = None self._metadata: Optional[dict] = None def __getattr__(self, name): """Returns an attribute with ``name``, unless ``name`` starts with an underscore.""" if name.startswith("_"): raise AttributeError(f"accessing private attribute '{name}' is prohibited") return getattr(self.env, name) @property def spec(self): """Returns the environment specification.""" return self.env.spec @classmethod def class_name(cls): """Returns the class name of the wrapper.""" return cls.__name__ @property def action_space(self) -> spaces.Space[ActType]: """Returns the action space of the environment.""" if self._action_space is None: return self.env.action_space return self._action_space @action_space.setter def action_space(self, space: spaces.Space): self._action_space = space @property def observation_space(self) -> spaces.Space: """Returns the observation space of the environment.""" if self._observation_space is None: return self.env.observation_space return self._observation_space @observation_space.setter def observation_space(self, space: spaces.Space): self._observation_space = space @property def reward_range(self) -> Tuple[SupportsFloat, SupportsFloat]: """Return the reward range of the environment.""" if self._reward_range is None: return self.env.reward_range return self._reward_range @reward_range.setter def reward_range(self, value: Tuple[SupportsFloat, SupportsFloat]): self._reward_range = value @property def metadata(self) -> dict: """Returns the environment metadata.""" if self._metadata is None: return self.env.metadata return self._metadata @metadata.setter def metadata(self, value): self._metadata = value @property def render_mode(self) -> Optional[str]: """Returns the environment render_mode.""" return self.env.render_mode @property def 
np_random(self) -> np.random.Generator: """Returns the environment np_random.""" return self.env.np_random @np_random.setter def np_random(self, value): self.env.np_random = value @property def _np_random(self): raise AttributeError( "Can't access `_np_random` of a wrapper, use `.unwrapped._np_random` or `.np_random`." ) def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, dict]: """Steps through the environment with action.""" return self.env.step(action) def reset(self, **kwargs) -> Tuple[ObsType, dict]: """Resets the environment with kwargs.""" return self.env.reset(**kwargs) def render( self, *args, **kwargs ) -> Optional[Union[RenderFrame, List[RenderFrame]]]: """Renders the environment.""" return self.env.render(*args, **kwargs) def close(self): """Closes the environment.""" return self.env.close() def __str__(self): """Returns the wrapper name and the unwrapped environment string.""" return f"<{type(self).__name__}{self.env}>" def __repr__(self): """Returns the string representation of the wrapper.""" return str(self) @property def unwrapped(self) -> Env: """Returns the base environment of the wrapper.""" return self.env.unwrapped class ObservationWrapper(Wrapper): """Superclass of wrappers that can modify observations using :meth:`observation` for :meth:`reset` and :meth:`step`. If you would like to apply a function to the observation that is returned by the base environment before passing it to learning code, you can simply inherit from :class:`ObservationWrapper` and overwrite the method :meth:`observation` to implement that transformation. The transformation defined in that method must be defined on the base environment’s observation space. However, it may take values in a different space. In that case, you need to specify the new observation space of the wrapper by setting :attr:`self.observation_space` in the :meth:`__init__` method of your wrapper. 
For example, you might have a 2D navigation task where the environment returns dictionaries as observations with keys ``"agent_position"`` and ``"target_position"``. A common thing to do might be to throw away some degrees of freedom and only consider the position of the target relative to the agent, i.e. ``observation["target_position"] - observation["agent_position"]``. For this, you could implement an observation wrapper like this:: class RelativePosition(gym.ObservationWrapper): def __init__(self, env): super().__init__(env) self.observation_space = Box(shape=(2,), low=-np.inf, high=np.inf) def observation(self, obs): return obs["target"] - obs["agent"] Among others, Gym provides the observation wrapper :class:`TimeAwareObservation`, which adds information about the index of the timestep to the observation. """ def reset(self, **kwargs): """Resets the environment, returning a modified observation using :meth:`self.observation`.""" obs, info = self.env.reset(**kwargs) return self.observation(obs), info def step(self, action): """Returns a modified observation using :meth:`self.observation` after calling :meth:`env.step`.""" observation, reward, terminated, truncated, info = self.env.step(action) return self.observation(observation), reward, terminated, truncated, info def observation(self, observation): """Returns a modified observation.""" raise NotImplementedError class RewardWrapper(Wrapper): """Superclass of wrappers that can modify the returning reward from a step. If you would like to apply a function to the reward that is returned by the base environment before passing it to learning code, you can simply inherit from :class:`RewardWrapper` and overwrite the method :meth:`reward` to implement that transformation. This transformation might change the reward range; to specify the reward range of your wrapper, you can simply define :attr:`self.reward_range` in :meth:`__init__`. 
Let us look at an example: Sometimes (especially when we do not have control over the reward because it is intrinsic), we want to clip the reward to a range to gain some numerical stability. To do that, we could, for instance, implement the following wrapper:: class ClipReward(gym.RewardWrapper): def __init__(self, env, min_reward, max_reward): super().__init__(env) self.min_reward = min_reward self.max_reward = max_reward self.reward_range = (min_reward, max_reward) def reward(self, reward): return np.clip(reward, self.min_reward, self.max_reward) """ def step(self, action): """Modifies the reward using :meth:`self.reward` after the environment :meth:`env.step`.""" observation, reward, terminated, truncated, info = self.env.step(action) return observation, self.reward(reward), terminated, truncated, info def reward(self, reward): """Returns a modified ``reward``.""" raise NotImplementedError class ActionWrapper(Wrapper): """Superclass of wrappers that can modify the action before :meth:`env.step`. If you would like to apply a function to the action before passing it to the base environment, you can simply inherit from :class:`ActionWrapper` and overwrite the method :meth:`action` to implement that transformation. The transformation defined in that method must take values in the base environment’s action space. However, its domain might differ from the original action space. In that case, you need to specify the new action space of the wrapper by setting :attr:`self.action_space` in the :meth:`__init__` method of your wrapper. Let’s say you have an environment with action space of type :class:`gym.spaces.Box`, but you would only like to use a finite subset of actions. 
Then, you might want to implement the following wrapper:: class DiscreteActions(gym.ActionWrapper): def __init__(self, env, disc_to_cont): super().__init__(env) self.disc_to_cont = disc_to_cont self.action_space = Discrete(len(disc_to_cont)) def action(self, act): return self.disc_to_cont[act] if __name__ == "__main__": env = gym.make("LunarLanderContinuous-v2") wrapped_env = DiscreteActions(env, [np.array([1,0]), np.array([-1,0]), np.array([0,1]), np.array([0,-1])]) print(wrapped_env.action_space) #Discrete(4) Among others, Gym provides the action wrappers :class:`ClipAction` and :class:`RescaleAction`. """ def step(self, action): """Runs the environment :meth:`env.step` using the modified ``action`` from :meth:`self.action`.""" return self.env.step(self.action(action)) def action(self, action): """Returns a modified action before :meth:`env.step` is called.""" raise NotImplementedError def reverse_action(self, action): """Returns a reversed ``action``.""" raise NotImplementedError ================================================ FILE: gym/envs/__init__.py ================================================ from gym.envs.registration import load_env_plugins as _load_env_plugins from gym.envs.registration import make, register, registry, spec # Hook to load plugins from entry points _load_env_plugins() # Classic # ---------------------------------------- register( id="CartPole-v0", entry_point="gym.envs.classic_control.cartpole:CartPoleEnv", max_episode_steps=200, reward_threshold=195.0, ) register( id="CartPole-v1", entry_point="gym.envs.classic_control.cartpole:CartPoleEnv", max_episode_steps=500, reward_threshold=475.0, ) register( id="MountainCar-v0", entry_point="gym.envs.classic_control.mountain_car:MountainCarEnv", max_episode_steps=200, reward_threshold=-110.0, ) register( id="MountainCarContinuous-v0", entry_point="gym.envs.classic_control.continuous_mountain_car:Continuous_MountainCarEnv", max_episode_steps=999, reward_threshold=90.0, ) register( 
id="Pendulum-v1", entry_point="gym.envs.classic_control.pendulum:PendulumEnv", max_episode_steps=200, ) register( id="Acrobot-v1", entry_point="gym.envs.classic_control.acrobot:AcrobotEnv", reward_threshold=-100.0, max_episode_steps=500, ) # Box2d # ---------------------------------------- register( id="LunarLander-v2", entry_point="gym.envs.box2d.lunar_lander:LunarLander", max_episode_steps=1000, reward_threshold=200, ) register( id="LunarLanderContinuous-v2", entry_point="gym.envs.box2d.lunar_lander:LunarLander", kwargs={"continuous": True}, max_episode_steps=1000, reward_threshold=200, ) register( id="BipedalWalker-v3", entry_point="gym.envs.box2d.bipedal_walker:BipedalWalker", max_episode_steps=1600, reward_threshold=300, ) register( id="BipedalWalkerHardcore-v3", entry_point="gym.envs.box2d.bipedal_walker:BipedalWalker", kwargs={"hardcore": True}, max_episode_steps=2000, reward_threshold=300, ) register( id="CarRacing-v2", entry_point="gym.envs.box2d.car_racing:CarRacing", max_episode_steps=1000, reward_threshold=900, ) # Toy Text # ---------------------------------------- register( id="Blackjack-v1", entry_point="gym.envs.toy_text.blackjack:BlackjackEnv", kwargs={"sab": True, "natural": False}, ) register( id="FrozenLake-v1", entry_point="gym.envs.toy_text.frozen_lake:FrozenLakeEnv", kwargs={"map_name": "4x4"}, max_episode_steps=100, reward_threshold=0.70, # optimum = 0.74 ) register( id="FrozenLake8x8-v1", entry_point="gym.envs.toy_text.frozen_lake:FrozenLakeEnv", kwargs={"map_name": "8x8"}, max_episode_steps=200, reward_threshold=0.85, # optimum = 0.91 ) register( id="CliffWalking-v0", entry_point="gym.envs.toy_text.cliffwalking:CliffWalkingEnv", ) register( id="Taxi-v3", entry_point="gym.envs.toy_text.taxi:TaxiEnv", reward_threshold=8, # optimum = 8.46 max_episode_steps=200, ) # Mujoco # ---------------------------------------- # 2D register( id="Reacher-v2", entry_point="gym.envs.mujoco:ReacherEnv", max_episode_steps=50, reward_threshold=-3.75, ) 
register( id="Reacher-v4", entry_point="gym.envs.mujoco.reacher_v4:ReacherEnv", max_episode_steps=50, reward_threshold=-3.75, ) register( id="Pusher-v2", entry_point="gym.envs.mujoco:PusherEnv", max_episode_steps=100, reward_threshold=0.0, ) register( id="Pusher-v4", entry_point="gym.envs.mujoco.pusher_v4:PusherEnv", max_episode_steps=100, reward_threshold=0.0, ) register( id="InvertedPendulum-v2", entry_point="gym.envs.mujoco:InvertedPendulumEnv", max_episode_steps=1000, reward_threshold=950.0, ) register( id="InvertedPendulum-v4", entry_point="gym.envs.mujoco.inverted_pendulum_v4:InvertedPendulumEnv", max_episode_steps=1000, reward_threshold=950.0, ) register( id="InvertedDoublePendulum-v2", entry_point="gym.envs.mujoco:InvertedDoublePendulumEnv", max_episode_steps=1000, reward_threshold=9100.0, ) register( id="InvertedDoublePendulum-v4", entry_point="gym.envs.mujoco.inverted_double_pendulum_v4:InvertedDoublePendulumEnv", max_episode_steps=1000, reward_threshold=9100.0, ) register( id="HalfCheetah-v2", entry_point="gym.envs.mujoco:HalfCheetahEnv", max_episode_steps=1000, reward_threshold=4800.0, ) register( id="HalfCheetah-v3", entry_point="gym.envs.mujoco.half_cheetah_v3:HalfCheetahEnv", max_episode_steps=1000, reward_threshold=4800.0, ) register( id="HalfCheetah-v4", entry_point="gym.envs.mujoco.half_cheetah_v4:HalfCheetahEnv", max_episode_steps=1000, reward_threshold=4800.0, ) register( id="Hopper-v2", entry_point="gym.envs.mujoco:HopperEnv", max_episode_steps=1000, reward_threshold=3800.0, ) register( id="Hopper-v3", entry_point="gym.envs.mujoco.hopper_v3:HopperEnv", max_episode_steps=1000, reward_threshold=3800.0, ) register( id="Hopper-v4", entry_point="gym.envs.mujoco.hopper_v4:HopperEnv", max_episode_steps=1000, reward_threshold=3800.0, ) register( id="Swimmer-v2", entry_point="gym.envs.mujoco:SwimmerEnv", max_episode_steps=1000, reward_threshold=360.0, ) register( id="Swimmer-v3", entry_point="gym.envs.mujoco.swimmer_v3:SwimmerEnv", 
max_episode_steps=1000, reward_threshold=360.0, ) register( id="Swimmer-v4", entry_point="gym.envs.mujoco.swimmer_v4:SwimmerEnv", max_episode_steps=1000, reward_threshold=360.0, ) register( id="Walker2d-v2", max_episode_steps=1000, entry_point="gym.envs.mujoco:Walker2dEnv", ) register( id="Walker2d-v3", max_episode_steps=1000, entry_point="gym.envs.mujoco.walker2d_v3:Walker2dEnv", ) register( id="Walker2d-v4", max_episode_steps=1000, entry_point="gym.envs.mujoco.walker2d_v4:Walker2dEnv", ) register( id="Ant-v2", entry_point="gym.envs.mujoco:AntEnv", max_episode_steps=1000, reward_threshold=6000.0, ) register( id="Ant-v3", entry_point="gym.envs.mujoco.ant_v3:AntEnv", max_episode_steps=1000, reward_threshold=6000.0, ) register( id="Ant-v4", entry_point="gym.envs.mujoco.ant_v4:AntEnv", max_episode_steps=1000, reward_threshold=6000.0, ) register( id="Humanoid-v2", entry_point="gym.envs.mujoco:HumanoidEnv", max_episode_steps=1000, ) register( id="Humanoid-v3", entry_point="gym.envs.mujoco.humanoid_v3:HumanoidEnv", max_episode_steps=1000, ) register( id="Humanoid-v4", entry_point="gym.envs.mujoco.humanoid_v4:HumanoidEnv", max_episode_steps=1000, ) register( id="HumanoidStandup-v2", entry_point="gym.envs.mujoco:HumanoidStandupEnv", max_episode_steps=1000, ) register( id="HumanoidStandup-v4", entry_point="gym.envs.mujoco.humanoidstandup_v4:HumanoidStandupEnv", max_episode_steps=1000, ) ================================================ FILE: gym/envs/box2d/__init__.py ================================================ from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore from gym.envs.box2d.car_racing import CarRacing from gym.envs.box2d.lunar_lander import LunarLander, LunarLanderContinuous ================================================ FILE: gym/envs/box2d/bipedal_walker.py ================================================ __credits__ = ["Andrea PIERRÉ"] import math from typing import TYPE_CHECKING, List, Optional import numpy as np import gym from 
gym import error, spaces
from gym.error import DependencyNotInstalled
from gym.utils import EzPickle

try:
    import Box2D
    from Box2D.b2 import (
        circleShape,
        contactListener,
        edgeShape,
        fixtureDef,
        polygonShape,
        revoluteJointDef,
    )
except ImportError:
    raise DependencyNotInstalled("box2D is not installed, run `pip install gym[box2d]`")


if TYPE_CHECKING:
    import pygame

FPS = 50
SCALE = 30.0  # affects how fast-paced the game is, forces should be adjusted as well

MOTORS_TORQUE = 80
SPEED_HIP = 4
SPEED_KNEE = 6
LIDAR_RANGE = 160 / SCALE

INITIAL_RANDOM = 5

# Hull polygon in pixel coordinates; divided by SCALE when building fixtures.
HULL_POLY = [(-30, +9), (+6, +9), (+34, +1), (+34, -8), (-30, -8)]
LEG_DOWN = -8 / SCALE
LEG_W, LEG_H = 8 / SCALE, 34 / SCALE

VIEWPORT_W = 600
VIEWPORT_H = 400

TERRAIN_STEP = 14 / SCALE
TERRAIN_LENGTH = 200  # in steps
TERRAIN_HEIGHT = VIEWPORT_H / SCALE / 4
TERRAIN_GRASS = 10  # how long are grass spots, in steps
TERRAIN_STARTPAD = 20  # in steps
FRICTION = 2.5

HULL_FD = fixtureDef(
    shape=polygonShape(vertices=[(x / SCALE, y / SCALE) for x, y in HULL_POLY]),
    density=5.0,
    friction=0.1,
    categoryBits=0x0020,
    maskBits=0x001,  # collide only with ground
    restitution=0.0,
)  # 0.99 bouncy

LEG_FD = fixtureDef(
    shape=polygonShape(box=(LEG_W / 2, LEG_H / 2)),
    density=1.0,
    restitution=0.0,
    categoryBits=0x0020,
    maskBits=0x001,
)

LOWER_FD = fixtureDef(
    shape=polygonShape(box=(0.8 * LEG_W / 2, LEG_H / 2)),
    density=1.0,
    restitution=0.0,
    categoryBits=0x0020,
    maskBits=0x001,
)


class ContactDetector(contactListener):
    """Box2D contact listener: flags game over on hull contact and tracks leg ground contact."""

    def __init__(self, env):
        contactListener.__init__(self)
        self.env = env

    def BeginContact(self, contact):
        """Any hull contact ends the episode; lower-leg contacts set ground_contact."""
        if (
            self.env.hull == contact.fixtureA.body
            or self.env.hull == contact.fixtureB.body
        ):
            self.env.game_over = True
        # legs[1] and legs[3] are the lower legs (see reset: upper/lower pairs).
        for leg in [self.env.legs[1], self.env.legs[3]]:
            if leg in [contact.fixtureA.body, contact.fixtureB.body]:
                leg.ground_contact = True

    def EndContact(self, contact):
        """Clear ground_contact when a lower leg leaves the ground."""
        for leg in [self.env.legs[1], self.env.legs[3]]:
            if leg in [contact.fixtureA.body, contact.fixtureB.body]:
                leg.ground_contact = False


class BipedalWalker(gym.Env, EzPickle):
    """
    ### Description
    This is a simple 4-joint walker robot environment.
    There are two versions:
    - Normal, with slightly uneven terrain.
    - Hardcore, with ladders, stumps, pitfalls.

    To solve the normal version, you need to get 300 points in 1600 time steps.
    To solve the hardcore version, you need 300 points in 2000 time steps.

    A heuristic is provided for testing. It's also useful to get demonstrations
    to learn from. To run the heuristic:
    ```
    python gym/envs/box2d/bipedal_walker.py
    ```

    ### Action Space
    Actions are motor speed values in the [-1, 1] range for each of the
    4 joints at both hips and knees.

    ### Observation Space
    State consists of hull angle speed, angular velocity, horizontal speed,
    vertical speed, position of joints and joints angular speed, legs contact
    with ground, and 10 lidar rangefinder measurements. There are no coordinates
    in the state vector.

    ### Rewards
    Reward is given for moving forward, totaling 300+ points up to the far end.
    If the robot falls, it gets -100. Applying motor torque costs a small
    amount of points. A more optimal agent will get a better score.

    ### Starting State
    The walker starts standing at the left end of the terrain with the hull
    horizontal, and both legs in the same position with a slight knee angle.

    ### Episode Termination
    The episode will terminate if the hull gets in contact with the ground or
    if the walker exceeds the right end of the terrain length.

    ### Arguments
    To use to the _hardcore_ environment, you need to specify the
    `hardcore=True` argument like below:
    ```python
    import gym
    env = gym.make("BipedalWalker-v3", hardcore=True)
    ```

    ### Version History
    - v3: returns closest lidar trace instead of furthest;
        faster video recording
    - v2: Count energy spent
    - v1: Legs now report contact with ground; motors have higher torque and
        speed; ground has higher friction; lidar rendered less nervously.
    - v0: Initial version

    ### Credits
    Created by Oleg Klimov
    """

    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": FPS,
    }

    def __init__(self, render_mode: Optional[str] = None, hardcore: bool = False):
        """Build the Box2D world, fixtures, and the action/observation spaces."""
        EzPickle.__init__(self, render_mode, hardcore)
        self.isopen = True
        self.world = Box2D.b2World()
        self.terrain: List[Box2D.b2Body] = []
        self.hull: Optional[Box2D.b2Body] = None
        self.prev_shaping = None
        self.hardcore = hardcore

        # Mutable fixture templates reused while generating terrain pieces.
        self.fd_polygon = fixtureDef(
            shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]),
            friction=FRICTION,
        )

        self.fd_edge = fixtureDef(
            shape=edgeShape(vertices=[(0, 0), (1, 1)]),
            friction=FRICTION,
            categoryBits=0x0001,
        )

        # we use 5.0 to represent the joints moving at maximum
        # 5 x the rated speed due to impulses from ground contact etc.
        low = np.array(
            [
                -math.pi,
                -5.0,
                -5.0,
                -5.0,
                -math.pi,
                -5.0,
                -math.pi,
                -5.0,
                -0.0,
                -math.pi,
                -5.0,
                -math.pi,
                -5.0,
                -0.0,
            ]
            + [-1.0] * 10  # lidar fractions
        ).astype(np.float32)
        high = np.array(
            [
                math.pi,
                5.0,
                5.0,
                5.0,
                math.pi,
                5.0,
                math.pi,
                5.0,
                5.0,
                math.pi,
                5.0,
                math.pi,
                5.0,
                5.0,
            ]
            + [1.0] * 10  # lidar fractions
        ).astype(np.float32)
        self.action_space = spaces.Box(
            np.array([-1, -1, -1, -1]).astype(np.float32),
            np.array([1, 1, 1, 1]).astype(np.float32),
        )
        self.observation_space = spaces.Box(low, high)

        # Layout of the observation vector (mirrors the construction in step()):
        # state = [
        #     self.hull.angle,        # Normal angles up to 0.5 here, but sure more is possible.
        #     2.0 * self.hull.angularVelocity / FPS,
        #     0.3 * vel.x * (VIEWPORT_W / SCALE) / FPS,  # Normalized to get -1..1 range
        #     0.3 * vel.y * (VIEWPORT_H / SCALE) / FPS,
        #     self.joints[
        #         0
        #     ].angle,  # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
        #     self.joints[0].speed / SPEED_HIP,
        #     self.joints[1].angle + 1.0,
        #     self.joints[1].speed / SPEED_KNEE,
        #     1.0 if self.legs[1].ground_contact else 0.0,
        #     self.joints[2].angle,
        #     self.joints[2].speed / SPEED_HIP,
        #     self.joints[3].angle + 1.0,
        #     self.joints[3].speed / SPEED_KNEE,
        #     1.0 if self.legs[3].ground_contact else 0.0,
        # ]
        # state += [l.fraction for l in self.lidar]

        self.render_mode = render_mode
        self.screen: Optional[pygame.Surface] = None
        self.clock = None

    def _destroy(self):
        """Tear down all Box2D bodies created by a previous reset (no-op before first reset)."""
        if not self.terrain:
            return
        self.world.contactListener = None
        for t in self.terrain:
            self.world.DestroyBody(t)
        self.terrain = []
        self.world.DestroyBody(self.hull)
        self.hull = None
        for leg in self.legs:
            self.world.DestroyBody(leg)
        self.legs = []
        self.joints = []

    def _generate_terrain(self, hardcore):
        """Generate the ground as a state machine over GRASS/STUMP/STAIRS/PIT segments.

        Fills self.terrain (static bodies), self.terrain_x/terrain_y (the
        ground polyline) and self.terrain_poly (render polygons). In hardcore
        mode, obstacle states are sampled; otherwise only GRASS is used.
        """
        GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5)
        state = GRASS
        velocity = 0.0
        y = TERRAIN_HEIGHT
        counter = TERRAIN_STARTPAD
        oneshot = False  # True exactly on the first step of a new obstacle state
        self.terrain = []
        self.terrain_x = []
        self.terrain_y = []

        stair_steps, stair_width, stair_height = 0, 0, 0
        original_y = 0
        for i in range(TERRAIN_LENGTH):
            x = i * TERRAIN_STEP
            self.terrain_x.append(x)

            if state == GRASS and not oneshot:
                # Smooth random walk pulled back toward TERRAIN_HEIGHT.
                velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y)
                if i > TERRAIN_STARTPAD:
                    velocity += self.np_random.uniform(-1, 1) / SCALE  # 1
                y += velocity

            elif state == PIT and oneshot:
                # Build the two vertical pit walls; counter is the pit width.
                counter = self.np_random.integers(3, 5)
                poly = [
                    (x, y),
                    (x + TERRAIN_STEP, y),
                    (x + TERRAIN_STEP, y - 4 * TERRAIN_STEP),
                    (x, y - 4 * TERRAIN_STEP),
                ]
                self.fd_polygon.shape.vertices = poly
                t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
                t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
                self.terrain.append(t)

                self.fd_polygon.shape.vertices = [
                    (p[0] + TERRAIN_STEP * counter, p[1]) for p in poly
                ]
                t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
                t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
                self.terrain.append(t)
                counter += 2
                original_y = y

            elif state == PIT and not oneshot:
                # Drop the polyline to the pit floor while inside the pit.
                y = original_y
                if counter > 1:
                    y -= 4 * TERRAIN_STEP

            elif state == STUMP and oneshot:
                counter = self.np_random.integers(1, 3)
                poly = [
                    (x, y),
                    (x + counter * TERRAIN_STEP, y),
                    (x + counter * TERRAIN_STEP, y + counter * TERRAIN_STEP),
                    (x, y + counter * TERRAIN_STEP),
                ]
                self.fd_polygon.shape.vertices = poly
                t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
                t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
                self.terrain.append(t)

            elif state == STAIRS and oneshot:
                # Ascending or descending flight of box steps.
                stair_height = +1 if self.np_random.random() > 0.5 else -1
                stair_width = self.np_random.integers(4, 5)
                stair_steps = self.np_random.integers(3, 5)
                original_y = y
                for s in range(stair_steps):
                    poly = [
                        (
                            x + (s * stair_width) * TERRAIN_STEP,
                            y + (s * stair_height) * TERRAIN_STEP,
                        ),
                        (
                            x + ((1 + s) * stair_width) * TERRAIN_STEP,
                            y + (s * stair_height) * TERRAIN_STEP,
                        ),
                        (
                            x + ((1 + s) * stair_width) * TERRAIN_STEP,
                            y + (-1 + s * stair_height) * TERRAIN_STEP,
                        ),
                        (
                            x + (s * stair_width) * TERRAIN_STEP,
                            y + (-1 + s * stair_height) * TERRAIN_STEP,
                        ),
                    ]
                    self.fd_polygon.shape.vertices = poly
                    t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
                    t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
                    self.terrain.append(t)
                counter = stair_steps * stair_width

            elif state == STAIRS and not oneshot:
                # Track the polyline along the current step of the staircase.
                s = stair_steps * stair_width - counter - stair_height
                n = s / stair_width
                y = original_y + (n * stair_height) * TERRAIN_STEP

            oneshot = False
            self.terrain_y.append(y)
            counter -= 1
            if counter == 0:
                # Segment finished: pick the next segment length and state.
                counter = self.np_random.integers(TERRAIN_GRASS / 2, TERRAIN_GRASS)
                if state == GRASS and hardcore:
                    state = self.np_random.integers(1, _STATES_)
                    oneshot = True
                else:
                    state = GRASS
                    oneshot = True

        # Convert the ground polyline into edge fixtures and render polygons.
        self.terrain_poly = []
        for i in range(TERRAIN_LENGTH - 1):
            poly = [
                (self.terrain_x[i], self.terrain_y[i]),
                (self.terrain_x[i + 1], self.terrain_y[i + 1]),
            ]
            self.fd_edge.shape.vertices = poly
            t = self.world.CreateStaticBody(fixtures=self.fd_edge)
            color = (76, 255 if i % 2 == 0 else 204, 76)
            t.color1 = color
            t.color2 = color
            self.terrain.append(t)
            color = (102, 153, 76)
            poly += [(poly[1][0], 0), (poly[0][0], 0)]
            self.terrain_poly.append((poly, color))
        self.terrain.reverse()

    def _generate_clouds(self):
        # Sorry for the clouds, couldn't resist
        # Purely cosmetic: random 5-vertex blobs with their x-extent for culling.
        self.cloud_poly = []
        for i in range(TERRAIN_LENGTH // 20):
            x = self.np_random.uniform(0, TERRAIN_LENGTH) * TERRAIN_STEP
            y = VIEWPORT_H / SCALE * 3 / 4
            poly = [
                (
                    x
                    + 15 * TERRAIN_STEP * math.sin(3.14 * 2 * a / 5)
                    + self.np_random.uniform(0, 5 * TERRAIN_STEP),
                    y
                    + 5 * TERRAIN_STEP * math.cos(3.14 * 2 * a / 5)
                    + self.np_random.uniform(0, 5 * TERRAIN_STEP),
                )
                for a in range(5)
            ]
            x1 = min(p[0] for p in poly)
            x2 = max(p[0] for p in poly)
            self.cloud_poly.append((poly, x1, x2))

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ):
        """Destroy the old world contents, rebuild terrain and walker, and return the initial observation."""
        super().reset(seed=seed)
        self._destroy()
        # NOTE: the extra reference works around a Box2D contact-listener GC bug.
        self.world.contactListener_bug_workaround = ContactDetector(self)
        self.world.contactListener = self.world.contactListener_bug_workaround
        self.game_over = False
        self.prev_shaping = None
        self.scroll = 0.0
        self.lidar_render = 0

        self._generate_terrain(self.hardcore)
        self._generate_clouds()

        init_x = TERRAIN_STEP * TERRAIN_STARTPAD / 2
        init_y = TERRAIN_HEIGHT + 2 * LEG_H
        self.hull = self.world.CreateDynamicBody(
            position=(init_x, init_y), fixtures=HULL_FD
        )
        self.hull.color1 = (127, 51, 229)
        self.hull.color2 = (76, 76, 127)
        # Small random horizontal push so episodes are not identical.
        self.hull.ApplyForceToCenter(
            (self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True
        )

        # Build two legs (i = -1, +1); each contributes an upper and a lower
        # segment, so legs = [upper, lower, upper, lower] and lower legs are
        # at indices 1 and 3 (as used by ContactDetector).
        self.legs: List[Box2D.b2Body] = []
        self.joints: List[Box2D.b2RevoluteJoint] = []
        for i in [-1, +1]:
            leg = self.world.CreateDynamicBody(
                position=(init_x, init_y - LEG_H / 2 - LEG_DOWN),
                angle=(i * 0.05),
                fixtures=LEG_FD,
            )
            leg.color1 = (153 - i * 25, 76 - i * 25, 127 - i * 25)
            leg.color2 = (102 - i * 25, 51 - i * 25, 76 - i * 25)
            rjd = revoluteJointDef(
                bodyA=self.hull,
                bodyB=leg,
                localAnchorA=(0, LEG_DOWN),
                localAnchorB=(0, LEG_H / 2),
                enableMotor=True,
                enableLimit=True,
                maxMotorTorque=MOTORS_TORQUE,
                motorSpeed=i,
                lowerAngle=-0.8,
                upperAngle=1.1,
            )
            self.legs.append(leg)
            self.joints.append(self.world.CreateJoint(rjd))

            lower = self.world.CreateDynamicBody(
                position=(init_x, init_y - LEG_H * 3 / 2 - LEG_DOWN),
                angle=(i * 0.05),
                fixtures=LOWER_FD,
            )
            lower.color1 = (153 - i * 25, 76 - i * 25, 127 - i * 25)
            lower.color2 = (102 - i * 25, 51 - i * 25, 76 - i * 25)
            rjd = revoluteJointDef(
                bodyA=leg,
                bodyB=lower,
                localAnchorA=(0, -LEG_H / 2),
                localAnchorB=(0, LEG_H / 2),
                enableMotor=True,
                enableLimit=True,
                maxMotorTorque=MOTORS_TORQUE,
                motorSpeed=1,
                lowerAngle=-1.6,
                upperAngle=-0.1,
            )
            lower.ground_contact = False
            self.legs.append(lower)
            self.joints.append(self.world.CreateJoint(rjd))

        self.drawlist = self.terrain + self.legs + [self.hull]

        class LidarCallback(Box2D.b2.rayCastCallback):
            # Keeps the closest hit against category-1 (ground) fixtures;
            # returning `fraction` clips subsequent ray casts to that range.
            def ReportFixture(self, fixture, point, normal, fraction):
                if (fixture.filterData.categoryBits & 1) == 0:
                    return -1
                self.p2 = point
                self.fraction = fraction
                return fraction

        self.lidar = [LidarCallback() for _ in range(10)]
        if self.render_mode == "human":
            self.render()
        # A zero-action step produces the initial observation.
        return self.step(np.array([0, 0, 0, 0]))[0], {}

    def step(self, action: np.ndarray):
        """Apply joint motor commands, advance the physics one frame, and build the 24-dim state."""
        assert self.hull is not None

        # self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
        control_speed = False  # Should be easier as well
        if control_speed:
            self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(action[0], -1, 1))
            self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(action[1], -1, 1))
            self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(action[2], -1, 1))
            self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(action[3], -1, 1))
        else:
            # Torque control: sign sets direction, magnitude sets max torque.
            self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0]))
            self.joints[0].maxMotorTorque = float(
                MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1)
            )
            self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1]))
            self.joints[1].maxMotorTorque = float(
                MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1)
            )
            self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2]))
            self.joints[2].maxMotorTorque = float(
                MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1)
            )
            self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3]))
            self.joints[3].maxMotorTorque = float(
                MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1)
            )

        self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)

        pos = self.hull.position
        vel = self.hull.linearVelocity

        # Cast 10 lidar rays fanning forward/downward from the hull.
        for i in range(10):
            self.lidar[i].fraction = 1.0
            self.lidar[i].p1 = pos
            self.lidar[i].p2 = (
                pos[0] + math.sin(1.5 * i / 10.0) * LIDAR_RANGE,
                pos[1] - math.cos(1.5 * i / 10.0) * LIDAR_RANGE,
            )
            self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2)

        state = [
            self.hull.angle,  # Normal angles up to 0.5 here, but sure more is possible.
            2.0 * self.hull.angularVelocity / FPS,
            0.3 * vel.x * (VIEWPORT_W / SCALE) / FPS,  # Normalized to get -1..1 range
            0.3 * vel.y * (VIEWPORT_H / SCALE) / FPS,
            self.joints[0].angle,
            # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
            self.joints[0].speed / SPEED_HIP,
            self.joints[1].angle + 1.0,
            self.joints[1].speed / SPEED_KNEE,
            1.0 if self.legs[1].ground_contact else 0.0,
            self.joints[2].angle,
            self.joints[2].speed / SPEED_HIP,
            self.joints[3].angle + 1.0,
            self.joints[3].speed / SPEED_KNEE,
            1.0 if self.legs[3].ground_contact else 0.0,
        ]
        state += [l.fraction for l in self.lidar]
        assert len(state) == 24

        self.scroll = pos.x - VIEWPORT_W / SCALE / 5

        shaping = (
            130 * pos[0] / SCALE
        )  # moving forward is a way to receive reward (normalized to get 300 on completion)
        shaping -= 5.0 * abs(
            state[0]
        )  # keep head straight, other than that and falling, any behavior is unpunished

        reward = 0
        if self.prev_shaping is not None:
            # Potential-based shaping: reward the *change* in shaping.
            reward = shaping - self.prev_shaping
        self.prev_shaping = shaping

        for a in action:
            reward -= 0.00035 * MOTORS_TORQUE * np.clip(np.abs(a), 0, 1)
            # normalized to about -50.0 using heuristic, more optimal agent should spend less

        terminated = False
        if self.game_over or pos[0] < 0:
            reward = -100
            terminated = True
        if pos[0] > (TERRAIN_LENGTH - TERRAIN_GRASS) * TERRAIN_STEP:
            terminated = True

        if self.render_mode == "human":
            self.render()
        # Truncation is handled by the TimeLimit wrapper, hence the fixed False.
        return np.array(state, dtype=np.float32), reward, terminated, False, {}

    def render(self):
        """Render the current state per self.render_mode using pygame."""
        if self.render_mode is None:
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
            )
            return

        try:
            import pygame
            from pygame import gfxdraw
        except ImportError:
            raise DependencyNotInstalled(
                "pygame is not installed, run `pip install gym[box2d]`"
            )

        if self.screen is None and self.render_mode == "human":
            pygame.init()
            pygame.display.init()
            self.screen = pygame.display.set_mode((VIEWPORT_W, VIEWPORT_H))
        if self.clock is None:
            self.clock = pygame.time.Clock()

        self.surf = pygame.Surface(
            (VIEWPORT_W + max(0.0, self.scroll) * SCALE, VIEWPORT_H)
        )

        # NOTE(review): pygame.transform.scale returns a new Surface and does
        # not modify its argument — this call's result is discarded, so it
        # looks like a no-op. TODO confirm intent.
        pygame.transform.scale(self.surf, (SCALE, SCALE))

        pygame.draw.polygon(
            self.surf,
            color=(215, 215, 255),
            points=[
                (self.scroll * SCALE, 0),
                (self.scroll * SCALE + VIEWPORT_W, 0),
                (self.scroll * SCALE + VIEWPORT_W, VIEWPORT_H),
                (self.scroll * SCALE, VIEWPORT_H),
            ],
        )

        # Clouds scroll at half speed for a parallax effect.
        for poly, x1, x2 in self.cloud_poly:
            if x2 < self.scroll / 2:
                continue
            if x1 > self.scroll / 2 + VIEWPORT_W / SCALE:
                continue
            pygame.draw.polygon(
                self.surf,
                color=(255, 255, 255),
                points=[
                    (p[0] * SCALE + self.scroll * SCALE / 2, p[1] * SCALE) for p in poly
                ],
            )
            gfxdraw.aapolygon(
                self.surf,
                [(p[0] * SCALE + self.scroll * SCALE / 2, p[1] * SCALE) for p in poly],
                (255, 255, 255),
            )

        # Draw only the terrain polygons currently inside the viewport.
        for poly, color in self.terrain_poly:
            if poly[1][0] < self.scroll:
                continue
            if poly[0][0] > self.scroll + VIEWPORT_W / SCALE:
                continue
            scaled_poly = []
            for coord in poly:
scaled_poly.append([coord[0] * SCALE, coord[1] * SCALE]) pygame.draw.polygon(self.surf, color=color, points=scaled_poly) gfxdraw.aapolygon(self.surf, scaled_poly, color) self.lidar_render = (self.lidar_render + 1) % 100 i = self.lidar_render if i < 2 * len(self.lidar): single_lidar = ( self.lidar[i] if i < len(self.lidar) else self.lidar[len(self.lidar) - i - 1] ) if hasattr(single_lidar, "p1") and hasattr(single_lidar, "p2"): pygame.draw.line( self.surf, color=(255, 0, 0), start_pos=(single_lidar.p1[0] * SCALE, single_lidar.p1[1] * SCALE), end_pos=(single_lidar.p2[0] * SCALE, single_lidar.p2[1] * SCALE), width=1, ) for obj in self.drawlist: for f in obj.fixtures: trans = f.body.transform if type(f.shape) is circleShape: pygame.draw.circle( self.surf, color=obj.color1, center=trans * f.shape.pos * SCALE, radius=f.shape.radius * SCALE, ) pygame.draw.circle( self.surf, color=obj.color2, center=trans * f.shape.pos * SCALE, radius=f.shape.radius * SCALE, ) else: path = [trans * v * SCALE for v in f.shape.vertices] if len(path) > 2: pygame.draw.polygon(self.surf, color=obj.color1, points=path) gfxdraw.aapolygon(self.surf, path, obj.color1) path.append(path[0]) pygame.draw.polygon( self.surf, color=obj.color2, points=path, width=1 ) gfxdraw.aapolygon(self.surf, path, obj.color2) else: pygame.draw.aaline( self.surf, start_pos=path[0], end_pos=path[1], color=obj.color1, ) flagy1 = TERRAIN_HEIGHT * SCALE flagy2 = flagy1 + 50 x = TERRAIN_STEP * 3 * SCALE pygame.draw.aaline( self.surf, color=(0, 0, 0), start_pos=(x, flagy1), end_pos=(x, flagy2) ) f = [ (x, flagy2), (x, flagy2 - 10), (x + 25, flagy2 - 5), ] pygame.draw.polygon(self.surf, color=(230, 51, 0), points=f) pygame.draw.lines( self.surf, color=(0, 0, 0), points=f + [f[0]], width=1, closed=False ) self.surf = pygame.transform.flip(self.surf, False, True) if self.render_mode == "human": assert self.screen is not None self.screen.blit(self.surf, (-self.scroll * SCALE, 0)) pygame.event.pump() 
            # Cap the frame rate at the environment's nominal FPS, then present.
            self.clock.tick(self.metadata["render_fps"])
            pygame.display.flip()
        elif self.render_mode == "rgb_array":
            # pixels3d yields a (W, H, 3) view; transpose to (H, W, 3) and keep
            # only the right-most VIEWPORT_W columns (the visible window).
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(self.surf)), axes=(1, 0, 2)
            )[:, -VIEWPORT_W:]

    def close(self):
        """Shut down the pygame display, if one was opened, and mark the env closed."""
        if self.screen is not None:
            import pygame

            pygame.display.quit()
            pygame.quit()
            self.isopen = False


class BipedalWalkerHardcore:
    # Placeholder: the hardcore variant cannot be constructed directly — it is
    # only reachable via gym.make("BipedalWalker-v3", hardcore=True).
    def __init__(self):
        raise error.Error(
            "Error initializing BipedalWalkerHardcore Environment.\n"
            "Currently, we do not support initializing this mode of environment by calling the class directly.\n"
            "To use this environment, instead create it by specifying the hardcore keyword in gym.make, i.e.\n"
            'gym.make("BipedalWalker-v3", hardcore=True)'
        )


if __name__ == "__main__":
    # Heuristic: suboptimal, have no notion of balance.
    env = BipedalWalker()
    env.reset()
    steps = 0
    total_reward = 0
    a = np.array([0.0, 0.0, 0.0, 0.0])
    # Hand-written three-state walking controller.
    STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1, 2, 3
    SPEED = 0.29  # Will fall forward on higher speed
    state = STAY_ON_ONE_LEG
    moving_leg = 0
    supporting_leg = 1 - moving_leg
    SUPPORT_KNEE_ANGLE = +0.1
    supporting_knee_angle = SUPPORT_KNEE_ANGLE
    while True:
        s, r, terminated, truncated, info = env.step(a)
        total_reward += r
        if steps % 20 == 0 or terminated or truncated:
            print("\naction " + str([f"{x:+0.2f}" for x in a]))
            print(f"step {steps} total_reward {total_reward:+0.2f}")
            print("hull " + str([f"{x:+0.2f}" for x in s[0:4]]))
            print("leg0 " + str([f"{x:+0.2f}" for x in s[4:9]]))
            print("leg1 " + str([f"{x:+0.2f}" for x in s[9:14]]))
        steps += 1
        # Observation layout: s[8] / s[13] are per-leg ground-contact flags;
        # each leg's block of 5 values starts at 4 + 5 * leg_index.
        contact0 = s[8]
        contact1 = s[13]
        moving_s_base = 4 + 5 * moving_leg
        supporting_s_base = 4 + 5 * supporting_leg
        hip_targ = [None, None]  # -0.8 .. +1.1
        knee_targ = [None, None]  # -0.6 ..
+0.9 hip_todo = [0.0, 0.0] knee_todo = [0.0, 0.0] if state == STAY_ON_ONE_LEG: hip_targ[moving_leg] = 1.1 knee_targ[moving_leg] = -0.6 supporting_knee_angle += 0.03 if s[2] > SPEED: supporting_knee_angle += 0.03 supporting_knee_angle = min(supporting_knee_angle, SUPPORT_KNEE_ANGLE) knee_targ[supporting_leg] = supporting_knee_angle if s[supporting_s_base + 0] < 0.10: # supporting leg is behind state = PUT_OTHER_DOWN if state == PUT_OTHER_DOWN: hip_targ[moving_leg] = +0.1 knee_targ[moving_leg] = SUPPORT_KNEE_ANGLE knee_targ[supporting_leg] = supporting_knee_angle if s[moving_s_base + 4]: state = PUSH_OFF supporting_knee_angle = min(s[moving_s_base + 2], SUPPORT_KNEE_ANGLE) if state == PUSH_OFF: knee_targ[moving_leg] = supporting_knee_angle knee_targ[supporting_leg] = +1.0 if s[supporting_s_base + 2] > 0.88 or s[2] > 1.2 * SPEED: state = STAY_ON_ONE_LEG moving_leg = 1 - moving_leg supporting_leg = 1 - moving_leg if hip_targ[0]: hip_todo[0] = 0.9 * (hip_targ[0] - s[4]) - 0.25 * s[5] if hip_targ[1]: hip_todo[1] = 0.9 * (hip_targ[1] - s[9]) - 0.25 * s[10] if knee_targ[0]: knee_todo[0] = 4.0 * (knee_targ[0] - s[6]) - 0.25 * s[7] if knee_targ[1]: knee_todo[1] = 4.0 * (knee_targ[1] - s[11]) - 0.25 * s[12] hip_todo[0] -= 0.9 * (0 - s[0]) - 1.5 * s[1] # PID to keep head strait hip_todo[1] -= 0.9 * (0 - s[0]) - 1.5 * s[1] knee_todo[0] -= 15.0 * s[3] # vertical speed, to damp oscillations knee_todo[1] -= 15.0 * s[3] a[0] = hip_todo[0] a[1] = knee_todo[0] a[2] = hip_todo[1] a[3] = knee_todo[1] a = np.clip(0.5 * a, -1.0, 1.0) if terminated or truncated: break ================================================ FILE: gym/envs/box2d/car_dynamics.py ================================================ """ Top-down car dynamics simulation. Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell. This simulation is a bit more detailed, with wheels rotation. 
Created by Oleg Klimov """ import math import Box2D import numpy as np from gym.error import DependencyNotInstalled try: from Box2D.b2 import fixtureDef, polygonShape, revoluteJointDef except ImportError: raise DependencyNotInstalled("box2D is not installed, run `pip install gym[box2d]`") SIZE = 0.02 ENGINE_POWER = 100000000 * SIZE * SIZE WHEEL_MOMENT_OF_INERTIA = 4000 * SIZE * SIZE FRICTION_LIMIT = ( 1000000 * SIZE * SIZE ) # friction ~= mass ~= size^2 (calculated implicitly using density) WHEEL_R = 27 WHEEL_W = 14 WHEELPOS = [(-55, +80), (+55, +80), (-55, -82), (+55, -82)] HULL_POLY1 = [(-60, +130), (+60, +130), (+60, +110), (-60, +110)] HULL_POLY2 = [(-15, +120), (+15, +120), (+20, +20), (-20, 20)] HULL_POLY3 = [ (+25, +20), (+50, -10), (+50, -40), (+20, -90), (-20, -90), (-50, -40), (-50, -10), (-25, +20), ] HULL_POLY4 = [(-50, -120), (+50, -120), (+50, -90), (-50, -90)] WHEEL_COLOR = (0, 0, 0) WHEEL_WHITE = (77, 77, 77) MUD_COLOR = (102, 102, 0) class Car: def __init__(self, world, init_angle, init_x, init_y): self.world: Box2D.b2World = world self.hull: Box2D.b2Body = self.world.CreateDynamicBody( position=(init_x, init_y), angle=init_angle, fixtures=[ fixtureDef( shape=polygonShape( vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY1] ), density=1.0, ), fixtureDef( shape=polygonShape( vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY2] ), density=1.0, ), fixtureDef( shape=polygonShape( vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY3] ), density=1.0, ), fixtureDef( shape=polygonShape( vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY4] ), density=1.0, ), ], ) self.hull.color = (0.8, 0.0, 0.0) self.wheels = [] self.fuel_spent = 0.0 WHEEL_POLY = [ (-WHEEL_W, +WHEEL_R), (+WHEEL_W, +WHEEL_R), (+WHEEL_W, -WHEEL_R), (-WHEEL_W, -WHEEL_R), ] for wx, wy in WHEELPOS: front_k = 1.0 if wy > 0 else 1.0 w = self.world.CreateDynamicBody( position=(init_x + wx * SIZE, init_y + wy * SIZE), angle=init_angle, fixtures=fixtureDef( shape=polygonShape( vertices=[ (x 
* front_k * SIZE, y * front_k * SIZE)
                            for x, y in WHEEL_POLY
                        ]
                    ),
                    density=0.1,
                    # Wheels live in category 0x0020 and only collide with
                    # category 0x001 fixtures (the road/ground).
                    categoryBits=0x0020,
                    maskBits=0x001,
                    restitution=0.0,
                ),
            )
            w.wheel_rad = front_k * WHEEL_R * SIZE
            w.color = WHEEL_COLOR
            # Per-wheel control and simulation state, mutated by gas/brake/steer
            # and by Car.step().
            w.gas = 0.0
            w.brake = 0.0
            w.steer = 0.0
            w.phase = 0.0  # wheel angle
            w.omega = 0.0  # angular velocity
            w.skid_start = None
            w.skid_particle = None
            # Revolute joint attaching the wheel to the hull; the ±0.4 rad
            # limit is the steering range.
            rjd = revoluteJointDef(
                bodyA=self.hull,
                bodyB=w,
                localAnchorA=(wx * SIZE, wy * SIZE),
                localAnchorB=(0, 0),
                enableMotor=True,
                enableLimit=True,
                maxMotorTorque=180 * 900 * SIZE * SIZE,
                motorSpeed=0,
                lowerAngle=-0.4,
                upperAngle=+0.4,
            )
            w.joint = self.world.CreateJoint(rjd)
            w.tiles = set()
            w.userData = w
            self.wheels.append(w)
        self.drawlist = self.wheels + [self.hull]
        self.particles = []

    def gas(self, gas):
        """control: rear wheel drive

        Args:
            gas (float): How much gas gets applied. Gets clipped between 0 and 1.
        """
        gas = np.clip(gas, 0, 1)
        # Only wheels 2 and 3 (the rear pair in WHEELPOS) are driven.
        for w in self.wheels[2:4]:
            diff = gas - w.gas
            if diff > 0.1:
                diff = 0.1  # gradually increase, but stop immediately
            w.gas += diff

    def brake(self, b):
        """control: brake

        Args:
            b (0..1): Degree to which the brakes are applied.
        More than 0.9 blocks the wheels to zero rotation"""
        for w in self.wheels:
            w.brake = b

    def steer(self, s):
        """control: steer

        Args:
            s (-1..1): target position, it takes time to rotate steering wheel from side-to-side"""
        # Only the two front wheels (indices 0 and 1) steer.
        self.wheels[0].steer = s
        self.wheels[1].steer = s

    def step(self, dt):
        """Advance the car physics by dt seconds: steer each wheel towards its
        target, integrate wheel spin from gas/brake, and apply tyre friction
        forces to the wheel bodies."""
        for w in self.wheels:
            # Steer each wheel: drive the joint motor towards the target angle,
            # with speed proportional to the remaining error (capped at 3 rad/s).
            dir = np.sign(w.steer - w.joint.angle)
            val = abs(w.steer - w.joint.angle)
            w.joint.motorSpeed = dir * min(50.0 * val, 3.0)

            # Position => friction_limit
            grass = True
            friction_limit = FRICTION_LIMIT * 0.6  # Grass friction if no tile
            for tile in w.tiles:
                friction_limit = max(
                    friction_limit, FRICTION_LIMIT * tile.road_friction
                )
                grass = False

            # Force
            forw = w.GetWorldVector((0, 1))
            side = w.GetWorldVector((1, 0))
            v = w.linearVelocity
            vf = forw[0] * v[0] + forw[1] * v[1]  # forward speed
            vs = side[0] * v[0] + side[1] * v[1]  # side speed

            # WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy
            # WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power
            # domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega

            # add small coef not to divide by zero
            w.omega += (
                dt
                * ENGINE_POWER
                * w.gas
                / WHEEL_MOMENT_OF_INERTIA
                / (abs(w.omega) + 5.0)
            )
            self.fuel_spent += dt * ENGINE_POWER * w.gas

            if w.brake >= 0.9:
                # Near-full brake locks the wheel instantly.
                w.omega = 0
            elif w.brake > 0:
                BRAKE_FORCE = 15  # radians per second
                dir = -np.sign(w.omega)
                val = BRAKE_FORCE * w.brake
                if abs(val) > abs(w.omega):
                    val = abs(w.omega)  # low speed => same as = 0
                w.omega += dir * val
            w.phase += w.omega * dt

            vr = w.omega * w.wheel_rad  # rotating wheel speed
            f_force = -vf + vr  # force direction is direction of speed difference
            p_force = -vs

            # Physically correct is to always apply friction_limit until speed is equal.
            # But dt is finite, that will lead to oscillations if difference is already near zero.
# Random coefficient to cut oscillations in few steps (have no effect on friction_limit) f_force *= 205000 * SIZE * SIZE p_force *= 205000 * SIZE * SIZE force = np.sqrt(np.square(f_force) + np.square(p_force)) # Skid trace if abs(force) > 2.0 * friction_limit: if ( w.skid_particle and w.skid_particle.grass == grass and len(w.skid_particle.poly) < 30 ): w.skid_particle.poly.append((w.position[0], w.position[1])) elif w.skid_start is None: w.skid_start = w.position else: w.skid_particle = self._create_particle( w.skid_start, w.position, grass ) w.skid_start = None else: w.skid_start = None w.skid_particle = None if abs(force) > friction_limit: f_force /= force p_force /= force force = friction_limit # Correct physics here f_force *= force p_force *= force w.omega -= dt * f_force * w.wheel_rad / WHEEL_MOMENT_OF_INERTIA w.ApplyForceToCenter( ( p_force * side[0] + f_force * forw[0], p_force * side[1] + f_force * forw[1], ), True, ) def draw(self, surface, zoom, translation, angle, draw_particles=True): import pygame.draw if draw_particles: for p in self.particles: poly = [pygame.math.Vector2(c).rotate_rad(angle) for c in p.poly] poly = [ ( coords[0] * zoom + translation[0], coords[1] * zoom + translation[1], ) for coords in poly ] pygame.draw.lines( surface, color=p.color, points=poly, width=2, closed=False ) for obj in self.drawlist: for f in obj.fixtures: trans = f.body.transform path = [trans * v for v in f.shape.vertices] path = [(coords[0], coords[1]) for coords in path] path = [pygame.math.Vector2(c).rotate_rad(angle) for c in path] path = [ ( coords[0] * zoom + translation[0], coords[1] * zoom + translation[1], ) for coords in path ] color = [int(c * 255) for c in obj.color] pygame.draw.polygon(surface, color=color, points=path) if "phase" not in obj.__dict__: continue a1 = obj.phase a2 = obj.phase + 1.2 # radians s1 = math.sin(a1) s2 = math.sin(a2) c1 = math.cos(a1) c2 = math.cos(a2) if s1 > 0 and s2 > 0: continue if s1 > 0: c1 = np.sign(c1) if s2 > 0: c2 = 
np.sign(c2) white_poly = [ (-WHEEL_W * SIZE, +WHEEL_R * c1 * SIZE), (+WHEEL_W * SIZE, +WHEEL_R * c1 * SIZE), (+WHEEL_W * SIZE, +WHEEL_R * c2 * SIZE), (-WHEEL_W * SIZE, +WHEEL_R * c2 * SIZE), ] white_poly = [trans * v for v in white_poly] white_poly = [(coords[0], coords[1]) for coords in white_poly] white_poly = [ pygame.math.Vector2(c).rotate_rad(angle) for c in white_poly ] white_poly = [ ( coords[0] * zoom + translation[0], coords[1] * zoom + translation[1], ) for coords in white_poly ] pygame.draw.polygon(surface, color=WHEEL_WHITE, points=white_poly) def _create_particle(self, point1, point2, grass): class Particle: pass p = Particle() p.color = WHEEL_COLOR if not grass else MUD_COLOR p.ttl = 1 p.poly = [(point1[0], point1[1]), (point2[0], point2[1])] p.grass = grass self.particles.append(p) while len(self.particles) > 30: self.particles.pop(0) return p def destroy(self): self.world.DestroyBody(self.hull) self.hull = None for w in self.wheels: self.world.DestroyBody(w) self.wheels = [] ================================================ FILE: gym/envs/box2d/car_racing.py ================================================ __credits__ = ["Andrea PIERRÉ"] import math from typing import Optional, Union import numpy as np import gym from gym import spaces from gym.envs.box2d.car_dynamics import Car from gym.error import DependencyNotInstalled, InvalidAction from gym.utils import EzPickle try: import Box2D from Box2D.b2 import contactListener, fixtureDef, polygonShape except ImportError: raise DependencyNotInstalled("box2D is not installed, run `pip install gym[box2d]`") try: # As pygame is necessary for using the environment (reset and step) even without a render mode # therefore, pygame is a necessary import for the environment. 
import pygame from pygame import gfxdraw except ImportError: raise DependencyNotInstalled( "pygame is not installed, run `pip install gym[box2d]`" ) STATE_W = 96 # less than Atari 160x192 STATE_H = 96 VIDEO_W = 600 VIDEO_H = 400 WINDOW_W = 1000 WINDOW_H = 800 SCALE = 6.0 # Track scale TRACK_RAD = 900 / SCALE # Track is heavily morphed circle with this radius PLAYFIELD = 2000 / SCALE # Game over boundary FPS = 50 # Frames per second ZOOM = 2.7 # Camera zoom ZOOM_FOLLOW = True # Set to False for fixed view (don't use zoom) TRACK_DETAIL_STEP = 21 / SCALE TRACK_TURN_RATE = 0.31 TRACK_WIDTH = 40 / SCALE BORDER = 8 / SCALE BORDER_MIN_COUNT = 4 GRASS_DIM = PLAYFIELD / 20.0 MAX_SHAPE_DIM = ( max(GRASS_DIM, TRACK_WIDTH, TRACK_DETAIL_STEP) * math.sqrt(2) * ZOOM * SCALE ) class FrictionDetector(contactListener): def __init__(self, env, lap_complete_percent): contactListener.__init__(self) self.env = env self.lap_complete_percent = lap_complete_percent def BeginContact(self, contact): self._contact(contact, True) def EndContact(self, contact): self._contact(contact, False) def _contact(self, contact, begin): tile = None obj = None u1 = contact.fixtureA.body.userData u2 = contact.fixtureB.body.userData if u1 and "road_friction" in u1.__dict__: tile = u1 obj = u2 if u2 and "road_friction" in u2.__dict__: tile = u2 obj = u1 if not tile: return # inherit tile color from env tile.color[:] = self.env.road_color if not obj or "tiles" not in obj.__dict__: return if begin: obj.tiles.add(tile) if not tile.road_visited: tile.road_visited = True self.env.reward += 1000.0 / len(self.env.track) self.env.tile_visited_count += 1 # Lap is considered completed if enough % of the track was covered if ( tile.idx == 0 and self.env.tile_visited_count / len(self.env.track) > self.lap_complete_percent ): self.env.new_lap = True else: obj.tiles.remove(tile) class CarRacing(gym.Env, EzPickle): """ ### Description The easiest control task to learn from pixels - a top-down racing environment. 
    The generated track is random every episode.

    Some indicators are shown at the bottom of the window along with the
    state RGB buffer. From left to right: true speed, four ABS sensors,
    steering wheel position, and gyroscope.
    To play yourself (it's rather fast for humans), type:
    ```
    python gym/envs/box2d/car_racing.py
    ```
    Remember: it's a powerful rear-wheel drive car - don't press the accelerator
    and turn at the same time.

    ### Action Space
    If continuous:
    There are 3 actions: steering (-1 is full left, +1 is full right), gas, and braking.
    If discrete:
    There are 5 actions: do nothing, steer left, steer right, gas, brake.

    ### Observation Space
    State consists of 96x96 pixels.

    ### Rewards
    The reward is -0.1 every frame and +1000/N for every track tile visited,
    where N is the total number of tiles visited in the track. For example,
    if you have finished in 732 frames, your reward is 1000 - 0.1*732 = 926.8 points.

    ### Starting State
    The car starts at rest in the center of the road.

    ### Episode Termination
    The episode finishes when all of the tiles are visited. The car can also go
    outside of the playfield - that is, far off the track, in which case it will
    receive -100 reward and die.

    ### Arguments
    `lap_complete_percent` dictates the percentage of tiles that must be visited by
    the agent before a lap is considered complete.

    Passing `domain_randomize=True` enables the domain randomized variant of the environment.
    In this scenario, the background and track colours are different on every reset.

    Passing `continuous=False` converts the environment to use discrete action space.
    The discrete action space has 5 actions: [do nothing, left, right, gas, brake].

    ### Reset Arguments
    Passing the option `options["randomize"] = True` will change the current colour of the environment on demand.
    Correspondingly, passing the option `options["randomize"] = False` will not change the current colour of the environment.
    `domain_randomize` must be `True` on init for this argument to work.
Example usage: ```py env = gym.make("CarRacing-v1", domain_randomize=True) # normal reset, this changes the colour scheme by default env.reset() # reset with colour scheme change env.reset(options={"randomize": True}) # reset with no colour scheme change env.reset(options={"randomize": False}) ``` ### Version History - v1: Change track completion logic and add domain randomization (0.24.0) - v0: Original version ### References - Chris Campbell (2014), http://www.iforce2d.net/b2dtut/top-down-car. ### Credits Created by Oleg Klimov """ metadata = { "render_modes": [ "human", "rgb_array", "state_pixels", ], "render_fps": FPS, } def __init__( self, render_mode: Optional[str] = None, verbose: bool = False, lap_complete_percent: float = 0.95, domain_randomize: bool = False, continuous: bool = True, ): EzPickle.__init__( self, render_mode, verbose, lap_complete_percent, domain_randomize, continuous, ) self.continuous = continuous self.domain_randomize = domain_randomize self.lap_complete_percent = lap_complete_percent self._init_colors() self.contactListener_keepref = FrictionDetector(self, self.lap_complete_percent) self.world = Box2D.b2World((0, 0), contactListener=self.contactListener_keepref) self.screen: Optional[pygame.Surface] = None self.surf = None self.clock = None self.isopen = True self.invisible_state_window = None self.invisible_video_window = None self.road = None self.car: Optional[Car] = None self.reward = 0.0 self.prev_reward = 0.0 self.verbose = verbose self.new_lap = False self.fd_tile = fixtureDef( shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]) ) # This will throw a warning in tests/envs/test_envs in utils/env_checker.py as the space is not symmetric # or normalised however this is not possible here so ignore if self.continuous: self.action_space = spaces.Box( np.array([-1, 0, 0]).astype(np.float32), np.array([+1, +1, +1]).astype(np.float32), ) # steer, gas, brake else: self.action_space = spaces.Discrete(5) # do nothing, left, right, 
gas, brake self.observation_space = spaces.Box( low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8 ) self.render_mode = render_mode def _destroy(self): if not self.road: return for t in self.road: self.world.DestroyBody(t) self.road = [] assert self.car is not None self.car.destroy() def _init_colors(self): if self.domain_randomize: # domain randomize the bg and grass colour self.road_color = self.np_random.uniform(0, 210, size=3) self.bg_color = self.np_random.uniform(0, 210, size=3) self.grass_color = np.copy(self.bg_color) idx = self.np_random.integers(3) self.grass_color[idx] += 20 else: # default colours self.road_color = np.array([102, 102, 102]) self.bg_color = np.array([102, 204, 102]) self.grass_color = np.array([102, 230, 102]) def _reinit_colors(self, randomize): assert ( self.domain_randomize ), "domain_randomize must be True to use this function." if randomize: # domain randomize the bg and grass colour self.road_color = self.np_random.uniform(0, 210, size=3) self.bg_color = self.np_random.uniform(0, 210, size=3) self.grass_color = np.copy(self.bg_color) idx = self.np_random.integers(3) self.grass_color[idx] += 20 def _create_track(self): CHECKPOINTS = 12 # Create checkpoints checkpoints = [] for c in range(CHECKPOINTS): noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS) alpha = 2 * math.pi * c / CHECKPOINTS + noise rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD) if c == 0: alpha = 0 rad = 1.5 * TRACK_RAD if c == CHECKPOINTS - 1: alpha = 2 * math.pi * c / CHECKPOINTS self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS rad = 1.5 * TRACK_RAD checkpoints.append((alpha, rad * math.cos(alpha), rad * math.sin(alpha))) self.road = [] # Go from one checkpoint to another to create track x, y, beta = 1.5 * TRACK_RAD, 0, 0 dest_i = 0 laps = 0 track = [] no_freeze = 2500 visited_other_side = False while True: alpha = math.atan2(y, x) if visited_other_side and alpha > 0: laps += 1 visited_other_side = False if alpha < 0: 
visited_other_side = True alpha += 2 * math.pi while True: # Find destination from checkpoints failed = True while True: dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)] if alpha <= dest_alpha: failed = False break dest_i += 1 if dest_i % len(checkpoints) == 0: break if not failed: break alpha -= 2 * math.pi continue r1x = math.cos(beta) r1y = math.sin(beta) p1x = -r1y p1y = r1x dest_dx = dest_x - x # vector towards destination dest_dy = dest_y - y # destination vector projected on rad: proj = r1x * dest_dx + r1y * dest_dy while beta - alpha > 1.5 * math.pi: beta -= 2 * math.pi while beta - alpha < -1.5 * math.pi: beta += 2 * math.pi prev_beta = beta proj *= SCALE if proj > 0.3: beta -= min(TRACK_TURN_RATE, abs(0.001 * proj)) if proj < -0.3: beta += min(TRACK_TURN_RATE, abs(0.001 * proj)) x += p1x * TRACK_DETAIL_STEP y += p1y * TRACK_DETAIL_STEP track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y)) if laps > 4: break no_freeze -= 1 if no_freeze == 0: break # Find closed loop range i1..i2, first loop should be ignored, second is OK i1, i2 = -1, -1 i = len(track) while True: i -= 1 if i == 0: return False # Failed pass_through_start = ( track[i][0] > self.start_alpha and track[i - 1][0] <= self.start_alpha ) if pass_through_start and i2 == -1: i2 = i elif pass_through_start and i1 == -1: i1 = i break if self.verbose: print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1)) assert i1 != -1 assert i2 != -1 track = track[i1 : i2 - 1] first_beta = track[0][1] first_perp_x = math.cos(first_beta) first_perp_y = math.sin(first_beta) # Length of perpendicular jump to put together head and tail well_glued_together = np.sqrt( np.square(first_perp_x * (track[0][2] - track[-1][2])) + np.square(first_perp_y * (track[0][3] - track[-1][3])) ) if well_glued_together > TRACK_DETAIL_STEP: return False # Red-white border on hard turns border = [False] * len(track) for i in range(len(track)): good = True oneside = 0 for neg in 
range(BORDER_MIN_COUNT): beta1 = track[i - neg - 0][1] beta2 = track[i - neg - 1][1] good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2 oneside += np.sign(beta1 - beta2) good &= abs(oneside) == BORDER_MIN_COUNT border[i] = good for i in range(len(track)): for neg in range(BORDER_MIN_COUNT): border[i - neg] |= border[i] # Create tiles for i in range(len(track)): alpha1, beta1, x1, y1 = track[i] alpha2, beta2, x2, y2 = track[i - 1] road1_l = ( x1 - TRACK_WIDTH * math.cos(beta1), y1 - TRACK_WIDTH * math.sin(beta1), ) road1_r = ( x1 + TRACK_WIDTH * math.cos(beta1), y1 + TRACK_WIDTH * math.sin(beta1), ) road2_l = ( x2 - TRACK_WIDTH * math.cos(beta2), y2 - TRACK_WIDTH * math.sin(beta2), ) road2_r = ( x2 + TRACK_WIDTH * math.cos(beta2), y2 + TRACK_WIDTH * math.sin(beta2), ) vertices = [road1_l, road1_r, road2_r, road2_l] self.fd_tile.shape.vertices = vertices t = self.world.CreateStaticBody(fixtures=self.fd_tile) t.userData = t c = 0.01 * (i % 3) * 255 t.color = self.road_color + c t.road_visited = False t.road_friction = 1.0 t.idx = i t.fixtures[0].sensor = True self.road_poly.append(([road1_l, road1_r, road2_r, road2_l], t.color)) self.road.append(t) if border[i]: side = np.sign(beta2 - beta1) b1_l = ( x1 + side * TRACK_WIDTH * math.cos(beta1), y1 + side * TRACK_WIDTH * math.sin(beta1), ) b1_r = ( x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1), y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1), ) b2_l = ( x2 + side * TRACK_WIDTH * math.cos(beta2), y2 + side * TRACK_WIDTH * math.sin(beta2), ) b2_r = ( x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2), y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2), ) self.road_poly.append( ( [b1_l, b1_r, b2_r, b2_l], (255, 255, 255) if i % 2 == 0 else (255, 0, 0), ) ) self.track = track return True def reset( self, *, seed: Optional[int] = None, options: Optional[dict] = None, ): super().reset(seed=seed) self._destroy() self.world.contactListener_bug_workaround = FrictionDetector( self, self.lap_complete_percent ) 
self.world.contactListener = self.world.contactListener_bug_workaround self.reward = 0.0 self.prev_reward = 0.0 self.tile_visited_count = 0 self.t = 0.0 self.new_lap = False self.road_poly = [] if self.domain_randomize: randomize = True if isinstance(options, dict): if "randomize" in options: randomize = options["randomize"] self._reinit_colors(randomize) while True: success = self._create_track() if success: break if self.verbose: print( "retry to generate track (normal if there are not many" "instances of this message)" ) self.car = Car(self.world, *self.track[0][1:4]) if self.render_mode == "human": self.render() return self.step(None)[0], {} def step(self, action: Union[np.ndarray, int]): assert self.car is not None if action is not None: if self.continuous: self.car.steer(-action[0]) self.car.gas(action[1]) self.car.brake(action[2]) else: if not self.action_space.contains(action): raise InvalidAction( f"you passed the invalid action `{action}`. " f"The supported action_space is `{self.action_space}`" ) self.car.steer(-0.6 * (action == 1) + 0.6 * (action == 2)) self.car.gas(0.2 * (action == 3)) self.car.brake(0.8 * (action == 4)) self.car.step(1.0 / FPS) self.world.Step(1.0 / FPS, 6 * 30, 2 * 30) self.t += 1.0 / FPS self.state = self._render("state_pixels") step_reward = 0 terminated = False truncated = False if action is not None: # First step without action, called from reset() self.reward -= 0.1 # We actually don't want to count fuel spent, we want car to be faster. 
# self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER self.car.fuel_spent = 0.0 step_reward = self.reward - self.prev_reward self.prev_reward = self.reward if self.tile_visited_count == len(self.track) or self.new_lap: # Truncation due to finishing lap # This should not be treated as a failure # but like a timeout truncated = True x, y = self.car.hull.position if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD: terminated = True step_reward = -100 if self.render_mode == "human": self.render() return self.state, step_reward, terminated, truncated, {} def render(self): if self.render_mode is None: gym.logger.warn( "You are calling render method without specifying any render mode. " "You can specify the render_mode at initialization, " f'e.g. gym("{self.spec.id}", render_mode="rgb_array")' ) else: return self._render(self.render_mode) def _render(self, mode: str): assert mode in self.metadata["render_modes"] pygame.font.init() if self.screen is None and mode == "human": pygame.init() pygame.display.init() self.screen = pygame.display.set_mode((WINDOW_W, WINDOW_H)) if self.clock is None: self.clock = pygame.time.Clock() if "t" not in self.__dict__: return # reset() not called yet self.surf = pygame.Surface((WINDOW_W, WINDOW_H)) assert self.car is not None # computing transformations angle = -self.car.hull.angle # Animating first second zoom. 
zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1) scroll_x = -(self.car.hull.position[0]) * zoom scroll_y = -(self.car.hull.position[1]) * zoom trans = pygame.math.Vector2((scroll_x, scroll_y)).rotate_rad(angle) trans = (WINDOW_W / 2 + trans[0], WINDOW_H / 4 + trans[1]) self._render_road(zoom, trans, angle) self.car.draw( self.surf, zoom, trans, angle, mode not in ["state_pixels_list", "state_pixels"], ) self.surf = pygame.transform.flip(self.surf, False, True) # showing stats self._render_indicators(WINDOW_W, WINDOW_H) font = pygame.font.Font(pygame.font.get_default_font(), 42) text = font.render("%04i" % self.reward, True, (255, 255, 255), (0, 0, 0)) text_rect = text.get_rect() text_rect.center = (60, WINDOW_H - WINDOW_H * 2.5 / 40.0) self.surf.blit(text, text_rect) if mode == "human": pygame.event.pump() self.clock.tick(self.metadata["render_fps"]) assert self.screen is not None self.screen.fill(0) self.screen.blit(self.surf, (0, 0)) pygame.display.flip() if mode == "rgb_array": return self._create_image_array(self.surf, (VIDEO_W, VIDEO_H)) elif mode == "state_pixels": return self._create_image_array(self.surf, (STATE_W, STATE_H)) else: return self.isopen def _render_road(self, zoom, translation, angle): bounds = PLAYFIELD field = [ (bounds, bounds), (bounds, -bounds), (-bounds, -bounds), (-bounds, bounds), ] # draw background self._draw_colored_polygon( self.surf, field, self.bg_color, zoom, translation, angle, clip=False ) # draw grass patches grass = [] for x in range(-20, 20, 2): for y in range(-20, 20, 2): grass.append( [ (GRASS_DIM * x + GRASS_DIM, GRASS_DIM * y + 0), (GRASS_DIM * x + 0, GRASS_DIM * y + 0), (GRASS_DIM * x + 0, GRASS_DIM * y + GRASS_DIM), (GRASS_DIM * x + GRASS_DIM, GRASS_DIM * y + GRASS_DIM), ] ) for poly in grass: self._draw_colored_polygon( self.surf, poly, self.grass_color, zoom, translation, angle ) # draw road for poly, color in self.road_poly: # converting to pixel coordinates poly = [(p[0], p[1]) for p in poly] 
color = [int(c) for c in color] self._draw_colored_polygon(self.surf, poly, color, zoom, translation, angle) def _render_indicators(self, W, H): s = W / 40.0 h = H / 40.0 color = (0, 0, 0) polygon = [(W, H), (W, H - 5 * h), (0, H - 5 * h), (0, H)] pygame.draw.polygon(self.surf, color=color, points=polygon) def vertical_ind(place, val): return [ (place * s, H - (h + h * val)), ((place + 1) * s, H - (h + h * val)), ((place + 1) * s, H - h), ((place + 0) * s, H - h), ] def horiz_ind(place, val): return [ ((place + 0) * s, H - 4 * h), ((place + val) * s, H - 4 * h), ((place + val) * s, H - 2 * h), ((place + 0) * s, H - 2 * h), ] assert self.car is not None true_speed = np.sqrt( np.square(self.car.hull.linearVelocity[0]) + np.square(self.car.hull.linearVelocity[1]) ) # simple wrapper to render if the indicator value is above a threshold def render_if_min(value, points, color): if abs(value) > 1e-4: pygame.draw.polygon(self.surf, points=points, color=color) render_if_min(true_speed, vertical_ind(5, 0.02 * true_speed), (255, 255, 255)) # ABS sensors render_if_min( self.car.wheels[0].omega, vertical_ind(7, 0.01 * self.car.wheels[0].omega), (0, 0, 255), ) render_if_min( self.car.wheels[1].omega, vertical_ind(8, 0.01 * self.car.wheels[1].omega), (0, 0, 255), ) render_if_min( self.car.wheels[2].omega, vertical_ind(9, 0.01 * self.car.wheels[2].omega), (51, 0, 255), ) render_if_min( self.car.wheels[3].omega, vertical_ind(10, 0.01 * self.car.wheels[3].omega), (51, 0, 255), ) render_if_min( self.car.wheels[0].joint.angle, horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle), (0, 255, 0), ) render_if_min( self.car.hull.angularVelocity, horiz_ind(30, -0.8 * self.car.hull.angularVelocity), (255, 0, 0), ) def _draw_colored_polygon( self, surface, poly, color, zoom, translation, angle, clip=True ): poly = [pygame.math.Vector2(c).rotate_rad(angle) for c in poly] poly = [ (c[0] * zoom + translation[0], c[1] * zoom + translation[1]) for c in poly ] # This checks if the polygon is out of 
bounds of the screen, and we skip drawing if so. # Instead of calculating exactly if the polygon and screen overlap, # we simply check if the polygon is in a larger bounding box whose dimension # is greater than the screen by MAX_SHAPE_DIM, which is the maximum # diagonal length of an environment object if not clip or any( (-MAX_SHAPE_DIM <= coord[0] <= WINDOW_W + MAX_SHAPE_DIM) and (-MAX_SHAPE_DIM <= coord[1] <= WINDOW_H + MAX_SHAPE_DIM) for coord in poly ): gfxdraw.aapolygon(self.surf, poly, color) gfxdraw.filled_polygon(self.surf, poly, color) def _create_image_array(self, screen, size): scaled_screen = pygame.transform.smoothscale(screen, size) return np.transpose( np.array(pygame.surfarray.pixels3d(scaled_screen)), axes=(1, 0, 2) ) def close(self): if self.screen is not None: pygame.display.quit() self.isopen = False pygame.quit() if __name__ == "__main__": a = np.array([0.0, 0.0, 0.0]) def register_input(): global quit, restart for event in pygame.event.get(): if event.type == pygame.KEYDOWN: if event.key == pygame.K_LEFT: a[0] = -1.0 if event.key == pygame.K_RIGHT: a[0] = +1.0 if event.key == pygame.K_UP: a[1] = +1.0 if event.key == pygame.K_DOWN: a[2] = +0.8 # set 1.0 for wheels to block to zero rotation if event.key == pygame.K_RETURN: restart = True if event.key == pygame.K_ESCAPE: quit = True if event.type == pygame.KEYUP: if event.key == pygame.K_LEFT: a[0] = 0 if event.key == pygame.K_RIGHT: a[0] = 0 if event.key == pygame.K_UP: a[1] = 0 if event.key == pygame.K_DOWN: a[2] = 0 if event.type == pygame.QUIT: quit = True env = CarRacing(render_mode="human") quit = False while not quit: env.reset() total_reward = 0.0 steps = 0 restart = False while True: register_input() s, r, terminated, truncated, info = env.step(a) total_reward += r if steps % 200 == 0 or terminated or truncated: print("\naction " + str([f"{x:+0.2f}" for x in a])) print(f"step {steps} total_reward {total_reward:+0.2f}") steps += 1 if terminated or truncated or restart or quit: break 
    env.close()


================================================
FILE: gym/envs/box2d/lunar_lander.py
================================================
__credits__ = ["Andrea PIERRÉ"]

import math
import warnings
from typing import TYPE_CHECKING, Optional

import numpy as np

import gym
from gym import error, spaces
from gym.error import DependencyNotInstalled
from gym.utils import EzPickle, colorize
from gym.utils.step_api_compatibility import step_api_compatibility

try:
    import Box2D
    from Box2D.b2 import (
        circleShape,
        contactListener,
        edgeShape,
        fixtureDef,
        polygonShape,
        revoluteJointDef,
    )
except ImportError:
    raise DependencyNotInstalled("box2d is not installed, run `pip install gym[box2d]`")


if TYPE_CHECKING:
    import pygame


FPS = 50
SCALE = 30.0  # affects how fast-paced the game is, forces should be adjusted as well

MAIN_ENGINE_POWER = 13.0
SIDE_ENGINE_POWER = 0.6

INITIAL_RANDOM = 1000.0  # Set 1500 to make game harder

LANDER_POLY = [(-14, +17), (-17, 0), (-17, -10), (+17, -10), (+17, 0), (+14, +17)]
LEG_AWAY = 20
LEG_DOWN = 18
LEG_W, LEG_H = 2, 8
LEG_SPRING_TORQUE = 40

SIDE_ENGINE_HEIGHT = 14.0
SIDE_ENGINE_AWAY = 12.0

VIEWPORT_W = 600
VIEWPORT_H = 400


class ContactDetector(contactListener):
    """Box2D contact listener that flags crashes and leg/ground contact on the env."""

    def __init__(self, env):
        contactListener.__init__(self)
        self.env = env

    def BeginContact(self, contact):
        # Any contact involving the lander hull itself is a crash.
        if (
            self.env.lander == contact.fixtureA.body
            or self.env.lander == contact.fixtureB.body
        ):
            self.env.game_over = True
        for i in range(2):
            if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
                self.env.legs[i].ground_contact = True

    def EndContact(self, contact):
        for i in range(2):
            if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
                self.env.legs[i].ground_contact = False


class LunarLander(gym.Env, EzPickle):
    """
    ### Description
    This environment is a classic rocket trajectory optimization problem.
    According to Pontryagin's maximum principle, it is optimal to fire the
    engine at full throttle or turn it off. This is the reason why this
    environment has discrete actions: engine on or off.

    There are two environment versions: discrete or continuous.
    The landing pad is always at coordinates (0,0). The coordinates are the
    first two numbers in the state vector.
    Landing outside of the landing pad is possible. Fuel is infinite, so an agent
    can learn to fly and then land on its first attempt.

    To see a heuristic landing, run:
    ```
    python gym/envs/box2d/lunar_lander.py
    ```

    ### Action Space
    There are four discrete actions available: do nothing, fire left
    orientation engine, fire main engine, fire right orientation engine.

    ### Observation Space
    The state is an 8-dimensional vector: the coordinates of the lander in `x` & `y`,
    its linear velocities in `x` & `y`, its angle, its angular velocity, and two
    booleans that represent whether each leg is in contact with the ground or not.

    ### Rewards
    After every step a reward is granted. The total reward of an episode is the
    sum of the rewards for all the steps within that episode.

    For each step, the reward:
    - is increased/decreased the closer/further the lander is to the landing pad.
    - is increased/decreased the slower/faster the lander is moving.
    - is decreased the more the lander is tilted (angle not horizontal).
    - is increased by 10 points for each leg that is in contact with the ground.
    - is decreased by 0.03 points each frame a side engine is firing.
    - is decreased by 0.3 points each frame the main engine is firing.

    The episode receives an additional reward of -100 or +100 points for crashing
    or landing safely respectively.

    An episode is considered a solution if it scores at least 200 points.

    ### Starting State
    The lander starts at the top center of the viewport with a random initial
    force applied to its center of mass.

    ### Episode Termination
    The episode finishes if:
    1) the lander crashes (the lander body gets in contact with the moon);
    2) the lander gets outside of the viewport (`x` coordinate is greater than 1);
    3) the lander is not awake. From the
        [Box2D docs](https://box2d.org/documentation/md__d_1__git_hub_box2d_docs_dynamics.html#autotoc_md61),
        a body which is not awake is a body which doesn't move and doesn't
        collide with any other body:
    > When Box2D determines that a body (or group of bodies) has come to rest,
    > the body enters a sleep state which has very little CPU overhead. If a
    > body is awake and collides with a sleeping body, then the sleeping body
    > wakes up. Bodies will also wake up if a joint or contact attached to
    > them is destroyed.

    ### Arguments
    To use the _continuous_ environment, you need to specify the
    `continuous=True` argument like below:
    ```python
    import gym
    env = gym.make(
        "LunarLander-v2",
        continuous=False,
        gravity=-10.0,
        enable_wind=False,
        wind_power=15.0,
        turbulence_power=1.5,
    )
    ```
    If `continuous=True` is passed, continuous actions (corresponding to the throttle of the engines) will be used and the
    action space will be `Box(-1, +1, (2,), dtype=np.float32)`.
    The first coordinate of an action determines the throttle of the main engine, while the second
    coordinate specifies the throttle of the lateral boosters.
    Given an action `np.array([main, lateral])`, the main engine will be turned off completely if
    `main < 0` and the throttle scales affinely from 50% to 100% for `0 <= main <= 1` (in particular, the
    main engine doesn't work with less than 50% power).
    Similarly, if `-0.5 < lateral < 0.5`, the lateral boosters will not fire at all. If `lateral < -0.5`, the left
    booster will fire, and if `lateral > 0.5`, the right booster will fire. Again, the throttle scales affinely
    from 50% to 100% between -1 and -0.5 (and 0.5 and 1, respectively).

    `gravity` dictates the gravitational constant, this is bounded to be within 0 and -12.

    If `enable_wind=True` is passed, there will be wind effects applied to the lander.
    The wind is generated using the function `tanh(sin(2 k (t+C)) + sin(pi k (t+C)))`.
    `k` is set to 0.01.
    `C` is sampled randomly between -9999 and 9999.

    `wind_power` dictates the maximum magnitude of linear wind applied to the craft.
    The recommended value for `wind_power` is between 0.0 and 20.0.
    `turbulence_power` dictates the maximum magnitude of rotational wind applied to the craft.
    The recommended value for `turbulence_power` is between 0.0 and 2.0.

    ### Version History
    - v2: Count energy spent and in v0.24, added turbulence with wind power and turbulence_power parameters
    - v1: Legs contact with ground added in state vector; contact with ground
        give +10 reward points, and -10 if then lose contact; reward
        renormalized to 200; harder initial random push.
    - v0: Initial version

    ### Credits
    Created by Oleg Klimov
    """

    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": FPS,
    }

    def __init__(
        self,
        render_mode: Optional[str] = None,
        continuous: bool = False,
        gravity: float = -10.0,
        enable_wind: bool = False,
        wind_power: float = 15.0,
        turbulence_power: float = 1.5,
    ):
        EzPickle.__init__(
            self,
            render_mode,
            continuous,
            gravity,
            enable_wind,
            wind_power,
            turbulence_power,
        )

        # gravity must be strictly negative and above -12 (hard requirement)
        assert (
            -12.0 < gravity and gravity < 0.0
        ), f"gravity (current value: {gravity}) must be between -12 and 0"
        self.gravity = gravity

        # out-of-range wind/turbulence values are allowed but warned about
        if 0.0 > wind_power or wind_power > 20.0:
            warnings.warn(
                colorize(
                    f"WARN: wind_power value is recommended to be between 0.0 and 20.0, (current value: {wind_power})",
                    "yellow",
                ),
            )
        self.wind_power = wind_power

        if 0.0 > turbulence_power or turbulence_power > 2.0:
            warnings.warn(
                colorize(
                    f"WARN: turbulence_power value is recommended to be between 0.0 and 2.0, (current value: {turbulence_power})",
                    "yellow",
                ),
            )
        self.turbulence_power = turbulence_power

        self.enable_wind = enable_wind
        # random phase offsets for the wind/torque pseudo-noise functions
        # NOTE(review): these use the global np.random, not the env's seeded RNG
        self.wind_idx = np.random.randint(-9999, 9999)
        self.torque_idx = np.random.randint(-9999, 9999)

        self.screen: Optional["pygame.Surface"] = None
        self.clock = None
        self.isopen = True
        self.world = Box2D.b2World(gravity=(0, gravity))
        self.moon = None
        self.lander: Optional[Box2D.b2Body] = None
        self.particles = []

        self.prev_reward = None

        self.continuous = continuous

        low = np.array(
            [
                # these are bounds for position
                # realistically the environment should have ended
                # long before we reach more than 50% outside
                -1.5,
                -1.5,
                # velocity bounds is 5x rated speed
                -5.0,
                -5.0,
                -math.pi,
                -5.0,
                -0.0,
                -0.0,
            ]
        ).astype(np.float32)
        high = np.array(
            [
                # these are bounds for position
                # realistically the environment should have ended
                # long before we reach more than 50% outside
                1.5,
                1.5,
                # velocity bounds is 5x rated speed
                5.0,
                5.0,
                math.pi,
                5.0,
                1.0,
                1.0,
            ]
        ).astype(np.float32)

        # useful range is -1 .. +1, but spikes can be higher
        self.observation_space = spaces.Box(low, high)

        if self.continuous:
            # Action is two floats [main engine, left-right engines].
            # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
# Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off self.action_space = spaces.Box(-1, +1, (2,), dtype=np.float32) else: # Nop, fire left engine, main engine, right engine self.action_space = spaces.Discrete(4) self.render_mode = render_mode def _destroy(self): if not self.moon: return self.world.contactListener = None self._clean_particles(True) self.world.DestroyBody(self.moon) self.moon = None self.world.DestroyBody(self.lander) self.lander = None self.world.DestroyBody(self.legs[0]) self.world.DestroyBody(self.legs[1]) def reset( self, *, seed: Optional[int] = None, options: Optional[dict] = None, ): super().reset(seed=seed) self._destroy() self.world.contactListener_keepref = ContactDetector(self) self.world.contactListener = self.world.contactListener_keepref self.game_over = False self.prev_shaping = None W = VIEWPORT_W / SCALE H = VIEWPORT_H / SCALE # terrain CHUNKS = 11 height = self.np_random.uniform(0, H / 2, size=(CHUNKS + 1,)) chunk_x = [W / (CHUNKS - 1) * i for i in range(CHUNKS)] self.helipad_x1 = chunk_x[CHUNKS // 2 - 1] self.helipad_x2 = chunk_x[CHUNKS // 2 + 1] self.helipad_y = H / 4 height[CHUNKS // 2 - 2] = self.helipad_y height[CHUNKS // 2 - 1] = self.helipad_y height[CHUNKS // 2 + 0] = self.helipad_y height[CHUNKS // 2 + 1] = self.helipad_y height[CHUNKS // 2 + 2] = self.helipad_y smooth_y = [ 0.33 * (height[i - 1] + height[i + 0] + height[i + 1]) for i in range(CHUNKS) ] self.moon = self.world.CreateStaticBody( shapes=edgeShape(vertices=[(0, 0), (W, 0)]) ) self.sky_polys = [] for i in range(CHUNKS - 1): p1 = (chunk_x[i], smooth_y[i]) p2 = (chunk_x[i + 1], smooth_y[i + 1]) self.moon.CreateEdgeFixture(vertices=[p1, p2], density=0, friction=0.1) self.sky_polys.append([p1, p2, (p2[0], H), (p1[0], H)]) self.moon.color1 = (0.0, 0.0, 0.0) self.moon.color2 = (0.0, 0.0, 0.0) initial_y = VIEWPORT_H / SCALE self.lander: Box2D.b2Body = self.world.CreateDynamicBody( position=(VIEWPORT_W / SCALE / 2, initial_y), 
angle=0.0, fixtures=fixtureDef( shape=polygonShape( vertices=[(x / SCALE, y / SCALE) for x, y in LANDER_POLY] ), density=5.0, friction=0.1, categoryBits=0x0010, maskBits=0x001, # collide only with ground restitution=0.0, ), # 0.99 bouncy ) self.lander.color1 = (128, 102, 230) self.lander.color2 = (77, 77, 128) self.lander.ApplyForceToCenter( ( self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), ), True, ) self.legs = [] for i in [-1, +1]: leg = self.world.CreateDynamicBody( position=(VIEWPORT_W / SCALE / 2 - i * LEG_AWAY / SCALE, initial_y), angle=(i * 0.05), fixtures=fixtureDef( shape=polygonShape(box=(LEG_W / SCALE, LEG_H / SCALE)), density=1.0, restitution=0.0, categoryBits=0x0020, maskBits=0x001, ), ) leg.ground_contact = False leg.color1 = (128, 102, 230) leg.color2 = (77, 77, 128) rjd = revoluteJointDef( bodyA=self.lander, bodyB=leg, localAnchorA=(0, 0), localAnchorB=(i * LEG_AWAY / SCALE, LEG_DOWN / SCALE), enableMotor=True, enableLimit=True, maxMotorTorque=LEG_SPRING_TORQUE, motorSpeed=+0.3 * i, # low enough not to jump back into the sky ) if i == -1: rjd.lowerAngle = ( +0.9 - 0.5 ) # The most esoteric numbers here, angled legs have freedom to travel within rjd.upperAngle = +0.9 else: rjd.lowerAngle = -0.9 rjd.upperAngle = -0.9 + 0.5 leg.joint = self.world.CreateJoint(rjd) self.legs.append(leg) self.drawlist = [self.lander] + self.legs if self.render_mode == "human": self.render() return self.step(np.array([0, 0]) if self.continuous else 0)[0], {} def _create_particle(self, mass, x, y, ttl): p = self.world.CreateDynamicBody( position=(x, y), angle=0.0, fixtures=fixtureDef( shape=circleShape(radius=2 / SCALE, pos=(0, 0)), density=mass, friction=0.1, categoryBits=0x0100, maskBits=0x001, # collide only with ground restitution=0.3, ), ) p.ttl = ttl self.particles.append(p) self._clean_particles(False) return p def _clean_particles(self, all): while self.particles and (all or self.particles[0].ttl < 
0): self.world.DestroyBody(self.particles.pop(0)) def step(self, action): assert self.lander is not None # Update wind assert self.lander is not None, "You forgot to call reset()" if self.enable_wind and not ( self.legs[0].ground_contact or self.legs[1].ground_contact ): # the function used for wind is tanh(sin(2 k x) + sin(pi k x)), # which is proven to never be periodic, k = 0.01 wind_mag = ( math.tanh( math.sin(0.02 * self.wind_idx) + (math.sin(math.pi * 0.01 * self.wind_idx)) ) * self.wind_power ) self.wind_idx += 1 self.lander.ApplyForceToCenter( (wind_mag, 0.0), True, ) # the function used for torque is tanh(sin(2 k x) + sin(pi k x)), # which is proven to never be periodic, k = 0.01 torque_mag = math.tanh( math.sin(0.02 * self.torque_idx) + (math.sin(math.pi * 0.01 * self.torque_idx)) ) * (self.turbulence_power) self.torque_idx += 1 self.lander.ApplyTorque( (torque_mag), True, ) if self.continuous: action = np.clip(action, -1, +1).astype(np.float32) else: assert self.action_space.contains( action ), f"{action!r} ({type(action)}) invalid " # Engines tip = (math.sin(self.lander.angle), math.cos(self.lander.angle)) side = (-tip[1], tip[0]) dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)] m_power = 0.0 if (self.continuous and action[0] > 0.0) or ( not self.continuous and action == 2 ): # Main engine if self.continuous: m_power = (np.clip(action[0], 0.0, 1.0) + 1.0) * 0.5 # 0.5..1.0 assert m_power >= 0.5 and m_power <= 1.0 else: m_power = 1.0 # 4 is move a bit downwards, +-2 for randomness ox = tip[0] * (4 / SCALE + 2 * dispersion[0]) + side[0] * dispersion[1] oy = -tip[1] * (4 / SCALE + 2 * dispersion[0]) - side[1] * dispersion[1] impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy) p = self._create_particle( 3.5, # 3.5 is here to make particle speed adequate impulse_pos[0], impulse_pos[1], m_power, ) # particles are just a decoration p.ApplyLinearImpulse( (ox * MAIN_ENGINE_POWER * m_power, oy * MAIN_ENGINE_POWER * 
m_power), impulse_pos, True, ) self.lander.ApplyLinearImpulse( (-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power), impulse_pos, True, ) s_power = 0.0 if (self.continuous and np.abs(action[1]) > 0.5) or ( not self.continuous and action in [1, 3] ): # Orientation engines if self.continuous: direction = np.sign(action[1]) s_power = np.clip(np.abs(action[1]), 0.5, 1.0) assert s_power >= 0.5 and s_power <= 1.0 else: direction = action - 2 s_power = 1.0 ox = tip[0] * dispersion[0] + side[0] * ( 3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE ) oy = -tip[1] * dispersion[0] - side[1] * ( 3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE ) impulse_pos = ( self.lander.position[0] + ox - tip[0] * 17 / SCALE, self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT / SCALE, ) p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power) p.ApplyLinearImpulse( (ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power), impulse_pos, True, ) self.lander.ApplyLinearImpulse( (-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power), impulse_pos, True, ) self.world.Step(1.0 / FPS, 6 * 30, 2 * 30) pos = self.lander.position vel = self.lander.linearVelocity state = [ (pos.x - VIEWPORT_W / SCALE / 2) / (VIEWPORT_W / SCALE / 2), (pos.y - (self.helipad_y + LEG_DOWN / SCALE)) / (VIEWPORT_H / SCALE / 2), vel.x * (VIEWPORT_W / SCALE / 2) / FPS, vel.y * (VIEWPORT_H / SCALE / 2) / FPS, self.lander.angle, 20.0 * self.lander.angularVelocity / FPS, 1.0 if self.legs[0].ground_contact else 0.0, 1.0 if self.legs[1].ground_contact else 0.0, ] assert len(state) == 8 reward = 0 shaping = ( -100 * np.sqrt(state[0] * state[0] + state[1] * state[1]) - 100 * np.sqrt(state[2] * state[2] + state[3] * state[3]) - 100 * abs(state[4]) + 10 * state[6] + 10 * state[7] ) # And ten points for legs contact, the idea is if you # lose contact again after landing, you get negative reward if self.prev_shaping is not None: reward = shaping - 
self.prev_shaping self.prev_shaping = shaping reward -= ( m_power * 0.30 ) # less fuel spent is better, about -30 for heuristic landing reward -= s_power * 0.03 terminated = False if self.game_over or abs(state[0]) >= 1.0: terminated = True reward = -100 if not self.lander.awake: terminated = True reward = +100 if self.render_mode == "human": self.render() return np.array(state, dtype=np.float32), reward, terminated, False, {} def render(self): if self.render_mode is None: gym.logger.warn( "You are calling render method without specifying any render mode. " "You can specify the render_mode at initialization, " f'e.g. gym("{self.spec.id}", render_mode="rgb_array")' ) return try: import pygame from pygame import gfxdraw except ImportError: raise DependencyNotInstalled( "pygame is not installed, run `pip install gym[box2d]`" ) if self.screen is None and self.render_mode == "human": pygame.init() pygame.display.init() self.screen = pygame.display.set_mode((VIEWPORT_W, VIEWPORT_H)) if self.clock is None: self.clock = pygame.time.Clock() self.surf = pygame.Surface((VIEWPORT_W, VIEWPORT_H)) pygame.transform.scale(self.surf, (SCALE, SCALE)) pygame.draw.rect(self.surf, (255, 255, 255), self.surf.get_rect()) for obj in self.particles: obj.ttl -= 0.15 obj.color1 = ( int(max(0.2, 0.15 + obj.ttl) * 255), int(max(0.2, 0.5 * obj.ttl) * 255), int(max(0.2, 0.5 * obj.ttl) * 255), ) obj.color2 = ( int(max(0.2, 0.15 + obj.ttl) * 255), int(max(0.2, 0.5 * obj.ttl) * 255), int(max(0.2, 0.5 * obj.ttl) * 255), ) self._clean_particles(False) for p in self.sky_polys: scaled_poly = [] for coord in p: scaled_poly.append((coord[0] * SCALE, coord[1] * SCALE)) pygame.draw.polygon(self.surf, (0, 0, 0), scaled_poly) gfxdraw.aapolygon(self.surf, scaled_poly, (0, 0, 0)) for obj in self.particles + self.drawlist: for f in obj.fixtures: trans = f.body.transform if type(f.shape) is circleShape: pygame.draw.circle( self.surf, color=obj.color1, center=trans * f.shape.pos * SCALE, radius=f.shape.radius * 
SCALE, ) pygame.draw.circle( self.surf, color=obj.color2, center=trans * f.shape.pos * SCALE, radius=f.shape.radius * SCALE, ) else: path = [trans * v * SCALE for v in f.shape.vertices] pygame.draw.polygon(self.surf, color=obj.color1, points=path) gfxdraw.aapolygon(self.surf, path, obj.color1) pygame.draw.aalines( self.surf, color=obj.color2, points=path, closed=True ) for x in [self.helipad_x1, self.helipad_x2]: x = x * SCALE flagy1 = self.helipad_y * SCALE flagy2 = flagy1 + 50 pygame.draw.line( self.surf, color=(255, 255, 255), start_pos=(x, flagy1), end_pos=(x, flagy2), width=1, ) pygame.draw.polygon( self.surf, color=(204, 204, 0), points=[ (x, flagy2), (x, flagy2 - 10), (x + 25, flagy2 - 5), ], ) gfxdraw.aapolygon( self.surf, [(x, flagy2), (x, flagy2 - 10), (x + 25, flagy2 - 5)], (204, 204, 0), ) self.surf = pygame.transform.flip(self.surf, False, True) if self.render_mode == "human": assert self.screen is not None self.screen.blit(self.surf, (0, 0)) pygame.event.pump() self.clock.tick(self.metadata["render_fps"]) pygame.display.flip() elif self.render_mode == "rgb_array": return np.transpose( np.array(pygame.surfarray.pixels3d(self.surf)), axes=(1, 0, 2) ) def close(self): if self.screen is not None: import pygame pygame.display.quit() pygame.quit() self.isopen = False def heuristic(env, s): """ The heuristic for 1. Testing 2. Demonstration rollout. Args: env: The environment s (list): The state. Attributes: s[0] is the horizontal coordinate s[1] is the vertical coordinate s[2] is the horizontal speed s[3] is the vertical speed s[4] is the angle s[5] is the angular speed s[6] 1 if first leg has contact, else 0 s[7] 1 if second leg has contact, else 0 Returns: a: The heuristic to be fed into the step function defined above to determine the next step and reward. 
""" angle_targ = s[0] * 0.5 + s[2] * 1.0 # angle should point towards center if angle_targ > 0.4: angle_targ = 0.4 # more than 0.4 radians (22 degrees) is bad if angle_targ < -0.4: angle_targ = -0.4 hover_targ = 0.55 * np.abs( s[0] ) # target y should be proportional to horizontal offset angle_todo = (angle_targ - s[4]) * 0.5 - (s[5]) * 1.0 hover_todo = (hover_targ - s[1]) * 0.5 - (s[3]) * 0.5 if s[6] or s[7]: # legs have contact angle_todo = 0 hover_todo = ( -(s[3]) * 0.5 ) # override to reduce fall speed, that's all we need after contact if env.continuous: a = np.array([hover_todo * 20 - 1, -angle_todo * 20]) a = np.clip(a, -1, +1) else: a = 0 if hover_todo > np.abs(angle_todo) and hover_todo > 0.05: a = 2 elif angle_todo < -0.05: a = 3 elif angle_todo > +0.05: a = 1 return a def demo_heuristic_lander(env, seed=None, render=False): total_reward = 0 steps = 0 s, info = env.reset(seed=seed) while True: a = heuristic(env, s) s, r, terminated, truncated, info = step_api_compatibility(env.step(a), True) total_reward += r if render: still_open = env.render() if still_open is False: break if steps % 20 == 0 or terminated or truncated: print("observations:", " ".join([f"{x:+0.2f}" for x in s])) print(f"step {steps} total_reward {total_reward:+0.2f}") steps += 1 if terminated or truncated: break if render: env.close() return total_reward class LunarLanderContinuous: def __init__(self): raise error.Error( "Error initializing LunarLanderContinuous Environment.\n" "Currently, we do not support initializing this mode of environment by calling the class directly.\n" "To use this environment, instead create it by specifying the continuous keyword in gym.make, i.e.\n" 'gym.make("LunarLander-v2", continuous=True)' ) if __name__ == "__main__": demo_heuristic_lander(LunarLander(), render=True) ================================================ FILE: gym/envs/classic_control/__init__.py ================================================ from gym.envs.classic_control.acrobot import 
AcrobotEnv from gym.envs.classic_control.cartpole import CartPoleEnv from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv from gym.envs.classic_control.mountain_car import MountainCarEnv from gym.envs.classic_control.pendulum import PendulumEnv ================================================ FILE: gym/envs/classic_control/acrobot.py ================================================ """classic Acrobot task""" from typing import Optional import numpy as np from numpy import cos, pi, sin from gym import core, logger, spaces from gym.error import DependencyNotInstalled __copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy" __credits__ = [ "Alborz Geramifard", "Robert H. Klein", "Christoph Dann", "William Dabney", "Jonathan P. How", ] __license__ = "BSD 3-Clause" __author__ = "Christoph Dann " # SOURCE: # https://github.com/rlpy/rlpy/blob/master/rlpy/Domains/Acrobot.py from gym.envs.classic_control import utils class AcrobotEnv(core.Env): """ ### Description The Acrobot environment is based on Sutton's work in ["Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding"](https://papers.nips.cc/paper/1995/hash/8f1d43620bc6bb580df6e80b0dc05c48-Abstract.html) and [Sutton and Barto's book](http://www.incompleteideas.net/book/the-book-2nd.html). The system consists of two links connected linearly to form a chain, with one end of the chain fixed. The joint between the two links is actuated. The goal is to apply torques on the actuated joint to swing the free end of the linear chain above a given height while starting from the initial state of hanging downwards. As seen in the **Gif**: two blue links connected by two green joints. The joint in between the two links is actuated. The goal is to swing the free end of the outer-link to reach the target height (black horizontal line above system) by applying torque on the actuator. 
    ### Action Space

    The action is discrete, deterministic, and represents the torque applied on the actuated
    joint between the two links.

    | Num | Action                                | Unit         |
    |-----|---------------------------------------|--------------|
    | 0   | apply -1 torque to the actuated joint | torque (N m) |
    | 1   | apply 0 torque to the actuated joint  | torque (N m) |
    | 2   | apply 1 torque to the actuated joint  | torque (N m) |

    ### Observation Space

    The observation is a `ndarray` with shape `(6,)` that provides information about the
    two rotational joint angles as well as their angular velocities:

    | Num | Observation                  | Min                 | Max               |
    |-----|------------------------------|---------------------|-------------------|
    | 0   | Cosine of `theta1`           | -1                  | 1                 |
    | 1   | Sine of `theta1`             | -1                  | 1                 |
    | 2   | Cosine of `theta2`           | -1                  | 1                 |
    | 3   | Sine of `theta2`             | -1                  | 1                 |
    | 4   | Angular velocity of `theta1` | ~ -12.567 (-4 * pi) | ~ 12.567 (4 * pi) |
    | 5   | Angular velocity of `theta2` | ~ -28.274 (-9 * pi) | ~ 28.274 (9 * pi) |

    where

    - `theta1` is the angle of the first joint, where an angle of 0 indicates the first link
      is pointing directly downwards.
    - `theta2` is ***relative to the angle of the first link.*** An angle of 0 corresponds to
      having the same angle between the two links.

    The angular velocities of `theta1` and `theta2` are bounded at ±4π, and ±9π rad/s respectively.
    A state of `[1, 0, 1, 0, ..., ...]` indicates that both links are pointing downwards.

    ### Rewards

    The goal is to have the free end reach a designated target height in as few steps as possible,
    and as such all steps that do not reach the goal incur a reward of -1.
    Achieving the target height results in termination with a reward of 0. The reward threshold is -100.

    ### Starting State

    Each parameter in the underlying state (`theta1`, `theta2`, and the two angular velocities) is
    initialized uniformly between -0.1 and 0.1. This means both links are pointing downwards with
    some initial stochasticity.

    ### Episode End

    The episode ends if one of the following occurs:
    1. Termination: The free end reaches the target height, which is constructed as:
       `-cos(theta1) - cos(theta2 + theta1) > 1.0`
    2. Truncation: Episode length is greater than 500 (200 for v0)

    ### Arguments

    No additional arguments are currently supported.

    ```
    env = gym.make('Acrobot-v1')
    ```

    By default, the dynamics of the acrobot follow those described in Sutton and Barto's book
    [Reinforcement Learning: An Introduction](http://incompleteideas.net/book/11/node4.html).
    However, a `book_or_nips` parameter can be modified to change the pendulum dynamics to those
    described in the original
    [NeurIPS paper](https://papers.nips.cc/paper/1995/hash/8f1d43620bc6bb580df6e80b0dc05c48-Abstract.html).

    ```
    # To change the dynamics as described above
    env.env.book_or_nips = 'nips'
    ```

    See the following note and the
    [implementation](https://github.com/openai/gym/blob/master/gym/envs/classic_control/acrobot.py)
    for details:

    > The dynamics equations were missing some terms in the NIPS paper which are present in the
      book. R. Sutton confirmed in personal correspondence that the experimental results shown in
      the paper and the book were generated with the equations shown in the book.
      However, there is the option to run the domain with the paper equations by setting
      `book_or_nips = 'nips'`

    ### Version History

    - v1: Maximum number of steps increased from 200 to 500. The observation space for v0 provided
      direct readings of `theta1` and `theta2` in radians, having a range of `[-pi, pi]`. The v1
      observation space as described here provides the sine and cosine of each angle instead.
    - v0: Initial versions release (1.0.0) (removed from gym for v1)

    ### References
    - Sutton, R. S. (1996). Generalization in Reinforcement Learning: Successful Examples Using
      Sparse Coarse Coding. In D. Touretzky, M. C. Mozer, & M. Hasselmo (Eds.), Advances in Neural
      Information Processing Systems (Vol. 8). MIT Press.
      https://proceedings.neurips.cc/paper/1995/file/8f1d43620bc6bb580df6e80b0dc05c48-Paper.pdf
    - Sutton, R. S., Barto, A. G. (2018). Reinforcement Learning: An Introduction. The MIT Press.
    """

    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": 15,
    }

    # Integration interval handed to rk4() on every call to step().
    dt = 0.2

    LINK_LENGTH_1 = 1.0  # [m]
    LINK_LENGTH_2 = 1.0  # [m]
    LINK_MASS_1 = 1.0  #: [kg] mass of link 1
    LINK_MASS_2 = 1.0  #: [kg] mass of link 2
    LINK_COM_POS_1 = 0.5  #: [m] position of the center of mass of link 1
    LINK_COM_POS_2 = 0.5  #: [m] position of the center of mass of link 2
    LINK_MOI = 1.0  #: moments of inertia for both links

    # Angular-velocity clip bounds used in step(); also the Box bounds below.
    MAX_VEL_1 = 4 * pi
    MAX_VEL_2 = 9 * pi

    # Discrete action index -> applied torque.
    AVAIL_TORQUE = [-1.0, 0.0, +1]

    # Uniform noise amplitude added to the torque; 0.0 disables noise.
    torque_noise_max = 0.0

    SCREEN_DIM = 500

    #: use dynamics equations from the nips paper or the book
    book_or_nips = "book"
    action_arrow = None
    domain_fig = None
    actions_num = 3

    def __init__(self, render_mode: Optional[str] = None):
        self.render_mode = render_mode
        # pygame handles, created lazily on the first render() call.
        self.screen = None
        self.clock = None
        self.isopen = True
        # cos/sin entries are bounded by [-1, 1]; velocities by MAX_VEL_1/2.
        high = np.array(
            [1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2], dtype=np.float32
        )
        low = -high
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)
        self.action_space = spaces.Discrete(3)
        # Internal 4-D state (theta1, theta2, dtheta1, dtheta2); None until reset().
        self.state = None

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        """Sample a new state uniformly in [-0.1, 0.1]^4 (bounds overridable via options)."""
        super().reset(seed=seed)
        # Note that if you use custom reset bounds, it may lead to out-of-bound
        # state/observations.
        low, high = utils.maybe_parse_reset_bounds(
            options, -0.1, 0.1  # default low
        )  # default high
        self.state = self.np_random.uniform(low=low, high=high, size=(4,)).astype(
            np.float32
        )

        if self.render_mode == "human":
            self.render()
        return self._get_ob(), {}

    def step(self, a):
        s = self.state
        assert s is not None, "Call reset before using AcrobotEnv object."
        # Map the discrete action {0, 1, 2} onto a torque in {-1, 0, +1}.
        torque = self.AVAIL_TORQUE[a]

        # Add noise to the force action
        if self.torque_noise_max > 0:
            torque += self.np_random.uniform(
                -self.torque_noise_max, self.torque_noise_max
            )

        # Now, augment the state with our force action so it can be passed to
        # _dsdt
        s_augmented = np.append(s, torque)

        # Integrate the equations of motion over one control interval of self.dt.
        ns = rk4(self._dsdt, s_augmented, [0, self.dt])

        # Wrap both joint angles into [-pi, pi] and clip the angular velocities
        # to the documented bounds (+/- 4*pi and +/- 9*pi rad/s).
        ns[0] = wrap(ns[0], -pi, pi)
        ns[1] = wrap(ns[1], -pi, pi)
        ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
        ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
        self.state = ns
        terminated = self._terminal()
        # -1 on every non-terminal step, 0 on the terminating step.
        reward = -1.0 if not terminated else 0.0

        if self.render_mode == "human":
            self.render()
        return (self._get_ob(), reward, terminated, False, {})

    def _get_ob(self):
        """Build the observation [cos(t1), sin(t1), cos(t2), sin(t2), dt1, dt2]
        from the internal 4-D state (theta1, theta2, dtheta1, dtheta2)."""
        s = self.state
        assert s is not None, "Call reset before using AcrobotEnv object."
        return np.array(
            [cos(s[0]), sin(s[0]), cos(s[1]), sin(s[1]), s[2], s[3]], dtype=np.float32
        )

    def _terminal(self):
        """Return True once the free end is above the target height:
        -cos(theta1) - cos(theta1 + theta2) > 1.0."""
        s = self.state
        assert s is not None, "Call reset before using AcrobotEnv object."
        return bool(-cos(s[0]) - cos(s[1] + s[0]) > 1.0)

    def _dsdt(self, s_augmented):
        """Time derivative of the augmented state
        [theta1, theta2, dtheta1, dtheta2, torque].

        The trailing torque entry is treated as constant over the integration
        interval, hence its derivative is 0.0. `book_or_nips` selects between
        the book dynamics (default) and the original NIPS-paper dynamics,
        which differ by one term in ddtheta2.
        """
        m1 = self.LINK_MASS_1
        m2 = self.LINK_MASS_2
        l1 = self.LINK_LENGTH_1
        lc1 = self.LINK_COM_POS_1
        lc2 = self.LINK_COM_POS_2
        I1 = self.LINK_MOI
        I2 = self.LINK_MOI
        g = 9.8  # gravitational acceleration
        a = s_augmented[-1]  # applied torque
        s = s_augmented[:-1]
        theta1 = s[0]
        theta2 = s[1]
        dtheta1 = s[2]
        dtheta2 = s[3]
        # d1, d2: inertia-matrix terms; phi1, phi2: Coriolis/centrifugal and
        # gravity terms of the two-link equations of motion.
        d1 = (
            m1 * lc1**2
            + m2 * (l1**2 + lc2**2 + 2 * l1 * lc2 * cos(theta2))
            + I1
            + I2
        )
        d2 = m2 * (lc2**2 + l1 * lc2 * cos(theta2)) + I2
        phi2 = m2 * lc2 * g * cos(theta1 + theta2 - pi / 2.0)
        phi1 = (
            -m2 * l1 * lc2 * dtheta2**2 * sin(theta2)
            - 2 * m2 * l1 * lc2 * dtheta2 * dtheta1 * sin(theta2)
            + (m1 * lc1 + m2 * l1) * g * cos(theta1 - pi / 2)
            + phi2
        )
        if self.book_or_nips == "nips":
            # the following line is consistent with the description in the
            # paper
            ddtheta2 = (a + d2 / d1 * phi1 - phi2) / (m2 * lc2**2 + I2 - d2**2 / d1)
        else:
            # the following line is consistent with the java implementation and the
            # book
            ddtheta2 = (
                a + d2 / d1 * phi1 - m2 * l1 * lc2 * dtheta1**2 * sin(theta2) - phi2
            ) / (m2 * lc2**2 + I2 - d2**2 / d1)
        ddtheta1 = -(d2 * ddtheta2 + phi1) / d1
        return dtheta1, dtheta2, ddtheta1, ddtheta2, 0.0

    def render(self):
        """Draw the acrobot with pygame; returns an RGB array in "rgb_array" mode."""
        if self.render_mode is None:
            logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
            )
            return

        try:
            import pygame
            from pygame import gfxdraw
        except ImportError:
            raise DependencyNotInstalled(
                "pygame is not installed, run `pip install gym[classic_control]`"
            )

        if self.screen is None:
            pygame.init()
            if self.render_mode == "human":
                pygame.display.init()
                self.screen = pygame.display.set_mode(
                    (self.SCREEN_DIM, self.SCREEN_DIM)
                )
            else:  # mode in "rgb_array"
                self.screen = pygame.Surface((self.SCREEN_DIM, self.SCREEN_DIM))
        if self.clock is None:
            self.clock = pygame.time.Clock()

        surf = pygame.Surface((self.SCREEN_DIM, self.SCREEN_DIM))
        surf.fill((255, 255, 255))
        s = self.state

        # NOTE(review): this local `bound` shadows the module-level bound()
        # helper inside render(); harmless here since the helper is not called
        # in this method.
        bound = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2  # 2.2 for default
        scale = self.SCREEN_DIM / (bound * 2)
        offset = self.SCREEN_DIM / 2

        if s is None:
            return None

        # End points of link 1 and link 2 in screen-scaled coordinates.
        p1 = [
            -self.LINK_LENGTH_1 * cos(s[0]) * scale,
            self.LINK_LENGTH_1 * sin(s[0]) * scale,
        ]

        p2 = [
            p1[0] - self.LINK_LENGTH_2 * cos(s[0] + s[1]) * scale,
            p1[1] + self.LINK_LENGTH_2 * sin(s[0] + s[1]) * scale,
        ]

        xys = np.array([[0, 0], p1, p2])[:, ::-1]
        thetas = [s[0] - pi / 2, s[0] + s[1] - pi / 2]
        link_lengths = [self.LINK_LENGTH_1 * scale, self.LINK_LENGTH_2 * scale]

        # Horizontal target line the free end has to cross.
        pygame.draw.line(
            surf,
            start_pos=(-2.2 * scale + offset, 1 * scale + offset),
            end_pos=(2.2 * scale + offset, 1 * scale + offset),
            color=(0, 0, 0),
        )

        # Draw each link as a rotated rectangle plus a joint circle.
        for ((x, y), th, llen) in zip(xys, thetas, link_lengths):
            x = x + offset
            y = y + offset
            l, r, t, b = 0, llen, 0.1 * scale, -0.1 * scale
            coords = [(l, b), (l, t), (r, t), (r, b)]
            transformed_coords = []
            for coord in coords:
                coord = pygame.math.Vector2(coord).rotate_rad(th)
                coord = (coord[0] + x, coord[1] + y)
                transformed_coords.append(coord)
            gfxdraw.aapolygon(surf, transformed_coords, (0, 204, 204))
            gfxdraw.filled_polygon(surf, transformed_coords, (0, 204, 204))

            gfxdraw.aacircle(surf, int(x), int(y), int(0.1 * scale), (204, 204, 0))
            gfxdraw.filled_circle(surf, int(x), int(y), int(0.1 * scale), (204, 204, 0))

        # pygame's y axis points down; flip so positive y renders upwards.
        surf = pygame.transform.flip(surf, False, True)
        self.screen.blit(surf, (0, 0))

        if self.render_mode == "human":
            pygame.event.pump()
            self.clock.tick(self.metadata["render_fps"])
            pygame.display.flip()
        elif self.render_mode == "rgb_array":
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
            )

    def close(self):
        """Shut pygame down if render() ever started it."""
        if self.screen is not None:
            import pygame

            pygame.display.quit()
            pygame.quit()
            self.isopen = False


def wrap(x, m, M):
    """Wraps ``x`` so m <= x <= M; but unlike ``bound()`` which truncates,
    ``wrap()`` wraps x around the coordinate system defined by m,M.\n
    For example, m = -180, M = 180 (degrees), x = 360 --> returns 0.

    Args:
        x: a scalar
        m: minimum possible value in range
        M: maximum possible value in range

    Returns:
        x: a scalar, wrapped
    """
    diff = M - m
    while x > M:
        x = x - diff
    while x < m:
        x = x + diff
    return x


def bound(x, m, M=None):
    """Either have m as scalar, so bound(x,m,M) which returns m <= x <= M *OR*
    have m as length 2 vector, bound(x,m, <IGNORED>) returns m[0] <= x <= m[1].

    Args:
        x: scalar
        m: The lower bound
        M: The upper bound

    Returns:
        x: scalar, bound between min (m) and Max (M)
    """
    if M is None:
        M = m[1]
        m = m[0]
    # bound x between min (m) and Max (M)
    return min(max(x, m), M)


def rk4(derivs, y0, t):
    """
    Integrate 1-D or N-D system of ODEs using 4-th order Runge-Kutta.

    Example for 2D system:

        >>> def derivs(x):
        ...     d1 = x[0] + 2*x[1]
        ...     d2 = -3*x[0] + 4*x[1]
        ...     return d1, d2

        >>> dt = 0.0005
        >>> t = np.arange(0.0, 2.0, dt)
        >>> y0 = (1,2)
        >>> yout = rk4(derivs, y0, t)

    Args:
        derivs: the derivative of the system and has the signature ``dy = derivs(yi)``
        y0: initial state vector
        t: sample times

    Returns:
        yout: Runge-Kutta approximation of the ODE
    """
    try:
        Ny = len(y0)
    except TypeError:
        # Scalar initial state: one value per sample time.
        # NOTE(review): np.float_ is removed in NumPy 2.0 — confirm the pinned
        # NumPy version before upgrading.
        yout = np.zeros((len(t),), np.float_)
    else:
        yout = np.zeros((len(t), Ny), np.float_)

    yout[0] = y0

    for i in np.arange(len(t) - 1):
        this = t[i]
        dt = t[i + 1] - this
        dt2 = dt / 2.0
        y0 = yout[i]

        # Classic RK4: four derivative evaluations per step.
        k1 = np.asarray(derivs(y0))
        k2 = np.asarray(derivs(y0 + dt2 * k1))
        k3 = np.asarray(derivs(y0 + dt2 * k2))
        k4 = np.asarray(derivs(y0 + dt * k3))
        yout[i + 1] = y0 + dt / 6.0 * (k1 + 2 * k2 + 2 * k3 + k4)
    # We only care about the final timestep and we cleave off action value which will be zero
    return yout[-1][:4]


================================================
FILE: gym/envs/classic_control/cartpole.py
================================================
"""
Classic cart-pole system implemented by Rich Sutton et al.
Copied from http://incompleteideas.net/sutton/book/code/pole.c
permalink: https://perma.cc/C9ZM-652R
"""
import math
from typing import Optional, Union

import numpy as np

import gym
from gym import logger, spaces
from gym.envs.classic_control import utils
from gym.error import DependencyNotInstalled


class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):
    """
    ### Description

    This environment corresponds to the version of the cart-pole problem described by
    Barto, Sutton, and Anderson in
    ["Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problem"](https://ieeexplore.ieee.org/document/6313077).
    A pole is attached by an un-actuated joint to a cart, which moves along a frictionless
    track. The pendulum is placed upright on the cart and the goal is to balance the pole by
    applying forces in the left and right direction on the cart.

    ### Action Space

    The action is a `ndarray` with shape `(1,)` which can take values `{0, 1}` indicating
    the direction of the fixed force the cart is pushed with.

    | Num | Action                 |
    |-----|------------------------|
    | 0   | Push cart to the left  |
    | 1   | Push cart to the right |

    **Note**: The velocity that is reduced or increased by the applied force is not fixed and
    it depends on the angle the pole is pointing. The center of gravity of the pole varies the
    amount of energy needed to move the cart underneath it

    ### Observation Space

    The observation is a `ndarray` with shape `(4,)` with the values corresponding to the
    following positions and velocities:

    | Num | Observation           | Min                 | Max               |
    |-----|-----------------------|---------------------|-------------------|
    | 0   | Cart Position         | -4.8                | 4.8               |
    | 1   | Cart Velocity         | -Inf                | Inf               |
    | 2   | Pole Angle            | ~ -0.418 rad (-24°) | ~ 0.418 rad (24°) |
    | 3   | Pole Angular Velocity | -Inf                | Inf               |

    **Note:** While the ranges above denote the possible values for observation space of each
    element, it is not reflective of the allowed values of the state space in an unterminated
    episode. Particularly:
    - The cart x-position (index 0) can be take values between `(-4.8, 4.8)`, but the episode
      terminates if the cart leaves the `(-2.4, 2.4)` range.
    - The pole angle can be observed between `(-.418, .418)` radians (or **±24°**), but the
      episode terminates if the pole angle is not in the range `(-.2095, .2095)` (or **±12°**)

    ### Rewards

    Since the goal is to keep the pole upright for as long as possible, a reward of `+1` for
    every step taken, including the termination step, is allotted. The threshold for rewards
    is 475 for v1.

    ### Starting State

    All observations are assigned a uniformly random value in `(-0.05, 0.05)`

    ### Episode End

    The episode ends if any one of the following occurs:

    1. Termination: Pole Angle is greater than ±12°
    2. Termination: Cart Position is greater than ±2.4 (center of the cart reaches the edge of
       the display)
    3. Truncation: Episode length is greater than 500 (200 for v0)

    ### Arguments

    ```
    gym.make('CartPole-v1')
    ```

    No additional arguments are currently supported.
    """

    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": 50,
    }

    def __init__(self, render_mode: Optional[str] = None):
        # Physical constants of the cart-pole system.
        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.total_mass = self.masspole + self.masscart
        self.length = 0.5  # actually half the pole's length
        self.polemass_length = self.masspole * self.length
        self.force_mag = 10.0
        self.tau = 0.02  # seconds between state updates
        self.kinematics_integrator = "euler"

        # Angle at which to fail the episode
        self.theta_threshold_radians = 12 * 2 * math.pi / 360
        self.x_threshold = 2.4

        # Angle limit set to 2 * theta_threshold_radians so failing observation
        # is still within bounds.
        high = np.array(
            [
                self.x_threshold * 2,
                np.finfo(np.float32).max,
                self.theta_threshold_radians * 2,
                np.finfo(np.float32).max,
            ],
            dtype=np.float32,
        )

        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)

        self.render_mode = render_mode

        # pygame handles, created lazily on the first render() call.
        self.screen_width = 600
        self.screen_height = 400
        self.screen = None
        self.clock = None
        self.isopen = True
        # Internal state tuple (x, x_dot, theta, theta_dot); None until reset().
        self.state = None

        # Counts extra step() calls made after termination (used to warn once).
        self.steps_beyond_terminated = None

    def step(self, action):
        err_msg = f"{action!r} ({type(action)}) invalid"
        assert self.action_space.contains(action), err_msg
        assert self.state is not None, "Call reset before using step method."
x, x_dot, theta, theta_dot = self.state force = self.force_mag if action == 1 else -self.force_mag costheta = math.cos(theta) sintheta = math.sin(theta) # For the interested reader: # https://coneural.org/florian/papers/05_cart_pole.pdf temp = ( force + self.polemass_length * theta_dot**2 * sintheta ) / self.total_mass thetaacc = (self.gravity * sintheta - costheta * temp) / ( self.length * (4.0 / 3.0 - self.masspole * costheta**2 / self.total_mass) ) xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass if self.kinematics_integrator == "euler": x = x + self.tau * x_dot x_dot = x_dot + self.tau * xacc theta = theta + self.tau * theta_dot theta_dot = theta_dot + self.tau * thetaacc else: # semi-implicit euler x_dot = x_dot + self.tau * xacc x = x + self.tau * x_dot theta_dot = theta_dot + self.tau * thetaacc theta = theta + self.tau * theta_dot self.state = (x, x_dot, theta, theta_dot) terminated = bool( x < -self.x_threshold or x > self.x_threshold or theta < -self.theta_threshold_radians or theta > self.theta_threshold_radians ) if not terminated: reward = 1.0 elif self.steps_beyond_terminated is None: # Pole just fell! self.steps_beyond_terminated = 0 reward = 1.0 else: if self.steps_beyond_terminated == 0: logger.warn( "You are calling 'step()' even though this " "environment has already returned terminated = True. You " "should always call 'reset()' once you receive 'terminated = " "True' -- any further steps are undefined behavior." ) self.steps_beyond_terminated += 1 reward = 0.0 if self.render_mode == "human": self.render() return np.array(self.state, dtype=np.float32), reward, terminated, False, {} def reset( self, *, seed: Optional[int] = None, options: Optional[dict] = None, ): super().reset(seed=seed) # Note that if you use custom reset bounds, it may lead to out-of-bound # state/observations. 
        low, high = utils.maybe_parse_reset_bounds(
            options, -0.05, 0.05  # default low
        )  # default high
        self.state = self.np_random.uniform(low=low, high=high, size=(4,))
        self.steps_beyond_terminated = None

        if self.render_mode == "human":
            self.render()
        return np.array(self.state, dtype=np.float32), {}

    def render(self):
        """Draw the cart and pole with pygame; returns an RGB array in "rgb_array" mode."""
        if self.render_mode is None:
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
            )
            return

        try:
            import pygame
            from pygame import gfxdraw
        except ImportError:
            raise DependencyNotInstalled(
                "pygame is not installed, run `pip install gym[classic_control]`"
            )

        if self.screen is None:
            pygame.init()
            if self.render_mode == "human":
                pygame.display.init()
                self.screen = pygame.display.set_mode(
                    (self.screen_width, self.screen_height)
                )
            else:  # mode == "rgb_array"
                self.screen = pygame.Surface((self.screen_width, self.screen_height))
        if self.clock is None:
            self.clock = pygame.time.Clock()

        world_width = self.x_threshold * 2
        scale = self.screen_width / world_width
        polewidth = 10.0
        polelen = scale * (2 * self.length)
        cartwidth = 50.0
        cartheight = 30.0

        if self.state is None:
            return None

        x = self.state

        self.surf = pygame.Surface((self.screen_width, self.screen_height))
        self.surf.fill((255, 255, 255))

        l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2
        axleoffset = cartheight / 4.0
        cartx = x[0] * scale + self.screen_width / 2.0  # MIDDLE OF CART
        carty = 100  # TOP OF CART
        cart_coords = [(l, b), (l, t), (r, t), (r, b)]
        cart_coords = [(c[0] + cartx, c[1] + carty) for c in cart_coords]
        gfxdraw.aapolygon(self.surf, cart_coords, (0, 0, 0))
        gfxdraw.filled_polygon(self.surf, cart_coords, (0, 0, 0))

        l, r, t, b = (
            -polewidth / 2,
            polewidth / 2,
            polelen - polewidth / 2,
            -polewidth / 2,
        )

        # Pole rectangle, rotated by the (negated) pole angle about the axle.
        pole_coords = []
        for coord in [(l, b), (l, t), (r, t), (r, b)]:
            coord = pygame.math.Vector2(coord).rotate_rad(-x[2])
            coord = (coord[0] + cartx, coord[1] + carty + axleoffset)
            pole_coords.append(coord)
        gfxdraw.aapolygon(self.surf, pole_coords, (202, 152, 101))
        gfxdraw.filled_polygon(self.surf, pole_coords, (202, 152, 101))

        # Axle joint.
        gfxdraw.aacircle(
            self.surf,
            int(cartx),
            int(carty + axleoffset),
            int(polewidth / 2),
            (129, 132, 203),
        )
        gfxdraw.filled_circle(
            self.surf,
            int(cartx),
            int(carty + axleoffset),
            int(polewidth / 2),
            (129, 132, 203),
        )

        # Track line.
        gfxdraw.hline(self.surf, 0, self.screen_width, carty, (0, 0, 0))

        # pygame's y axis points down; flip so positive y renders upwards.
        self.surf = pygame.transform.flip(self.surf, False, True)
        self.screen.blit(self.surf, (0, 0))
        if self.render_mode == "human":
            pygame.event.pump()
            self.clock.tick(self.metadata["render_fps"])
            pygame.display.flip()

        elif self.render_mode == "rgb_array":
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
            )

    def close(self):
        """Shut pygame down if render() ever started it."""
        if self.screen is not None:
            import pygame

            pygame.display.quit()
            pygame.quit()
            self.isopen = False


================================================
FILE: gym/envs/classic_control/continuous_mountain_car.py
================================================
"""
@author: Olivier Sigaud

A merge between two sources:

* Adaptation of the MountainCar Environment from the "FAReinforcement" library
  of Jose Antonio Martin H. (version 1.0), adapted by 'Tom Schaul, tom@idsia.ch'
  and then modified by Arnaud de Broissia

* the gym MountainCar environment itself from
  http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
  permalink: https://perma.cc/6Z2N-PFWC
"""
import math
from typing import Optional

import numpy as np

import gym
from gym import spaces
from gym.envs.classic_control import utils
from gym.error import DependencyNotInstalled


class Continuous_MountainCarEnv(gym.Env):
    """
    ### Description

    The Mountain Car MDP is a deterministic MDP that consists of a car placed stochastically
    at the bottom of a sinusoidal valley, with the only possible actions being the accelerations
    that can be applied to the car in either direction. The goal of the MDP is to strategically
    accelerate the car to reach the goal state on top of the right hill. There are two versions
    of the mountain car domain in gym: one with discrete actions and one with continuous.
    This version is the one with continuous actions.

    This MDP first appeared in
    [Andrew Moore's PhD Thesis (1990)](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-209.pdf)

    ```
    @TECHREPORT{Moore90efficientmemory-based,
        author = {Andrew William Moore},
        title = {Efficient Memory-based Learning for Robot Control},
        institution = {University of Cambridge},
        year = {1990}
    }
    ```

    ### Observation Space

    The observation is a `ndarray` with shape `(2,)` where the elements correspond to the following:

    | Num | Observation                          | Min  | Max | Unit         |
    |-----|--------------------------------------|------|-----|--------------|
    | 0   | position of the car along the x-axis | -Inf | Inf | position (m) |
    | 1   | velocity of the car                  | -Inf | Inf | position (m) |

    ### Action Space

    The action is a `ndarray` with shape `(1,)`, representing the directional force applied on
    the car. The action is clipped in the range `[-1,1]` and multiplied by a power of 0.0015.

    ### Transition Dynamics:

    Given an action, the mountain car follows the following transition dynamics:

    *velocity<sub>t+1</sub> = velocity<sub>t+1</sub> + force * self.power - 0.0025 * cos(3 * position<sub>t</sub>)*

    *position<sub>t+1</sub> = position<sub>t</sub> + velocity<sub>t+1</sub>*

    where force is the action clipped to the range `[-1,1]` and power is a constant 0.0015.
    The collisions at either end are inelastic with the velocity set to 0 upon collision with the
    wall. The position is clipped to the range [-1.2, 0.6] and velocity is clipped to the range
    [-0.07, 0.07].

    ### Reward

    A negative reward of *-0.1 * action<sup>2</sup>* is received at each timestep to penalise for
    taking actions of large magnitude. If the mountain car reaches the goal then a positive reward
    of +100 is added to the negative reward for that timestep.

    ### Starting State

    The position of the car is assigned a uniform random value in `[-0.6 , -0.4]`.
    The starting velocity of the car is always assigned to 0.

    ### Episode End

    The episode ends if either of the following happens:
    1. Termination: The position of the car is greater than or equal to 0.45 (the goal position
       on top of the right hill)
    2. Truncation: The length of the episode is 999.

    ### Arguments

    ```
    gym.make('MountainCarContinuous-v0')
    ```

    ### Version History

    * v0: Initial versions release (1.0.0)
    """

    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": 30,
    }

    def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
        self.min_action = -1.0
        self.max_action = 1.0
        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = (
            0.45  # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
        )
        self.goal_velocity = goal_velocity
        # Scales the clipped action into a force term.
        self.power = 0.0015

        self.low_state = np.array(
            [self.min_position, -self.max_speed], dtype=np.float32
        )
        self.high_state = np.array(
            [self.max_position, self.max_speed], dtype=np.float32
        )

        self.render_mode = render_mode

        # pygame handles, created lazily on the first render() call.
        self.screen_width = 600
        self.screen_height = 400
        self.screen = None
        self.clock = None
        self.isopen = True

        self.action_space = spaces.Box(
            low=self.min_action, high=self.max_action, shape=(1,), dtype=np.float32
        )
        self.observation_space = spaces.Box(
            low=self.low_state, high=self.high_state, dtype=np.float32
        )

    def step(self, action: np.ndarray):
        position = self.state[0]
        velocity = self.state[1]
        # Clip the continuous action into [min_action, max_action].
        force = min(max(action[0], self.min_action), self.max_action)

        # Apply the force and the slope term, then clip velocity and position.
        velocity += force * self.power - 0.0025 * math.cos(3 * position)
        if velocity > self.max_speed:
            velocity = self.max_speed
        if velocity < -self.max_speed:
            velocity = -self.max_speed
        position += velocity
        if position > self.max_position:
            position = self.max_position
        if position < self.min_position:
            position = self.min_position
        # Inelastic collision with the left wall.
        if position == self.min_position and velocity < 0:
            velocity = 0

        # Convert a possible numpy bool to a Python bool.
        terminated = bool(
            position >= self.goal_position and velocity >= self.goal_velocity
        )

        # -0.1 * action^2 control penalty each step; +100 bonus on reaching the goal.
        reward = 0
        if terminated:
            reward = 100.0
        reward -= math.pow(action[0], 2) * 0.1

        self.state = np.array([position, velocity], dtype=np.float32)

        if self.render_mode == "human":
            self.render()
        return self.state, reward, terminated, False, {}

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        """Place the car at a uniform random position in [-0.6, -0.4] with zero velocity."""
        super().reset(seed=seed)
        # Note that if you use custom reset bounds, it may lead to out-of-bound
        # state/observations.
        low, high = utils.maybe_parse_reset_bounds(options, -0.6, -0.4)
        self.state = np.array([self.np_random.uniform(low=low, high=high), 0])

        if self.render_mode == "human":
            self.render()
        return np.array(self.state, dtype=np.float32), {}

    def _height(self, xs):
        # Height profile of the sinusoidal valley, used for drawing.
        return np.sin(3 * xs) * 0.45 + 0.55

    def render(self):
        """Draw the car, hill and goal flag; returns an RGB array in "rgb_array" mode."""
        if self.render_mode is None:
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
            )
            return

        try:
            import pygame
            from pygame import gfxdraw
        except ImportError:
            raise DependencyNotInstalled(
                "pygame is not installed, run `pip install gym[classic_control]`"
            )

        if self.screen is None:
            pygame.init()
            if self.render_mode == "human":
                pygame.display.init()
                self.screen = pygame.display.set_mode(
                    (self.screen_width, self.screen_height)
                )
            else:  # mode == "rgb_array":
                self.screen = pygame.Surface((self.screen_width, self.screen_height))
        if self.clock is None:
            self.clock = pygame.time.Clock()

        world_width = self.max_position - self.min_position
        scale = self.screen_width / world_width
        carwidth = 40
        carheight = 20

        self.surf = pygame.Surface((self.screen_width, self.screen_height))
        self.surf.fill((255, 255, 255))

        pos = self.state[0]

        # Hill profile as a polyline.
        xs = np.linspace(self.min_position, self.max_position, 100)
        ys = self._height(xs)
        xys = list(zip((xs - self.min_position) * scale, ys * scale))

        pygame.draw.aalines(self.surf, points=xys, closed=False, color=(0, 0, 0))

        clearance = 10

        # Car body: a rectangle rotated by cos(3 * pos).
        # NOTE(review): the rotation angle cos(3 * pos) presumably approximates
        # the local slope of the hill — confirm against the height function.
        l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0
        coords = []
        for c in [(l, b), (l, t), (r, t), (r, b)]:
            c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos))
            coords.append(
                (
                    c[0] + (pos - self.min_position) * scale,
                    c[1] + clearance + self._height(pos) * scale,
                )
            )

        gfxdraw.aapolygon(self.surf, coords, (0, 0, 0))
        gfxdraw.filled_polygon(self.surf, coords, (0, 0, 0))

        # Two wheels.
        for c in [(carwidth / 4, 0), (-carwidth / 4, 0)]:
            c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos))
            wheel = (
                int(c[0] + (pos - self.min_position) * scale),
                int(c[1] + clearance + self._height(pos) * scale),
            )

            gfxdraw.aacircle(
                self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128)
            )
            gfxdraw.filled_circle(
                self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128)
            )

        # Goal flag pole at goal_position.
        flagx = int((self.goal_position - self.min_position) * scale)
        flagy1 = int(self._height(self.goal_position) * scale)
        flagy2 = flagy1 + 50
        gfxdraw.vline(self.surf, flagx, flagy1, flagy2, (0, 0, 0))
        # Goal flag triangle.
        gfxdraw.aapolygon(
            self.surf,
            [(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)],
            (204, 204, 0),
        )
        gfxdraw.filled_polygon(
            self.surf,
            [(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)],
            (204, 204, 0),
        )

        # pygame's y axis points down; flip so positive y renders upwards.
        self.surf = pygame.transform.flip(self.surf, False, True)
        self.screen.blit(self.surf, (0, 0))
        if self.render_mode == "human":
            pygame.event.pump()
            self.clock.tick(self.metadata["render_fps"])
            pygame.display.flip()

        elif self.render_mode == "rgb_array":
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
            )

    def close(self):
        """Shut pygame down if render() ever started it."""
        if self.screen is not None:
            import pygame

            pygame.display.quit()
            pygame.quit()
            self.isopen = False


================================================
FILE: gym/envs/classic_control/mountain_car.py
================================================
"""
http://incompleteideas.net/MountainCar/MountainCar1.cp
permalink: https://perma.cc/6Z2N-PFWC
"""
import math
from typing import Optional

import numpy as np

import gym
from gym import spaces
from gym.envs.classic_control import utils
from gym.error import DependencyNotInstalled


class MountainCarEnv(gym.Env):
    """
    ### Description

    The Mountain Car MDP is a deterministic MDP that consists of a car placed stochastically
    at the bottom of a sinusoidal valley, with the only possible actions being the accelerations
    that can be applied to the car in either direction. The goal of the MDP is to strategically
    accelerate the car to reach the goal state on top of the right hill. There are two versions
    of the mountain car domain in gym: one with discrete actions and one with continuous.
    This version is the one with discrete actions.

    This MDP first appeared in
    [Andrew Moore's PhD Thesis (1990)](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-209.pdf)

    ```
    @TECHREPORT{Moore90efficientmemory-based,
        author = {Andrew William Moore},
        title = {Efficient Memory-based Learning for Robot Control},
        institution = {University of Cambridge},
        year = {1990}
    }
    ```

    ### Observation Space

    The observation is a `ndarray` with shape `(2,)` where the elements correspond to the following:

    | Num | Observation                          | Min  | Max | Unit         |
    |-----|--------------------------------------|------|-----|--------------|
    | 0   | position of the car along the x-axis | -Inf | Inf | position (m) |
    | 1   | velocity of the car                  | -Inf | Inf | position (m) |

    ### Action Space

    There are 3 discrete deterministic actions:

    | Num | Observation             | Value | Unit         |
    |-----|-------------------------|-------|--------------|
    | 0   | Accelerate to the left  | Inf   | position (m) |
    | 1   | Don't accelerate        | Inf   | position (m) |
    | 2   | Accelerate to the right | Inf   | position (m) |

    ### Transition Dynamics:

    Given an action, the mountain car follows the following transition dynamics:

    *velocity<sub>t+1</sub> = velocity<sub>t</sub> + (action - 1) * force - cos(3 * position<sub>t</sub>) * gravity*

    *position<sub>t+1</sub> = position<sub>t</sub> + velocity<sub>t+1</sub>*

    where force = 0.001 and gravity = 0.0025. The collisions at either end are inelastic with the
    velocity set to 0 upon collision with the wall. The position is clipped to the range
    `[-1.2, 0.6]` and velocity is clipped to the range `[-0.07, 0.07]`.

    ### Reward:

    The goal is to reach the flag placed on top of the right hill as quickly as possible, as such
    the agent is penalised with a reward of -1 for each timestep.

    ### Starting State

    The position of the car is assigned a uniform random value in *[-0.6 , -0.4]*.
    The starting velocity of the car is always assigned to 0.

    ### Episode End

    The episode ends if either of the following happens:
    1. Termination: The position of the car is greater than or equal to 0.5 (the goal position on
       top of the right hill)
    2. Truncation: The length of the episode is 200.

    ### Arguments

    ```
    gym.make('MountainCar-v0')
    ```

    ### Version History

    * v0: Initial versions release (1.0.0)
    """

    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": 30,
    }

    def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = 0.5
        self.goal_velocity = goal_velocity

        # Transition-dynamics constants (see class docstring).
        self.force = 0.001
        self.gravity = 0.0025

        self.low = np.array([self.min_position, -self.max_speed], dtype=np.float32)
        self.high = np.array([self.max_position, self.max_speed], dtype=np.float32)

        self.render_mode = render_mode

        # pygame handles, created lazily on the first render() call.
        self.screen_width = 600
        self.screen_height = 400
        self.screen = None
        self.clock = None
        self.isopen = True

        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32)

    def step(self, action: int):
        assert self.action_space.contains(
            action
        ), f"{action!r} ({type(action)}) invalid"

        position, velocity = self.state
        # (action - 1) maps {0, 1, 2} to {-1, 0, +1}; the cosine term is the slope.
        velocity += (action - 1) * self.force + math.cos(3 * position) * (-self.gravity)
        velocity = np.clip(velocity, -self.max_speed, self.max_speed)
        position += velocity
        position = np.clip(position, self.min_position, self.max_position)
        # Inelastic collision with the left wall.
        if position == self.min_position and velocity < 0:
            velocity = 0

        terminated = bool(
            position >= self.goal_position and velocity >= self.goal_velocity
        )
        # Constant -1 per step until termination.
        reward = -1.0

        self.state = (position, velocity)
        if self.render_mode == "human":
            self.render()
        return np.array(self.state, dtype=np.float32), reward, terminated, False, {}

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ):
        """Place the car at a uniform random position in [-0.6, -0.4] with zero velocity."""
        super().reset(seed=seed)
        # Note that if you use custom reset bounds, it may lead to out-of-bound
        # state/observations.
low, high = utils.maybe_parse_reset_bounds(options, -0.6, -0.4) self.state = np.array([self.np_random.uniform(low=low, high=high), 0]) if self.render_mode == "human": self.render() return np.array(self.state, dtype=np.float32), {} def _height(self, xs): return np.sin(3 * xs) * 0.45 + 0.55 def render(self): if self.render_mode is None: gym.logger.warn( "You are calling render method without specifying any render mode. " "You can specify the render_mode at initialization, " f'e.g. gym("{self.spec.id}", render_mode="rgb_array")' ) return try: import pygame from pygame import gfxdraw except ImportError: raise DependencyNotInstalled( "pygame is not installed, run `pip install gym[classic_control]`" ) if self.screen is None: pygame.init() if self.render_mode == "human": pygame.display.init() self.screen = pygame.display.set_mode( (self.screen_width, self.screen_height) ) else: # mode in "rgb_array" self.screen = pygame.Surface((self.screen_width, self.screen_height)) if self.clock is None: self.clock = pygame.time.Clock() world_width = self.max_position - self.min_position scale = self.screen_width / world_width carwidth = 40 carheight = 20 self.surf = pygame.Surface((self.screen_width, self.screen_height)) self.surf.fill((255, 255, 255)) pos = self.state[0] xs = np.linspace(self.min_position, self.max_position, 100) ys = self._height(xs) xys = list(zip((xs - self.min_position) * scale, ys * scale)) pygame.draw.aalines(self.surf, points=xys, closed=False, color=(0, 0, 0)) clearance = 10 l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0 coords = [] for c in [(l, b), (l, t), (r, t), (r, b)]: c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos)) coords.append( ( c[0] + (pos - self.min_position) * scale, c[1] + clearance + self._height(pos) * scale, ) ) gfxdraw.aapolygon(self.surf, coords, (0, 0, 0)) gfxdraw.filled_polygon(self.surf, coords, (0, 0, 0)) for c in [(carwidth / 4, 0), (-carwidth / 4, 0)]: c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos)) wheel = ( 
                int(c[0] + (pos - self.min_position) * scale),
                int(c[1] + clearance + self._height(pos) * scale),
            )

            gfxdraw.aacircle(
                self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128)
            )
            gfxdraw.filled_circle(
                self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128)
            )

        # Goal flag on top of the right hill.
        flagx = int((self.goal_position - self.min_position) * scale)
        flagy1 = int(self._height(self.goal_position) * scale)
        flagy2 = flagy1 + 50
        gfxdraw.vline(self.surf, flagx, flagy1, flagy2, (0, 0, 0))

        gfxdraw.aapolygon(
            self.surf,
            [(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)],
            (204, 204, 0),
        )
        gfxdraw.filled_polygon(
            self.surf,
            [(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)],
            (204, 204, 0),
        )

        # pygame's y-axis points down; flip vertically so the scene is upright.
        self.surf = pygame.transform.flip(self.surf, False, True)
        self.screen.blit(self.surf, (0, 0))
        if self.render_mode == "human":
            pygame.event.pump()
            self.clock.tick(self.metadata["render_fps"])
            pygame.display.flip()

        elif self.render_mode == "rgb_array":
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
            )

    def get_keys_to_action(self):
        # Control with left and right arrow keys.
        return {(): 1, (276,): 0, (275,): 2, (275, 276): 1}

    def close(self):
        # Shut pygame down only if render() ever started it.
        if self.screen is not None:
            import pygame

            pygame.display.quit()
            pygame.quit()
            self.isopen = False


================================================
FILE: gym/envs/classic_control/pendulum.py
================================================
__credits__ = ["Carlos Luis"]

from os import path
from typing import Optional

import numpy as np

import gym
from gym import spaces
from gym.envs.classic_control import utils
from gym.error import DependencyNotInstalled

DEFAULT_X = np.pi
DEFAULT_Y = 1.0


class PendulumEnv(gym.Env):
    """
    ### Description

    The inverted pendulum swingup problem is based on the classic problem in control theory.
    The system consists of a pendulum attached at one end to a fixed point, and the other end being free.
    The pendulum starts in a random position and the goal is to apply torque on the free end to swing it
    into an upright position, with its center of gravity right above the fixed point.

    The diagram below specifies the coordinate system used for the implementation of the pendulum's
    dynamic equations.

    ![Pendulum Coordinate System](./diagrams/pendulum.png)

    - `x-y`: cartesian coordinates of the pendulum's end in meters.
    - `theta` : angle in radians.
    - `tau`: torque in `N m`. Defined as positive _counter-clockwise_.

    ### Action Space

    The action is a `ndarray` with shape `(1,)` representing the torque applied to free end of the pendulum.

    | Num | Action | Min  | Max |
    |-----|--------|------|-----|
    | 0   | Torque | -2.0 | 2.0 |

    ### Observation Space

    The observation is a `ndarray` with shape `(3,)` representing the x-y coordinates of the pendulum's free
    end and its angular velocity.

    | Num | Observation      | Min  | Max |
    |-----|------------------|------|-----|
    | 0   | x = cos(theta)   | -1.0 | 1.0 |
    | 1   | y = sin(theta)   | -1.0 | 1.0 |
    | 2   | Angular Velocity | -8.0 | 8.0 |

    ### Rewards

    The reward function is defined as:

    *r = -(theta<sup>2</sup> + 0.1 * theta_dt<sup>2</sup> + 0.001 * torque<sup>2</sup>)*

    where `$\theta$` is the pendulum's angle normalized between *[-pi, pi]* (with 0 being in the upright position).
    Based on the above equation, the minimum reward that can be obtained is
    *-(pi<sup>2</sup> + 0.1 * 8<sup>2</sup> + 0.001 * 2<sup>2</sup>) = -16.2736044*,
    while the maximum reward is zero (pendulum is upright with zero velocity and no torque applied).

    ### Starting State

    The starting state is a random angle in *[-pi, pi]* and a random angular velocity in *[-1,1]*.

    ### Episode Truncation

    The episode truncates at 200 time steps.

    ### Arguments

    - `g`: acceleration of gravity measured in *(m s<sup>-2</sup>)* used to calculate the pendulum dynamics.
      The default value is g = 10.0 .

    ```
    gym.make('Pendulum-v1', g=9.81)
    ```

    ### Version History

    * v1: Simplify the math equations, no difference in behavior.
    * v0: Initial versions release (1.0.0)
    """

    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": 30,
    }

    def __init__(self, render_mode: Optional[str] = None, g=10.0):
        # Dynamics parameters: max angular speed, max torque, integration
        # timestep, gravity, mass, and pendulum length.
        self.max_speed = 8
        self.max_torque = 2.0
        self.dt = 0.05
        self.g = g
        self.m = 1.0
        self.l = 1.0

        self.render_mode = render_mode

        # pygame rendering state; initialised lazily on first render() call.
        self.screen_dim = 500
        self.screen = None
        self.clock = None
        self.isopen = True

        high = np.array([1.0, 1.0, self.max_speed], dtype=np.float32)
        # This will throw a warning in tests/envs/test_envs in utils/env_checker.py as the space is not symmetric
        #   or normalised as max_torque == 2 by default. Ignoring the issue here as the default settings are too old
        #   to update to follow the openai gym api
        self.action_space = spaces.Box(
            low=-self.max_torque, high=self.max_torque, shape=(1,), dtype=np.float32
        )
        self.observation_space = spaces.Box(low=-high, high=high, dtype=np.float32)

    def step(self, u):
        """Apply torque `u` (shape `(1,)`) for one timestep.

        Returns `(observation, reward, terminated, truncated, info)`;
        this environment never terminates on its own (time-limit only).
        """
        th, thdot = self.state  # th := theta

        g = self.g
        m = self.m
        l = self.l
        dt = self.dt

        u = np.clip(u, -self.max_torque, self.max_torque)[0]
        self.last_u = u  # for rendering
        # Quadratic cost on angle (normalized to [-pi, pi]), velocity, torque.
        costs = angle_normalize(th) ** 2 + 0.1 * thdot**2 + 0.001 * (u**2)

        # Semi-implicit Euler integration of the pendulum dynamics.
        newthdot = thdot + (3 * g / (2 * l) * np.sin(th) + 3.0 / (m * l**2) * u) * dt
        newthdot = np.clip(newthdot, -self.max_speed, self.max_speed)
        newth = th + newthdot * dt

        self.state = np.array([newth, newthdot])

        if self.render_mode == "human":
            self.render()
        return self._get_obs(), -costs, False, False, {}

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        super().reset(seed=seed)
        if options is None:
            high = np.array([DEFAULT_X, DEFAULT_Y])
        else:
            # Note that if you use custom reset bounds, it may lead to out-of-bound
            # state/observations.
            x = options.get("x_init") if "x_init" in options else DEFAULT_X
            y = options.get("y_init") if "y_init" in options else DEFAULT_Y
            # We expect only numerical inputs.
            x = utils.verify_number_and_cast(x)
            y = utils.verify_number_and_cast(y)
            high = np.array([x, y])
        low = -high  # We enforce symmetric limits.
        self.state = self.np_random.uniform(low=low, high=high)
        self.last_u = None

        if self.render_mode == "human":
            self.render()
        return self._get_obs(), {}

    def _get_obs(self):
        # Observation is [cos(theta), sin(theta), theta_dot].
        theta, thetadot = self.state
        return np.array([np.cos(theta), np.sin(theta), thetadot], dtype=np.float32)

    def render(self):
        if self.render_mode is None:
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
            )
            return

        try:
            import pygame
            from pygame import gfxdraw
        except ImportError:
            raise DependencyNotInstalled(
                "pygame is not installed, run `pip install gym[classic_control]`"
            )

        # Lazy initialisation of the pygame window / off-screen surface.
        if self.screen is None:
            pygame.init()
            if self.render_mode == "human":
                pygame.display.init()
                self.screen = pygame.display.set_mode(
                    (self.screen_dim, self.screen_dim)
                )
            else:  # mode in "rgb_array"
                self.screen = pygame.Surface((self.screen_dim, self.screen_dim))
        if self.clock is None:
            self.clock = pygame.time.Clock()

        self.surf = pygame.Surface((self.screen_dim, self.screen_dim))
        self.surf.fill((255, 255, 255))

        # World-to-pixel scaling; the pivot sits at the screen centre.
        bound = 2.2
        scale = self.screen_dim / (bound * 2)
        offset = self.screen_dim // 2

        rod_length = 1 * scale
        rod_width = 0.2 * scale
        l, r, t, b = 0, rod_length, rod_width / 2, -rod_width / 2
        coords = [(l, b), (l, t), (r, t), (r, b)]
        transformed_coords = []
        for c in coords:
            # Rotate the rod by the pendulum angle (offset by pi/2 so angle 0
            # points upward on screen).
            c = pygame.math.Vector2(c).rotate_rad(self.state[0] + np.pi / 2)
            c = (c[0] + offset, c[1] + offset)
            transformed_coords.append(c)
        gfxdraw.aapolygon(self.surf, transformed_coords, (204, 77, 77))
        gfxdraw.filled_polygon(self.surf, transformed_coords, (204, 77, 77))

        gfxdraw.aacircle(self.surf, offset, offset, int(rod_width / 2), (204, 77, 77))
        gfxdraw.filled_circle(
            self.surf, offset, offset, int(rod_width / 2), (204, 77, 77)
        )

        # Rounded cap at the free end of the rod.
        rod_end = (rod_length, 0)
        rod_end = pygame.math.Vector2(rod_end).rotate_rad(self.state[0] + np.pi / 2)
        rod_end = (int(rod_end[0] + offset), int(rod_end[1] + offset))
        gfxdraw.aacircle(
            self.surf,
            rod_end[0], rod_end[1], int(rod_width / 2), (204, 77, 77)
        )
        gfxdraw.filled_circle(
            self.surf, rod_end[0], rod_end[1], int(rod_width / 2), (204, 77, 77)
        )

        # Torque indicator: arrow image scaled with torque magnitude and
        # mirrored for positive torque.
        fname = path.join(path.dirname(__file__), "assets/clockwise.png")
        img = pygame.image.load(fname)
        if self.last_u is not None:
            scale_img = pygame.transform.smoothscale(
                img,
                (scale * np.abs(self.last_u) / 2, scale * np.abs(self.last_u) / 2),
            )
            is_flip = bool(self.last_u > 0)
            scale_img = pygame.transform.flip(scale_img, is_flip, True)
            self.surf.blit(
                scale_img,
                (
                    offset - scale_img.get_rect().centerx,
                    offset - scale_img.get_rect().centery,
                ),
            )

        # drawing axle
        gfxdraw.aacircle(self.surf, offset, offset, int(0.05 * scale), (0, 0, 0))
        gfxdraw.filled_circle(self.surf, offset, offset, int(0.05 * scale), (0, 0, 0))

        # pygame's y-axis points down; flip vertically so the scene is upright.
        self.surf = pygame.transform.flip(self.surf, False, True)
        self.screen.blit(self.surf, (0, 0))
        if self.render_mode == "human":
            pygame.event.pump()
            self.clock.tick(self.metadata["render_fps"])
            pygame.display.flip()

        else:  # mode == "rgb_array":
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
            )

    def close(self):
        # Shut pygame down only if render() ever started it.
        if self.screen is not None:
            import pygame

            pygame.display.quit()
            pygame.quit()
            self.isopen = False


def angle_normalize(x):
    # Map an angle to the equivalent value in [-pi, pi).
    return ((x + np.pi) % (2 * np.pi)) - np.pi


================================================
FILE: gym/envs/classic_control/utils.py
================================================
"""
Utility functions used for classic control environments.
"""
from typing import Optional, SupportsFloat, Tuple


def verify_number_and_cast(x: SupportsFloat) -> float:
    """Verify parameter is a single number and cast to a float."""
    try:
        x = float(x)
    except (ValueError, TypeError):
        raise ValueError(f"An option ({x}) could not be converted to a float.")
    return x


def maybe_parse_reset_bounds(
    options: Optional[dict], default_low: float, default_high: float
) -> Tuple[float, float]:
    """
    This function can be called during a reset() to customize the sampling
    ranges for setting the initial state distributions.

    Args:
      options: Options passed in to reset().
      default_low: Default lower limit to use, if none specified in options.
      default_high: Default upper limit to use, if none specified in options.

    Returns:
      Tuple of the lower and upper limits.
    """
    if options is None:
        return default_low, default_high

    # Fall back to the defaults for any bound the caller did not supply.
    low = options.get("low") if "low" in options else default_low
    high = options.get("high") if "high" in options else default_high

    # We expect only numerical inputs.
    low = verify_number_and_cast(low)
    high = verify_number_and_cast(high)
    if low > high:
        raise ValueError(
            f"Lower bound ({low}) must be lower than higher bound ({high})."
        )

    return low, high


================================================
FILE: gym/envs/mujoco/__init__.py
================================================
from gym.envs.mujoco.mujoco_env import MujocoEnv, MuJocoPyEnv  # isort:skip

from gym.envs.mujoco.ant import AntEnv
from gym.envs.mujoco.half_cheetah import HalfCheetahEnv
from gym.envs.mujoco.hopper import HopperEnv
from gym.envs.mujoco.humanoid import HumanoidEnv
from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv
from gym.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv
from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv
from gym.envs.mujoco.pusher import PusherEnv
from gym.envs.mujoco.reacher import ReacherEnv
from gym.envs.mujoco.swimmer import SwimmerEnv
from gym.envs.mujoco.walker2d import Walker2dEnv


================================================
FILE: gym/envs/mujoco/ant.py
================================================
import numpy as np

from gym import utils
from gym.envs.mujoco import MuJocoPyEnv
from gym.spaces import Box


class AntEnv(MuJocoPyEnv, utils.EzPickle):
    # Legacy (v2) mujoco-py based Ant environment.
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 20,
    }

    def __init__(self, **kwargs):
        # 111 = qpos[2:] + qvel + 84 clipped contact-force entries.
        observation_space = Box(
            low=-np.inf, high=np.inf, shape=(111,), dtype=np.float64
        )
        MuJocoPyEnv.__init__(
            self, "ant.xml", 5, observation_space=observation_space, **kwargs
        )
        utils.EzPickle.__init__(self, **kwargs)

    def step(self, a):
        """Simulate one control step and return the 5-tuple transition."""
        xposbefore = self.get_body_com("torso")[0]
        self.do_simulation(a, self.frame_skip)
        xposafter = self.get_body_com("torso")[0]

        # Reward = forward progress - control cost - contact cost + alive bonus.
        forward_reward = (xposafter - xposbefore) / self.dt
        ctrl_cost = 0.5 * np.square(a).sum()
        contact_cost = (
            0.5 * 1e-3 * np.sum(np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
        )
        survive_reward = 1.0
        reward = forward_reward - ctrl_cost - contact_cost + survive_reward
        state = self.state_vector()
        # Healthy iff the state is finite and the torso height stays in [0.2, 1.0].
        not_terminated = (
            np.isfinite(state).all() and state[2] >= 0.2 and state[2] <= 1.0
        )
        terminated = not not_terminated
        ob = self._get_obs()

        if self.render_mode == "human":
            self.render()
        return (
            ob,
            reward,
            terminated,
            False,
            dict(
                reward_forward=forward_reward,
                reward_ctrl=-ctrl_cost,
                reward_contact=-contact_cost,
                reward_survive=survive_reward,
            ),
        )

    def _get_obs(self):
        # x/y torso position excluded (qpos[2:]) for position-agnostic policies.
        return np.concatenate(
            [
                self.sim.data.qpos.flat[2:],
                self.sim.data.qvel.flat,
                np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
            ]
        )

    def reset_model(self):
        # Perturb the initial pose uniformly and velocities with Gaussian noise.
        qpos = self.init_qpos + self.np_random.uniform(
            size=self.model.nq, low=-0.1, high=0.1
        )
        qvel = self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1
        self.set_state(qpos, qvel)
        return self._get_obs()

    def viewer_setup(self):
        assert self.viewer is not None
        self.viewer.cam.distance = self.model.stat.extent * 0.5


================================================
FILE: gym/envs/mujoco/ant_v3.py
================================================
import numpy as np

from gym import utils
from gym.envs.mujoco import MuJocoPyEnv
from gym.spaces import Box

DEFAULT_CAMERA_CONFIG = {
    "distance": 4.0,
}


class AntEnv(MuJocoPyEnv, utils.EzPickle):
    # v3 Ant: same task as ant.py but with configurable reward weights,
    # healthy ranges, and reset noise via gym.make kwargs.
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 20,
    }

    def __init__(
        self,
        xml_file="ant.xml",
        ctrl_cost_weight=0.5,
        contact_cost_weight=5e-4,
        healthy_reward=1.0,
        terminate_when_unhealthy=True,
        healthy_z_range=(0.2, 1.0),
        contact_force_range=(-1.0, 1.0),
        reset_noise_scale=0.1,
        exclude_current_positions_from_observation=True,
        **kwargs
    ):
        utils.EzPickle.__init__(
            self,
            xml_file,
            ctrl_cost_weight,
            contact_cost_weight,
            healthy_reward,
            terminate_when_unhealthy,
            healthy_z_range,
            contact_force_range,
            reset_noise_scale,
            exclude_current_positions_from_observation,
            **kwargs
        )

        self._ctrl_cost_weight = ctrl_cost_weight
        self._contact_cost_weight = contact_cost_weight

        self._healthy_reward = healthy_reward
        self._terminate_when_unhealthy = terminate_when_unhealthy
        self._healthy_z_range = healthy_z_range

        self._contact_force_range = contact_force_range

        self._reset_noise_scale = reset_noise_scale
        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        # 111 base dims; +2 when the torso x/y coordinates are kept.
        if exclude_current_positions_from_observation:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(111,), dtype=np.float64
            )
        else:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(113,), dtype=np.float64
            )

        MuJocoPyEnv.__init__(
            self, xml_file, 5, observation_space=observation_space, **kwargs
        )

    @property
    def healthy_reward(self):
        # Paid while healthy, and also unconditionally when termination on
        # unhealthy states is disabled.
        return (
            float(self.is_healthy or self._terminate_when_unhealthy)
            * self._healthy_reward
        )

    def control_cost(self, action):
        # Quadratic penalty on the action magnitude.
        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
        return control_cost

    @property
    def contact_forces(self):
        # External contact forces, clipped into the configured range.
        raw_contact_forces = self.sim.data.cfrc_ext
        min_value, max_value = self._contact_force_range
        contact_forces = np.clip(raw_contact_forces, min_value, max_value)
        return contact_forces

    @property
    def contact_cost(self):
        contact_cost = self._contact_cost_weight * np.sum(
            np.square(self.contact_forces)
        )
        return contact_cost

    @property
    def is_healthy(self):
        # Healthy iff the state is finite and the torso height stays in range.
        state = self.state_vector()
        min_z, max_z = self._healthy_z_range
        is_healthy = np.isfinite(state).all() and min_z <= state[2] <= max_z
        return is_healthy

    @property
    def terminated(self):
        terminated = not self.is_healthy if self._terminate_when_unhealthy else False
        return terminated

    def step(self, action):
        """Simulate one control step and return the 5-tuple transition."""
        xy_position_before = self.get_body_com("torso")[:2].copy()
        self.do_simulation(action, self.frame_skip)
        xy_position_after = self.get_body_com("torso")[:2].copy()

        xy_velocity = (xy_position_after - xy_position_before) / self.dt
        x_velocity, y_velocity = xy_velocity

        ctrl_cost = self.control_cost(action)
        contact_cost = self.contact_cost

        # Forward reward is the x-velocity of the torso.
        forward_reward = x_velocity
        healthy_reward = self.healthy_reward

        rewards = forward_reward + healthy_reward
        costs = ctrl_cost + contact_cost

        reward = rewards - costs

        terminated = self.terminated
        observation = self._get_obs()
        info = {
            "reward_forward": forward_reward,
            "reward_ctrl": -ctrl_cost,
            "reward_contact": -contact_cost,
            "reward_survive": healthy_reward,
            "x_position": xy_position_after[0],
            "y_position": xy_position_after[1],
            "distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
            "x_velocity": x_velocity,
            "y_velocity": y_velocity,
            "forward_reward": forward_reward,
        }

        if self.render_mode == "human":
            self.render()
        return observation, reward, terminated, False, info

    def _get_obs(self):
        position = self.sim.data.qpos.flat.copy()
        velocity = self.sim.data.qvel.flat.copy()
        contact_force = self.contact_forces.flat.copy()

        # Optionally drop the torso x/y position for position-agnostic policies.
        if self._exclude_current_positions_from_observation:
            position = position[2:]

        observations = np.concatenate((position, velocity, contact_force))

        return observations

    def reset_model(self):
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        # Uniform noise on positions, Gaussian noise on velocities.
        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        qvel = (
            self.init_qvel
            + self._reset_noise_scale * self.np_random.standard_normal(self.model.nv)
        )
        self.set_state(qpos, qvel)

        observation = self._get_obs()

        return observation

    def viewer_setup(self):
        assert self.viewer is not None
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)


================================================
FILE: gym/envs/mujoco/ant_v4.py
================================================
import numpy as np

from gym import utils
from gym.envs.mujoco import MujocoEnv
from gym.spaces import Box

DEFAULT_CAMERA_CONFIG = {
    "distance": 4.0,
}


class AntEnv(MujocoEnv, utils.EzPickle):
    """
    ### Description

    This environment is based on the environment introduced by Schulman,
    Moritz, Levine, Jordan and Abbeel in
    ["High-Dimensional Continuous Control Using Generalized Advantage Estimation"](https://arxiv.org/abs/1506.02438).
    The ant is a 3D robot consisting of one torso (free rotational body) with
    four legs attached to it with each leg having two links.
The goal is to coordinate the four legs to move in the forward (right) direction by applying torques on the eight hinges connecting the two links of each leg and the torso (nine parts and eight hinges). ### Action Space The action space is a `Box(-1, 1, (8,), float32)`. An action represents the torques applied at the hinge joints. | Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | | --- | ----------------------------------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ | | 0 | Torque applied on the rotor between the torso and front left hip | -1 | 1 | hip_1 (front_left_leg) | hinge | torque (N m) | | 1 | Torque applied on the rotor between the front left two links | -1 | 1 | angle_1 (front_left_leg) | hinge | torque (N m) | | 2 | Torque applied on the rotor between the torso and front right hip | -1 | 1 | hip_2 (front_right_leg) | hinge | torque (N m) | | 3 | Torque applied on the rotor between the front right two links | -1 | 1 | angle_2 (front_right_leg) | hinge | torque (N m) | | 4 | Torque applied on the rotor between the torso and back left hip | -1 | 1 | hip_3 (back_leg) | hinge | torque (N m) | | 5 | Torque applied on the rotor between the back left two links | -1 | 1 | angle_3 (back_leg) | hinge | torque (N m) | | 6 | Torque applied on the rotor between the torso and back right hip | -1 | 1 | hip_4 (right_back_leg) | hinge | torque (N m) | | 7 | Torque applied on the rotor between the back right two links | -1 | 1 | angle_4 (right_back_leg) | hinge | torque (N m) | ### Observation Space Observations consist of positional values of different body parts of the ant, followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities. By default, observations do not include the x- and y-coordinates of the ant's torso. 
These may be included by passing `exclude_current_positions_from_observation=False` during construction. In that case, the observation space will have 113 dimensions where the first two dimensions represent the x- and y- coordinates of the ant's torso. Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x- and y-coordinates of the torso will be returned in `info` with keys `"x_position"` and `"y_position"`, respectively. However, by default, an observation is a `ndarray` with shape `(111,)` where the elements correspond to the following: | Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit | |-----|--------------------------------------------------------------|--------|--------|----------------------------------------|-------|--------------------------| | 0 | z-coordinate of the torso (centre) | -Inf | Inf | torso | free | position (m) | | 1 | x-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) | | 2 | y-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) | | 3 | z-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) | | 4 | w-orientation of the torso (centre) | -Inf | Inf | torso | free | angle (rad) | | 5 | angle between torso and first link on front left | -Inf | Inf | hip_1 (front_left_leg) | hinge | angle (rad) | | 6 | angle between the two links on the front left | -Inf | Inf | ankle_1 (front_left_leg) | hinge | angle (rad) | | 7 | angle between torso and first link on front right | -Inf | Inf | hip_2 (front_right_leg) | hinge | angle (rad) | | 8 | angle between the two links on the front right | -Inf | Inf | ankle_2 (front_right_leg) | hinge | angle (rad) | | 9 | angle between torso and first link on back left | -Inf | Inf | hip_3 (back_leg) | hinge | angle (rad) | | 10 | angle between the two links on the back left | -Inf | Inf | ankle_3 (back_leg) | hinge | angle (rad) | | 11 | angle between torso and first link 
on back right | -Inf | Inf | hip_4 (right_back_leg) | hinge | angle (rad) | | 12 | angle between the two links on the back right | -Inf | Inf | ankle_4 (right_back_leg) | hinge | angle (rad) | | 13 | x-coordinate velocity of the torso | -Inf | Inf | torso | free | velocity (m/s) | | 14 | y-coordinate velocity of the torso | -Inf | Inf | torso | free | velocity (m/s) | | 15 | z-coordinate velocity of the torso | -Inf | Inf | torso | free | velocity (m/s) | | 16 | x-coordinate angular velocity of the torso | -Inf | Inf | torso | free | angular velocity (rad/s) | | 17 | y-coordinate angular velocity of the torso | -Inf | Inf | torso | free | angular velocity (rad/s) | | 18 | z-coordinate angular velocity of the torso | -Inf | Inf | torso | free | angular velocity (rad/s) | | 19 | angular velocity of angle between torso and front left link | -Inf | Inf | hip_1 (front_left_leg) | hinge | angle (rad) | | 20 | angular velocity of the angle between front left links | -Inf | Inf | ankle_1 (front_left_leg) | hinge | angle (rad) | | 21 | angular velocity of angle between torso and front right link | -Inf | Inf | hip_2 (front_right_leg) | hinge | angle (rad) | | 22 | angular velocity of the angle between front right links | -Inf | Inf | ankle_2 (front_right_leg) | hinge | angle (rad) | | 23 | angular velocity of angle between torso and back left link | -Inf | Inf | hip_3 (back_leg) | hinge | angle (rad) | | 24 | angular velocity of the angle between back left links | -Inf | Inf | ankle_3 (back_leg) | hinge | angle (rad) | | 25 | angular velocity of angle between torso and back right link | -Inf | Inf | hip_4 (right_back_leg) | hinge | angle (rad) | | 26 |angular velocity of the angle between back right links | -Inf | Inf | ankle_4 (right_back_leg) | hinge | angle (rad) | The remaining 14*6 = 84 elements of the observation are contact forces (external forces - force x, y, z and torque x, y, z) applied to the center of mass of each of the links. 
    The 14 links are: the ground link, the torso link, and 3 links for each leg (1 + 1 + 12) with the 6 external forces.

    The (x,y,z) coordinates are translational DOFs while the orientations are rotational
    DOFs expressed as quaternions. One can read more about free joints on the
    [Mujoco Documentation](https://mujoco.readthedocs.io/en/latest/XMLreference.html).

    **Note:** Ant-v4 environment no longer has the following contact forces issue.
    If using Ant versions earlier than v4, there have been reported issues that using a Mujoco-Py version > 2.0
    results in the contact forces always being 0. As such we recommend to use a Mujoco-Py
    version < 2.0 when using the Ant environment if you would like to report results
    with contact forces (if contact forces are not used in your experiments, you can use
    version > 2.0).

    ### Rewards
    The reward consists of three parts:
    - *healthy_reward*: Every timestep that the ant is healthy (see definition in section "Episode Termination"), it gets a reward of fixed value `healthy_reward`
    - *forward_reward*: A reward of moving forward which is measured as
    *(x-coordinate after action - x-coordinate before action)/dt*. *dt* is
    the time between actions and is dependent on the `frame_skip` parameter (default is 5),
    where the frametime is 0.01 - making the default *dt = 5 * 0.01 = 0.05*.
    This reward would be positive if the ant moves forward (in positive x direction).
    - *ctrl_cost*: A negative reward for penalising the ant if it takes actions
    that are too large. It is measured as *`ctrl_cost_weight` * sum(action<sup>2</sup>)*
    where *`ctr_cost_weight`* is a parameter set for the control and has a default value of 0.5.
    - *contact_cost*: A negative reward for penalising the ant if the external
    contact force is too large. It is calculated *`contact_cost_weight` *
    sum(clip(external contact force to `contact_force_range`)<sup>2</sup>)*.
The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost - contact_cost* and `info` will also contain the individual reward terms. ### Starting State All observations start in state (0.0, 0.0, 0.75, 1.0, 0.0 ... 0.0) with a uniform noise in the range of [-`reset_noise_scale`, `reset_noise_scale`] added to the positional values and standard normal noise with mean 0 and standard deviation `reset_noise_scale` added to the velocity values for stochasticity. Note that the initial z coordinate is intentionally selected to be slightly high, thereby indicating a standing up ant. The initial orientation is designed to make it face forward as well. ### Episode End The ant is said to be unhealthy if any of the following happens: 1. Any of the state space values is no longer finite 2. The z-coordinate of the torso is **not** in the closed interval given by `healthy_z_range` (defaults to [0.2, 1.0]) If `terminate_when_unhealthy=True` is passed during construction (which is the default), the episode ends when any of the following happens: 1. Truncation: The episode duration reaches a 1000 timesteps 2. Termination: The ant is unhealthy If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded. ### Arguments No additional arguments are currently supported in v2 and lower. ``` env = gym.make('Ant-v2') ``` v3 and v4 take gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. ``` env = gym.make('Ant-v4', ctrl_cost_weight=0.1, ...) 
``` | Parameter | Type | Default |Description | |-------------------------|------------|--------------|-------------------------------| | `xml_file` | **str** | `"ant.xml"` | Path to a MuJoCo model | | `ctrl_cost_weight` | **float** | `0.5` | Weight for *ctrl_cost* term (see section on reward) | | `contact_cost_weight` | **float** | `5e-4` | Weight for *contact_cost* term (see section on reward) | | `healthy_reward` | **float** | `1` | Constant reward given if the ant is "healthy" after timestep | | `terminate_when_unhealthy` | **bool**| `True` | If true, issue a done signal if the z-coordinate of the torso is no longer in the `healthy_z_range` | | `healthy_z_range` | **tuple** | `(0.2, 1)` | The ant is considered healthy if the z-coordinate of the torso is in this range | | `contact_force_range` | **tuple** | `(-1, 1)` | Contact forces are clipped to this range in the computation of *contact_cost* | | `reset_noise_scale` | **float** | `0.1` | Scale of random perturbations of initial position and velocity (see section on Starting State) | | `exclude_current_positions_from_observation`| **bool** | `True`| Whether or not to omit the x- and y-coordinates from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies | ### Version History * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 * v3: support for gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. rgb rendering comes from tracking camera (so agent does not run away from screen) * v2: All continuous control environments now use mujoco_py >= 1.50 * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. 
* v0: Initial versions release (1.0.0) """ metadata = { "render_modes": [ "human", "rgb_array", "depth_array", ], "render_fps": 20, } def __init__( self, xml_file="ant.xml", ctrl_cost_weight=0.5, use_contact_forces=False, contact_cost_weight=5e-4, healthy_reward=1.0, terminate_when_unhealthy=True, healthy_z_range=(0.2, 1.0), contact_force_range=(-1.0, 1.0), reset_noise_scale=0.1, exclude_current_positions_from_observation=True, **kwargs ): utils.EzPickle.__init__( self, xml_file, ctrl_cost_weight, use_contact_forces, contact_cost_weight, healthy_reward, terminate_when_unhealthy, healthy_z_range, contact_force_range, reset_noise_scale, exclude_current_positions_from_observation, **kwargs ) self._ctrl_cost_weight = ctrl_cost_weight self._contact_cost_weight = contact_cost_weight self._healthy_reward = healthy_reward self._terminate_when_unhealthy = terminate_when_unhealthy self._healthy_z_range = healthy_z_range self._contact_force_range = contact_force_range self._reset_noise_scale = reset_noise_scale self._use_contact_forces = use_contact_forces self._exclude_current_positions_from_observation = ( exclude_current_positions_from_observation ) obs_shape = 27 if not exclude_current_positions_from_observation: obs_shape += 2 if use_contact_forces: obs_shape += 84 observation_space = Box( low=-np.inf, high=np.inf, shape=(obs_shape,), dtype=np.float64 ) MujocoEnv.__init__( self, xml_file, 5, observation_space=observation_space, **kwargs ) @property def healthy_reward(self): return ( float(self.is_healthy or self._terminate_when_unhealthy) * self._healthy_reward ) def control_cost(self, action): control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) return control_cost @property def contact_forces(self): raw_contact_forces = self.data.cfrc_ext min_value, max_value = self._contact_force_range contact_forces = np.clip(raw_contact_forces, min_value, max_value) return contact_forces @property def contact_cost(self): contact_cost = self._contact_cost_weight * 
np.sum( np.square(self.contact_forces) ) return contact_cost @property def is_healthy(self): state = self.state_vector() min_z, max_z = self._healthy_z_range is_healthy = np.isfinite(state).all() and min_z <= state[2] <= max_z return is_healthy @property def terminated(self): terminated = not self.is_healthy if self._terminate_when_unhealthy else False return terminated def step(self, action): xy_position_before = self.get_body_com("torso")[:2].copy() self.do_simulation(action, self.frame_skip) xy_position_after = self.get_body_com("torso")[:2].copy() xy_velocity = (xy_position_after - xy_position_before) / self.dt x_velocity, y_velocity = xy_velocity forward_reward = x_velocity healthy_reward = self.healthy_reward rewards = forward_reward + healthy_reward costs = ctrl_cost = self.control_cost(action) terminated = self.terminated observation = self._get_obs() info = { "reward_forward": forward_reward, "reward_ctrl": -ctrl_cost, "reward_survive": healthy_reward, "x_position": xy_position_after[0], "y_position": xy_position_after[1], "distance_from_origin": np.linalg.norm(xy_position_after, ord=2), "x_velocity": x_velocity, "y_velocity": y_velocity, "forward_reward": forward_reward, } if self._use_contact_forces: contact_cost = self.contact_cost costs += contact_cost info["reward_ctrl"] = -contact_cost reward = rewards - costs if self.render_mode == "human": self.render() return observation, reward, terminated, False, info def _get_obs(self): position = self.data.qpos.flat.copy() velocity = self.data.qvel.flat.copy() if self._exclude_current_positions_from_observation: position = position[2:] if self._use_contact_forces: contact_force = self.contact_forces.flat.copy() return np.concatenate((position, velocity, contact_force)) else: return np.concatenate((position, velocity)) def reset_model(self): noise_low = -self._reset_noise_scale noise_high = self._reset_noise_scale qpos = self.init_qpos + self.np_random.uniform( low=noise_low, high=noise_high, size=self.model.nq 
) qvel = ( self.init_qvel + self._reset_noise_scale * self.np_random.standard_normal(self.model.nv) ) self.set_state(qpos, qvel) observation = self._get_obs() return observation def viewer_setup(self): assert self.viewer is not None for key, value in DEFAULT_CAMERA_CONFIG.items(): if isinstance(value, np.ndarray): getattr(self.viewer.cam, key)[:] = value else: setattr(self.viewer.cam, key, value) ================================================ FILE: gym/envs/mujoco/assets/ant.xml ================================================ ================================================ FILE: gym/envs/mujoco/assets/half_cheetah.xml ================================================ ================================================ FILE: gym/envs/mujoco/assets/hopper.xml ================================================ ================================================ FILE: gym/envs/mujoco/assets/humanoid.xml ================================================ ================================================ FILE: gym/envs/mujoco/assets/humanoidstandup.xml ================================================ ================================================ FILE: gym/envs/mujoco/assets/inverted_double_pendulum.xml ================================================ ================================================ FILE: gym/envs/mujoco/assets/inverted_pendulum.xml ================================================ ================================================ FILE: gym/envs/mujoco/assets/point.xml ================================================ ================================================ FILE: gym/envs/mujoco/assets/pusher.xml ================================================ ================================================ FILE: gym/envs/mujoco/assets/reacher.xml ================================================ ================================================ FILE: gym/envs/mujoco/assets/swimmer.xml ================================================ 
================================================ FILE: gym/envs/mujoco/assets/walker2d.xml ================================================ ================================================ FILE: gym/envs/mujoco/half_cheetah.py ================================================ import numpy as np from gym import utils from gym.envs.mujoco import MuJocoPyEnv from gym.spaces import Box class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle): metadata = { "render_modes": [ "human", "rgb_array", "depth_array", ], "render_fps": 20, } def __init__(self, **kwargs): observation_space = Box(low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64) MuJocoPyEnv.__init__( self, "half_cheetah.xml", 5, observation_space=observation_space, **kwargs ) utils.EzPickle.__init__(self, **kwargs) def step(self, action): xposbefore = self.sim.data.qpos[0] self.do_simulation(action, self.frame_skip) xposafter = self.sim.data.qpos[0] ob = self._get_obs() reward_ctrl = -0.1 * np.square(action).sum() reward_run = (xposafter - xposbefore) / self.dt reward = reward_ctrl + reward_run terminated = False if self.render_mode == "human": self.render() return ( ob, reward, terminated, False, dict(reward_run=reward_run, reward_ctrl=reward_ctrl), ) def _get_obs(self): return np.concatenate( [ self.sim.data.qpos.flat[1:], self.sim.data.qvel.flat, ] ) def reset_model(self): qpos = self.init_qpos + self.np_random.uniform( low=-0.1, high=0.1, size=self.model.nq ) qvel = self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1 self.set_state(qpos, qvel) return self._get_obs() def viewer_setup(self): assert self.viewer is not None self.viewer.cam.distance = self.model.stat.extent * 0.5 ================================================ FILE: gym/envs/mujoco/half_cheetah_v3.py ================================================ __credits__ = ["Rushiv Arora"] import numpy as np from gym import utils from gym.envs.mujoco import MuJocoPyEnv from gym.spaces import Box DEFAULT_CAMERA_CONFIG = { "distance": 4.0, } 
class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle): metadata = { "render_modes": [ "human", "rgb_array", "depth_array", ], "render_fps": 20, } def __init__( self, xml_file="half_cheetah.xml", forward_reward_weight=1.0, ctrl_cost_weight=0.1, reset_noise_scale=0.1, exclude_current_positions_from_observation=True, **kwargs ): utils.EzPickle.__init__( self, xml_file, forward_reward_weight, ctrl_cost_weight, reset_noise_scale, exclude_current_positions_from_observation, **kwargs ) self._forward_reward_weight = forward_reward_weight self._ctrl_cost_weight = ctrl_cost_weight self._reset_noise_scale = reset_noise_scale self._exclude_current_positions_from_observation = ( exclude_current_positions_from_observation ) if exclude_current_positions_from_observation: observation_space = Box( low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64 ) else: observation_space = Box( low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64 ) MuJocoPyEnv.__init__( self, xml_file, 5, observation_space=observation_space, **kwargs ) def control_cost(self, action): control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) return control_cost def step(self, action): x_position_before = self.sim.data.qpos[0] self.do_simulation(action, self.frame_skip) x_position_after = self.sim.data.qpos[0] x_velocity = (x_position_after - x_position_before) / self.dt ctrl_cost = self.control_cost(action) forward_reward = self._forward_reward_weight * x_velocity observation = self._get_obs() reward = forward_reward - ctrl_cost terminated = False info = { "x_position": x_position_after, "x_velocity": x_velocity, "reward_run": forward_reward, "reward_ctrl": -ctrl_cost, } if self.render_mode == "human": self.render() return observation, reward, terminated, False, info def _get_obs(self): position = self.sim.data.qpos.flat.copy() velocity = self.sim.data.qvel.flat.copy() if self._exclude_current_positions_from_observation: position = position[1:] observation = np.concatenate((position, velocity)).ravel() 
return observation def reset_model(self): noise_low = -self._reset_noise_scale noise_high = self._reset_noise_scale qpos = self.init_qpos + self.np_random.uniform( low=noise_low, high=noise_high, size=self.model.nq ) qvel = ( self.init_qvel + self._reset_noise_scale * self.np_random.standard_normal(self.model.nv) ) self.set_state(qpos, qvel) observation = self._get_obs() return observation def viewer_setup(self): assert self.viewer is not None for key, value in DEFAULT_CAMERA_CONFIG.items(): if isinstance(value, np.ndarray): getattr(self.viewer.cam, key)[:] = value else: setattr(self.viewer.cam, key, value) ================================================ FILE: gym/envs/mujoco/half_cheetah_v4.py ================================================ __credits__ = ["Rushiv Arora"] import numpy as np from gym import utils from gym.envs.mujoco import MujocoEnv from gym.spaces import Box DEFAULT_CAMERA_CONFIG = { "distance": 4.0, } class HalfCheetahEnv(MujocoEnv, utils.EzPickle): """ ### Description This environment is based on the work by P. Wawrzyński in ["A Cat-Like Robot Real-Time Learning to Run"](http://staff.elka.pw.edu.pl/~pwawrzyn/pub-s/0812_LSCLRR.pdf). The HalfCheetah is a 2-dimensional robot consisting of 9 links and 8 joints connecting them (including two paws). The goal is to apply a torque on the joints to make the cheetah run forward (right) as fast as possible, with a positive reward allocated based on the distance moved forward and a negative reward allocated for moving backward. The torso and head of the cheetah are fixed, and the torque can only be applied on the other 6 joints over the front and back thighs (connecting to the torso), shins (connecting to the thighs) and feet (connecting to the shins). ### Action Space The action space is a `Box(-1, 1, (6,), float32)`. An action represents the torques applied between *links*. 
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | | --- | --------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ | | 0 | Torque applied on the back thigh rotor | -1 | 1 | bthigh | hinge | torque (N m) | | 1 | Torque applied on the back shin rotor | -1 | 1 | bshin | hinge | torque (N m) | | 2 | Torque applied on the back foot rotor | -1 | 1 | bfoot | hinge | torque (N m) | | 3 | Torque applied on the front thigh rotor | -1 | 1 | fthigh | hinge | torque (N m) | | 4 | Torque applied on the front shin rotor | -1 | 1 | fshin | hinge | torque (N m) | | 5 | Torque applied on the front foot rotor | -1 | 1 | ffoot | hinge | torque (N m) | ### Observation Space Observations consist of positional values of different body parts of the cheetah, followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities. By default, observations do not include the x-coordinate of the cheetah's center of mass. It may be included by passing `exclude_current_positions_from_observation=False` during construction. In that case, the observation space will have 18 dimensions where the first dimension represents the x-coordinate of the cheetah's center of mass. Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x-coordinate will be returned in `info` with key `"x_position"`. 
    However, by default, the observation is a `ndarray` with shape `(17,)` where the elements correspond to the following:

    | Num | Observation                                | Min  | Max | Name (in corresponding XML file) | Joint | Unit                     |
    | --- | ------------------------------------------ | ---- | --- | -------------------------------- | ----- | ------------------------ |
    | 0   | z-coordinate of the front tip              | -Inf | Inf | rootz                            | slide | position (m)             |
    | 1   | angle of the front tip                     | -Inf | Inf | rooty                            | hinge | angle (rad)              |
    | 2   | angle of the back thigh                    | -Inf | Inf | bthigh                           | hinge | angle (rad)              |
    | 3   | angle of the back shin                     | -Inf | Inf | bshin                            | hinge | angle (rad)              |
    | 4   | angle of the back foot                     | -Inf | Inf | bfoot                            | hinge | angle (rad)              |
    | 5   | angle of the front thigh                   | -Inf | Inf | fthigh                           | hinge | angle (rad)              |
    | 6   | angle of the front shin                    | -Inf | Inf | fshin                            | hinge | angle (rad)              |
    | 7   | angle of the front foot                    | -Inf | Inf | ffoot                            | hinge | angle (rad)              |
    | 8   | velocity of the front tip along the x-axis | -Inf | Inf | rootx                            | slide | velocity (m/s)           |
    | 9   | velocity of the front tip along the z-axis | -Inf | Inf | rootz                            | slide | velocity (m/s)           |
    | 10  | angular velocity of the front tip          | -Inf | Inf | rooty                            | hinge | angular velocity (rad/s) |
    | 11  | angular velocity of the back thigh         | -Inf | Inf | bthigh                           | hinge | angular velocity (rad/s) |
    | 12  | angular velocity of the back shin          | -Inf | Inf | bshin                            | hinge | angular velocity (rad/s) |
    | 13  | angular velocity of the back foot          | -Inf | Inf | bfoot                            | hinge | angular velocity (rad/s) |
    | 14  | angular velocity of the front thigh        | -Inf | Inf | fthigh                           | hinge | angular velocity (rad/s) |
    | 15  | angular velocity of the front shin         | -Inf | Inf | fshin                            | hinge | angular velocity (rad/s) |
    | 16  | angular velocity of the front foot         | -Inf | Inf | ffoot                            | hinge | angular velocity (rad/s) |

    ### Rewards
    The reward consists of two parts:
    - *forward_reward*: A reward of moving forward which is measured
    as *`forward_reward_weight` * (x-coordinate before action -
x-coordinate after action)/dt*. *dt* is the time between actions and is dependent on the frame_skip parameter (fixed to 5), where the frametime is 0.01 - making the default *dt = 5 * 0.01 = 0.05*. This reward would be positive if the cheetah runs forward (right). - *ctrl_cost*: A cost for penalising the cheetah if it takes actions that are too large. It is measured as *`ctrl_cost_weight` * sum(action2)* where *`ctrl_cost_weight`* is a parameter set for the control and has a default value of 0.1 The total reward returned is ***reward*** *=* *forward_reward - ctrl_cost* and `info` will also contain the individual reward terms ### Starting State All observations start in state (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,) with a noise added to the initial state for stochasticity. As seen before, the first 8 values in the state are positional and the last 9 values are velocity. A uniform noise in the range of [-`reset_noise_scale`, `reset_noise_scale`] is added to the positional values while a standard normal noise with a mean of 0 and standard deviation of `reset_noise_scale` is added to the initial velocity values of all zeros. ### Episode End The episode truncates when the episode length is greater than 1000. ### Arguments No additional arguments are currently supported in v2 and lower. ``` env = gym.make('HalfCheetah-v2') ``` v3 and v4 take gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. ``` env = gym.make('HalfCheetah-v4', ctrl_cost_weight=0.1, ....) 
``` | Parameter | Type | Default | Description | | -------------------------------------------- | --------- | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `xml_file` | **str** | `"half_cheetah.xml"` | Path to a MuJoCo model | | `forward_reward_weight` | **float** | `1.0` | Weight for _forward_reward_ term (see section on reward) | | `ctrl_cost_weight` | **float** | `0.1` | Weight for _ctrl_cost_ weight (see section on reward) | | `reset_noise_scale` | **float** | `0.1` | Scale of random perturbations of initial position and velocity (see section on Starting State) | | `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x-coordinate from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies | ### Version History * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 * v3: support for gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. rgb rendering comes from tracking camera (so agent does not run away from screen) * v2: All continuous control environments now use mujoco_py >= 1.50 * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. 
* v0: Initial versions release (1.0.0) """ metadata = { "render_modes": [ "human", "rgb_array", "depth_array", ], "render_fps": 20, } def __init__( self, forward_reward_weight=1.0, ctrl_cost_weight=0.1, reset_noise_scale=0.1, exclude_current_positions_from_observation=True, **kwargs ): utils.EzPickle.__init__( self, forward_reward_weight, ctrl_cost_weight, reset_noise_scale, exclude_current_positions_from_observation, **kwargs ) self._forward_reward_weight = forward_reward_weight self._ctrl_cost_weight = ctrl_cost_weight self._reset_noise_scale = reset_noise_scale self._exclude_current_positions_from_observation = ( exclude_current_positions_from_observation ) if exclude_current_positions_from_observation: observation_space = Box( low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64 ) else: observation_space = Box( low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64 ) MujocoEnv.__init__( self, "half_cheetah.xml", 5, observation_space=observation_space, **kwargs ) def control_cost(self, action): control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) return control_cost def step(self, action): x_position_before = self.data.qpos[0] self.do_simulation(action, self.frame_skip) x_position_after = self.data.qpos[0] x_velocity = (x_position_after - x_position_before) / self.dt ctrl_cost = self.control_cost(action) forward_reward = self._forward_reward_weight * x_velocity observation = self._get_obs() reward = forward_reward - ctrl_cost terminated = False info = { "x_position": x_position_after, "x_velocity": x_velocity, "reward_run": forward_reward, "reward_ctrl": -ctrl_cost, } if self.render_mode == "human": self.render() return observation, reward, terminated, False, info def _get_obs(self): position = self.data.qpos.flat.copy() velocity = self.data.qvel.flat.copy() if self._exclude_current_positions_from_observation: position = position[1:] observation = np.concatenate((position, velocity)).ravel() return observation def reset_model(self): noise_low = 
-self._reset_noise_scale noise_high = self._reset_noise_scale qpos = self.init_qpos + self.np_random.uniform( low=noise_low, high=noise_high, size=self.model.nq ) qvel = ( self.init_qvel + self._reset_noise_scale * self.np_random.standard_normal(self.model.nv) ) self.set_state(qpos, qvel) observation = self._get_obs() return observation def viewer_setup(self): assert self.viewer is not None for key, value in DEFAULT_CAMERA_CONFIG.items(): if isinstance(value, np.ndarray): getattr(self.viewer.cam, key)[:] = value else: setattr(self.viewer.cam, key, value) ================================================ FILE: gym/envs/mujoco/hopper.py ================================================ import numpy as np from gym import utils from gym.envs.mujoco import MuJocoPyEnv from gym.spaces import Box class HopperEnv(MuJocoPyEnv, utils.EzPickle): metadata = { "render_modes": [ "human", "rgb_array", "depth_array", ], "render_fps": 125, } def __init__(self, **kwargs): observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64) MuJocoPyEnv.__init__( self, "hopper.xml", 4, observation_space=observation_space, **kwargs ) utils.EzPickle.__init__(self, **kwargs) def step(self, a): posbefore = self.sim.data.qpos[0] self.do_simulation(a, self.frame_skip) posafter, height, ang = self.sim.data.qpos[0:3] alive_bonus = 1.0 reward = (posafter - posbefore) / self.dt reward += alive_bonus reward -= 1e-3 * np.square(a).sum() s = self.state_vector() terminated = not ( np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and (height > 0.7) and (abs(ang) < 0.2) ) ob = self._get_obs() if self.render_mode == "human": self.render() return ob, reward, terminated, False, {} def _get_obs(self): return np.concatenate( [self.sim.data.qpos.flat[1:], np.clip(self.sim.data.qvel.flat, -10, 10)] ) def reset_model(self): qpos = self.init_qpos + self.np_random.uniform( low=-0.005, high=0.005, size=self.model.nq ) qvel = self.init_qvel + self.np_random.uniform( low=-0.005, high=0.005, 
size=self.model.nv ) self.set_state(qpos, qvel) return self._get_obs() def viewer_setup(self): assert self.viewer is not None self.viewer.cam.trackbodyid = 2 self.viewer.cam.distance = self.model.stat.extent * 0.75 self.viewer.cam.lookat[2] = 1.15 self.viewer.cam.elevation = -20 ================================================ FILE: gym/envs/mujoco/hopper_v3.py ================================================ __credits__ = ["Rushiv Arora"] import numpy as np from gym import utils from gym.envs.mujoco import MuJocoPyEnv from gym.spaces import Box DEFAULT_CAMERA_CONFIG = { "trackbodyid": 2, "distance": 3.0, "lookat": np.array((0.0, 0.0, 1.15)), "elevation": -20.0, } class HopperEnv(MuJocoPyEnv, utils.EzPickle): metadata = { "render_modes": [ "human", "rgb_array", "depth_array", ], "render_fps": 125, } def __init__( self, xml_file="hopper.xml", forward_reward_weight=1.0, ctrl_cost_weight=1e-3, healthy_reward=1.0, terminate_when_unhealthy=True, healthy_state_range=(-100.0, 100.0), healthy_z_range=(0.7, float("inf")), healthy_angle_range=(-0.2, 0.2), reset_noise_scale=5e-3, exclude_current_positions_from_observation=True, **kwargs ): utils.EzPickle.__init__( self, xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy, healthy_state_range, healthy_z_range, healthy_angle_range, reset_noise_scale, exclude_current_positions_from_observation, **kwargs ) self._forward_reward_weight = forward_reward_weight self._ctrl_cost_weight = ctrl_cost_weight self._healthy_reward = healthy_reward self._terminate_when_unhealthy = terminate_when_unhealthy self._healthy_state_range = healthy_state_range self._healthy_z_range = healthy_z_range self._healthy_angle_range = healthy_angle_range self._reset_noise_scale = reset_noise_scale self._exclude_current_positions_from_observation = ( exclude_current_positions_from_observation ) if exclude_current_positions_from_observation: observation_space = Box( low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64 
) else: observation_space = Box( low=-np.inf, high=np.inf, shape=(12,), dtype=np.float64 ) MuJocoPyEnv.__init__( self, xml_file, 4, observation_space=observation_space, **kwargs ) @property def healthy_reward(self): return ( float(self.is_healthy or self._terminate_when_unhealthy) * self._healthy_reward ) def control_cost(self, action): control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) return control_cost @property def is_healthy(self): z, angle = self.sim.data.qpos[1:3] state = self.state_vector()[2:] min_state, max_state = self._healthy_state_range min_z, max_z = self._healthy_z_range min_angle, max_angle = self._healthy_angle_range healthy_state = np.all(np.logical_and(min_state < state, state < max_state)) healthy_z = min_z < z < max_z healthy_angle = min_angle < angle < max_angle is_healthy = all((healthy_state, healthy_z, healthy_angle)) return is_healthy @property def terminated(self): terminated = not self.is_healthy if self._terminate_when_unhealthy else False return terminated def _get_obs(self): position = self.sim.data.qpos.flat.copy() velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10) if self._exclude_current_positions_from_observation: position = position[1:] observation = np.concatenate((position, velocity)).ravel() return observation def step(self, action): x_position_before = self.sim.data.qpos[0] self.do_simulation(action, self.frame_skip) x_position_after = self.sim.data.qpos[0] x_velocity = (x_position_after - x_position_before) / self.dt ctrl_cost = self.control_cost(action) forward_reward = self._forward_reward_weight * x_velocity healthy_reward = self.healthy_reward rewards = forward_reward + healthy_reward costs = ctrl_cost observation = self._get_obs() reward = rewards - costs terminated = self.terminated info = { "x_position": x_position_after, "x_velocity": x_velocity, } if self.render_mode == "human": self.render() return observation, reward, terminated, False, info def reset_model(self): noise_low = 
-self._reset_noise_scale noise_high = self._reset_noise_scale qpos = self.init_qpos + self.np_random.uniform( low=noise_low, high=noise_high, size=self.model.nq ) qvel = self.init_qvel + self.np_random.uniform( low=noise_low, high=noise_high, size=self.model.nv ) self.set_state(qpos, qvel) observation = self._get_obs() return observation def viewer_setup(self): assert self.viewer is not None for key, value in DEFAULT_CAMERA_CONFIG.items(): if isinstance(value, np.ndarray): getattr(self.viewer.cam, key)[:] = value else: setattr(self.viewer.cam, key, value) ================================================ FILE: gym/envs/mujoco/hopper_v4.py ================================================ import numpy as np from gym import utils from gym.envs.mujoco import MujocoEnv from gym.spaces import Box DEFAULT_CAMERA_CONFIG = { "trackbodyid": 2, "distance": 3.0, "lookat": np.array((0.0, 0.0, 1.15)), "elevation": -20.0, } class HopperEnv(MujocoEnv, utils.EzPickle): """ ### Description This environment is based on the work done by Erez, Tassa, and Todorov in ["Infinite Horizon Model Predictive Control for Nonlinear Periodic Tasks"](http://www.roboticsproceedings.org/rss07/p10.pdf). The environment aims to increase the number of independent state and control variables as compared to the classic control environments. The hopper is a two-dimensional one-legged figure that consist of four main body parts - the torso at the top, the thigh in the middle, the leg in the bottom, and a single foot on which the entire body rests. The goal is to make hops that move in the forward (right) direction by applying torques on the three hinges connecting the four body parts. ### Action Space The action space is a `Box(-1, 1, (3,), float32)`. 
    An action represents the torques applied between *links*

    | Num | Action                            | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit         |
    |-----|-----------------------------------|-------------|-------------|----------------------------------|-------|--------------|
    | 0   | Torque applied on the thigh rotor | -1          | 1           | thigh_joint                      | hinge | torque (N m) |
    | 1   | Torque applied on the leg rotor   | -1          | 1           | leg_joint                        | hinge | torque (N m) |
    | 2   | Torque applied on the foot rotor  | -1          | 1           | foot_joint                       | hinge | torque (N m) |

    ### Observation Space

    Observations consist of positional values of different body parts of the
    hopper, followed by the velocities of those individual parts
    (their derivatives) with all the positions ordered before all the velocities.

    By default, observations do not include the x-coordinate of the hopper. It may
    be included by passing `exclude_current_positions_from_observation=False` during construction.
    In that case, the observation space will have 12 dimensions where the first dimension
    represents the x-coordinate of the hopper.
    Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x-coordinate
    will be returned in `info` with key `"x_position"`.
    However, by default, the observation is a `ndarray` with shape `(11,)` where the elements
    correspond to the following:

    | Num | Observation                                      | Min  | Max | Name (in corresponding XML file) | Joint | Unit                     |
    | --- | ------------------------------------------------ | ---- | --- | -------------------------------- | ----- | ------------------------ |
    | 0   | z-coordinate of the top (height of hopper)       | -Inf | Inf | rootz                            | slide | position (m)             |
    | 1   | angle of the top                                 | -Inf | Inf | rooty                            | hinge | angle (rad)              |
    | 2   | angle of the thigh joint                         | -Inf | Inf | thigh_joint                      | hinge | angle (rad)              |
    | 3   | angle of the leg joint                           | -Inf | Inf | leg_joint                        | hinge | angle (rad)              |
    | 4   | angle of the foot joint                          | -Inf | Inf | foot_joint                       | hinge | angle (rad)              |
    | 5   | velocity of the x-coordinate of the top          | -Inf | Inf | rootx                            | slide | velocity (m/s)           |
    | 6   | velocity of the z-coordinate (height) of the top | -Inf | Inf | rootz                            | slide | velocity (m/s)           |
    | 7   | angular velocity of the angle of the top         | -Inf | Inf | rooty                            | hinge | angular velocity (rad/s) |
    | 8   | angular velocity of the thigh hinge              | -Inf | Inf | thigh_joint                      | hinge | angular velocity (rad/s) |
    | 9   | angular velocity of the leg hinge                | -Inf | Inf | leg_joint                        | hinge | angular velocity (rad/s) |
    | 10  | angular velocity of the foot hinge               | -Inf | Inf | foot_joint                       | hinge | angular velocity (rad/s) |

    ### Rewards
    The reward consists of three parts:
    - *healthy_reward*: Every timestep that the hopper is healthy (see definition in section "Episode End"),
    it gets a reward of fixed value `healthy_reward`.
    - *forward_reward*: A reward of hopping forward which is measured
    as *`forward_reward_weight` * (x-coordinate after action - x-coordinate before action)/dt*. *dt* is
    the time between actions and is dependent on the frame_skip parameter
    (fixed to 4), where the frametime is 0.002 - making the
    default *dt = 4 * 0.002 = 0.008*. This reward would be positive if the hopper
    hops forward (positive x direction).
- *ctrl_cost*: A cost for penalising the hopper if it takes actions that are too large. It is measured as *`ctrl_cost_weight` * sum(action2)* where *`ctrl_cost_weight`* is a parameter set for the control and has a default value of 0.001 The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost* and `info` will also contain the individual reward terms ### Starting State All observations start in state (0.0, 1.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) with a uniform noise in the range of [-`reset_noise_scale`, `reset_noise_scale`] added to the values for stochasticity. ### Episode End The hopper is said to be unhealthy if any of the following happens: 1. An element of `observation[1:]` (if `exclude_current_positions_from_observation=True`, else `observation[2:]`) is no longer contained in the closed interval specified by the argument `healthy_state_range` 2. The height of the hopper (`observation[0]` if `exclude_current_positions_from_observation=True`, else `observation[1]`) is no longer contained in the closed interval specified by the argument `healthy_z_range` (usually meaning that it has fallen) 3. The angle (`observation[1]` if `exclude_current_positions_from_observation=True`, else `observation[2]`) is no longer contained in the closed interval specified by the argument `healthy_angle_range` If `terminate_when_unhealthy=True` is passed during construction (which is the default), the episode ends when any of the following happens: 1. Truncation: The episode duration reaches a 1000 timesteps 2. Termination: The hopper is unhealthy If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded. ### Arguments No additional arguments are currently supported in v2 and lower. ``` env = gym.make('Hopper-v2') ``` v3 and v4 take gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. ``` env = gym.make('Hopper-v4', ctrl_cost_weight=0.1, ....) 
``` | Parameter | Type | Default | Description | | -------------------------------------------- | --------- | --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `xml_file` | **str** | `"hopper.xml"` | Path to a MuJoCo model | | `forward_reward_weight` | **float** | `1.0` | Weight for _forward_reward_ term (see section on reward) | | `ctrl_cost_weight` | **float** | `0.001` | Weight for _ctrl_cost_ reward (see section on reward) | | `healthy_reward` | **float** | `1` | Constant reward given if the ant is "healthy" after timestep | | `terminate_when_unhealthy` | **bool** | `True` | If true, issue a done signal if the hopper is no longer healthy | | `healthy_state_range` | **tuple** | `(-100, 100)` | The elements of `observation[1:]` (if `exclude_current_positions_from_observation=True`, else `observation[2:]`) must be in this range for the hopper to be considered healthy | | `healthy_z_range` | **tuple** | `(0.7, float("inf"))` | The z-coordinate must be in this range for the hopper to be considered healthy | | `healthy_angle_range` | **tuple** | `(-0.2, 0.2)` | The angle given by `observation[1]` (if `exclude_current_positions_from_observation=True`, else `observation[2]`) must be in this range for the hopper to be considered healthy | | `reset_noise_scale` | **float** | `5e-3` | Scale of random perturbations of initial position and velocity (see section on Starting State) | | `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x-coordinate from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies | ### Version History * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 * v3: support for gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. 
    rgb rendering comes from tracking camera (so agent does not run away from screen)
    * v2: All continuous control environments now use mujoco_py >= 1.50
    * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
    * v0: Initial versions release (1.0.0)
    """

    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 125,
    }

    def __init__(
        self,
        forward_reward_weight=1.0,
        ctrl_cost_weight=1e-3,
        healthy_reward=1.0,
        terminate_when_unhealthy=True,
        healthy_state_range=(-100.0, 100.0),
        healthy_z_range=(0.7, float("inf")),
        healthy_angle_range=(-0.2, 0.2),
        reset_noise_scale=5e-3,
        exclude_current_positions_from_observation=True,
        **kwargs
    ):
        utils.EzPickle.__init__(
            self,
            forward_reward_weight,
            ctrl_cost_weight,
            healthy_reward,
            terminate_when_unhealthy,
            healthy_state_range,
            healthy_z_range,
            healthy_angle_range,
            reset_noise_scale,
            exclude_current_positions_from_observation,
            **kwargs
        )

        self._forward_reward_weight = forward_reward_weight
        self._ctrl_cost_weight = ctrl_cost_weight
        self._healthy_reward = healthy_reward
        self._terminate_when_unhealthy = terminate_when_unhealthy
        self._healthy_state_range = healthy_state_range
        self._healthy_z_range = healthy_z_range
        self._healthy_angle_range = healthy_angle_range
        self._reset_noise_scale = reset_noise_scale
        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        # 11-dim observation when the torso x-position is excluded, 12-dim otherwise
        # (see the Observation Space section of the class docstring).
        if exclude_current_positions_from_observation:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64
            )
        else:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(12,), dtype=np.float64
            )

        MujocoEnv.__init__(
            self, "hopper.xml", 4, observation_space=observation_space, **kwargs
        )

    @property
    def healthy_reward(self):
        # The bonus is also paid while unhealthy when terminate_when_unhealthy is
        # False (the `or` keeps the factor at 1.0 in that case).
        return (
            float(self.is_healthy or self._terminate_when_unhealthy)
            * self._healthy_reward
        )

    def control_cost(self, action):
        # Quadratic penalty on the applied action, scaled by ctrl_cost_weight.
        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
        return control_cost

    @property
    def is_healthy(self):
        # Healthy iff: torso height within healthy_z_range, torso angle within
        # healthy_angle_range, and all remaining state values within
        # healthy_state_range.
        z, angle = self.data.qpos[1:3]
        state = self.state_vector()[2:]

        min_state, max_state = self._healthy_state_range
        min_z, max_z = self._healthy_z_range
        min_angle, max_angle = self._healthy_angle_range

        healthy_state = np.all(np.logical_and(min_state < state, state < max_state))
        healthy_z = min_z < z < max_z
        healthy_angle = min_angle < angle < max_angle

        is_healthy = all((healthy_state, healthy_z, healthy_angle))

        return is_healthy

    @property
    def terminated(self):
        # Only terminate on unhealthy states when configured to do so.
        terminated = not self.is_healthy if self._terminate_when_unhealthy else False
        return terminated

    def _get_obs(self):
        # Observation = joint positions (optionally without the x-coordinate)
        # followed by joint velocities clipped to [-10, 10].
        position = self.data.qpos.flat.copy()
        velocity = np.clip(self.data.qvel.flat.copy(), -10, 10)

        if self._exclude_current_positions_from_observation:
            position = position[1:]

        observation = np.concatenate((position, velocity)).ravel()
        return observation

    def step(self, action):
        # Forward velocity is estimated by finite differences of the x-position
        # across the simulation step.
        x_position_before = self.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
        x_position_after = self.data.qpos[0]
        x_velocity = (x_position_after - x_position_before) / self.dt

        ctrl_cost = self.control_cost(action)

        forward_reward = self._forward_reward_weight * x_velocity
        healthy_reward = self.healthy_reward

        rewards = forward_reward + healthy_reward
        costs = ctrl_cost

        observation = self._get_obs()
        reward = rewards - costs
        terminated = self.terminated
        info = {
            "x_position": x_position_after,
            "x_velocity": x_velocity,
        }

        if self.render_mode == "human":
            self.render()
        return observation, reward, terminated, False, info

    def reset_model(self):
        # Start from init_qpos/init_qvel with uniform noise in
        # [-reset_noise_scale, reset_noise_scale] for stochasticity.
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nv
        )

        self.set_state(qpos, qvel)

        observation = self._get_obs()
        return observation

    def viewer_setup(self):
        # Apply the module-level DEFAULT_CAMERA_CONFIG to the viewer camera.
        assert self.viewer is not None
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)


================================================
FILE: gym/envs/mujoco/humanoid.py
================================================
import numpy as np

from gym import utils
from gym.envs.mujoco import MuJocoPyEnv
from gym.spaces import Box


def mass_center(model, sim):
    # Mass-weighted mean of the body frame positions; only the x-coordinate of
    # the centre of mass is returned.
    mass = np.expand_dims(model.body_mass, 1)
    xpos = sim.data.xipos
    return (np.sum(mass * xpos, 0) / np.sum(mass))[0]


class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
    # Humanoid locomotion task (mujoco_py based, Humanoid-v2).
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 67,
    }

    def __init__(self, **kwargs):
        observation_space = Box(
            low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
        )
        MuJocoPyEnv.__init__(
            self, "humanoid.xml", 5, observation_space=observation_space, **kwargs
        )
        utils.EzPickle.__init__(self, **kwargs)

    def _get_obs(self):
        # Observation: joint positions with the torso x/y dropped, joint
        # velocities, body inertias, body velocities, actuator forces and
        # external contact forces.
        data = self.sim.data
        return np.concatenate(
            [
                data.qpos.flat[2:],
                data.qvel.flat,
                data.cinert.flat,
                data.cvel.flat,
                data.qfrc_actuator.flat,
                data.cfrc_ext.flat,
            ]
        )

    def step(self, a):
        # Forward progress is measured via the centre-of-mass x displacement.
        pos_before = mass_center(self.model, self.sim)
        self.do_simulation(a, self.frame_skip)
        pos_after = mass_center(self.model, self.sim)
        alive_bonus = 5.0
        data = self.sim.data
        lin_vel_cost = 1.25 * (pos_after - pos_before) / self.dt
        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
        # Impact cost is capped at 10 so contact spikes cannot dominate the reward.
        quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
        quad_impact_cost = min(quad_impact_cost, 10)
        reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
        qpos = self.sim.data.qpos
        # Terminate when the torso height leaves the [1.0, 2.0] band (fallen or jumping).
        terminated = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))

        if self.render_mode == "human":
            self.render()
        return (
            self._get_obs(),
            reward,
            terminated,
            False,
            dict(
                reward_linvel=lin_vel_cost,
                reward_quadctrl=-quad_ctrl_cost,
                reward_alive=alive_bonus,
                reward_impact=-quad_impact_cost,
            ),
        )

    def reset_model(self):
        # Uniform noise of magnitude c around the initial pose and velocities.
        c = 0.01
        self.set_state(
            self.init_qpos
            + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel
            + self.np_random.uniform(
                low=-c,
                high=c,
                size=self.model.nv,
            ),
        )
        return self._get_obs()

    def viewer_setup(self):
        assert self.viewer is not None
        self.viewer.cam.trackbodyid = 1
        self.viewer.cam.distance = self.model.stat.extent * 1.0
        self.viewer.cam.lookat[2] = 2.0
        self.viewer.cam.elevation = -20


================================================
FILE: gym/envs/mujoco/humanoid_v3.py
================================================
import numpy as np

from gym import utils
from gym.envs.mujoco import MuJocoPyEnv
from gym.spaces import Box

DEFAULT_CAMERA_CONFIG = {
    "trackbodyid": 1,
    "distance": 4.0,
    "lookat": np.array((0.0, 0.0, 2.0)),
    "elevation": -20.0,
}


def mass_center(model, sim):
    # x/y coordinates of the centre of mass (mass-weighted mean of body positions).
    mass = np.expand_dims(model.body_mass, axis=1)
    xpos = sim.data.xipos
    return (np.sum(mass * xpos, axis=0) / np.sum(mass))[0:2].copy()


class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
    # Configurable humanoid locomotion task (mujoco_py based, Humanoid-v3).
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 67,
    }

    def __init__(
        self,
        xml_file="humanoid.xml",
        forward_reward_weight=1.25,
        ctrl_cost_weight=0.1,
        contact_cost_weight=5e-7,
        contact_cost_range=(-np.inf, 10.0),
        healthy_reward=5.0,
        terminate_when_unhealthy=True,
        healthy_z_range=(1.0, 2.0),
        reset_noise_scale=1e-2,
        exclude_current_positions_from_observation=True,
        **kwargs
    ):
        utils.EzPickle.__init__(
            self,
            xml_file,
            forward_reward_weight,
            ctrl_cost_weight,
            contact_cost_weight,
            contact_cost_range,
            healthy_reward,
            terminate_when_unhealthy,
            healthy_z_range,
            reset_noise_scale,
            exclude_current_positions_from_observation,
            **kwargs
        )

        self._forward_reward_weight = forward_reward_weight
        self._ctrl_cost_weight = ctrl_cost_weight
        self._contact_cost_weight = contact_cost_weight
        self._contact_cost_range = contact_cost_range
        self._healthy_reward = healthy_reward
        self._terminate_when_unhealthy = terminate_when_unhealthy
        self._healthy_z_range = healthy_z_range
        self._reset_noise_scale = reset_noise_scale
        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        # 376-dim observation without the torso x/y coordinates, 378-dim with them.
        if
 exclude_current_positions_from_observation:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
            )
        else:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(378,), dtype=np.float64
            )

        MuJocoPyEnv.__init__(
            self, xml_file, 5, observation_space=observation_space, **kwargs
        )

    @property
    def healthy_reward(self):
        # Paid every step while healthy; also paid while unhealthy when
        # terminate_when_unhealthy is False (the `or` keeps the factor at 1.0).
        return (
            float(self.is_healthy or self._terminate_when_unhealthy)
            * self._healthy_reward
        )

    def control_cost(self, action):
        # NOTE(review): the cost is computed from the actuator controls
        # (self.sim.data.ctrl), not from the `action` argument.
        control_cost = self._ctrl_cost_weight * np.sum(np.square(self.sim.data.ctrl))
        return control_cost

    @property
    def contact_cost(self):
        # Quadratic penalty on external contact forces, clipped to contact_cost_range.
        contact_forces = self.sim.data.cfrc_ext
        contact_cost = self._contact_cost_weight * np.sum(np.square(contact_forces))
        min_cost, max_cost = self._contact_cost_range
        contact_cost = np.clip(contact_cost, min_cost, max_cost)
        return contact_cost

    @property
    def is_healthy(self):
        # Healthy iff the torso z-coordinate stays inside healthy_z_range.
        min_z, max_z = self._healthy_z_range
        is_healthy = min_z < self.sim.data.qpos[2] < max_z

        return is_healthy

    @property
    def terminated(self):
        terminated = (not self.is_healthy) if self._terminate_when_unhealthy else False
        return terminated

    def _get_obs(self):
        # Observation: joint positions (optionally without torso x/y), joint
        # velocities, body inertias, body velocities, actuator forces and
        # external contact forces.
        position = self.sim.data.qpos.flat.copy()
        velocity = self.sim.data.qvel.flat.copy()

        com_inertia = self.sim.data.cinert.flat.copy()
        com_velocity = self.sim.data.cvel.flat.copy()

        actuator_forces = self.sim.data.qfrc_actuator.flat.copy()
        external_contact_forces = self.sim.data.cfrc_ext.flat.copy()

        if self._exclude_current_positions_from_observation:
            position = position[2:]

        return np.concatenate(
            (
                position,
                velocity,
                com_inertia,
                com_velocity,
                actuator_forces,
                external_contact_forces,
            )
        )

    def step(self, action):
        # Velocity of the centre of mass via finite differences over the step.
        xy_position_before = mass_center(self.model, self.sim)
        self.do_simulation(action, self.frame_skip)
        xy_position_after = mass_center(self.model, self.sim)

        xy_velocity = (xy_position_after - xy_position_before) / self.dt
        x_velocity, y_velocity = xy_velocity

        ctrl_cost = self.control_cost(action)
        contact_cost = self.contact_cost

        forward_reward = self._forward_reward_weight * x_velocity
        healthy_reward = self.healthy_reward

        rewards = forward_reward + healthy_reward
        costs = ctrl_cost + contact_cost

        observation = self._get_obs()
        reward = rewards - costs
        terminated = self.terminated
        info = {
            "reward_linvel": forward_reward,
            "reward_quadctrl": -ctrl_cost,
            "reward_alive": healthy_reward,
            "reward_impact": -contact_cost,
            "x_position": xy_position_after[0],
            "y_position": xy_position_after[1],
            "distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
            "x_velocity": x_velocity,
            "y_velocity": y_velocity,
            "forward_reward": forward_reward,
        }

        if self.render_mode == "human":
            self.render()
        return observation, reward, terminated, False, info

    def reset_model(self):
        # Start from init_qpos/init_qvel with uniform noise in
        # [-reset_noise_scale, reset_noise_scale] for stochasticity.
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nv
        )
        self.set_state(qpos, qvel)

        observation = self._get_obs()
        return observation

    def viewer_setup(self):
        # Apply the module-level DEFAULT_CAMERA_CONFIG to the viewer camera.
        assert self.viewer is not None
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)


================================================
FILE: gym/envs/mujoco/humanoid_v4.py
================================================
import numpy as np

from gym import utils
from gym.envs.mujoco import MujocoEnv
from gym.spaces import Box

DEFAULT_CAMERA_CONFIG = {
    "trackbodyid": 1,
    "distance": 4.0,
    "lookat": np.array((0.0, 0.0, 2.0)),
    "elevation": -20.0,
}


def mass_center(model, data):
    # x/y coordinates of the centre of mass (mass-weighted mean of body positions).
    mass = np.expand_dims(model.body_mass, axis=1)
    xpos = data.xipos
    return (np.sum(mass * xpos, axis=0) / np.sum(mass))[0:2].copy()


class HumanoidEnv(MujocoEnv, utils.EzPickle):
    """
    ### Description
    This environment is based on the environment introduced by Tassa, Erez and Todorov
    in ["Synthesis and stabilization of
complex behaviors through online trajectory optimization"](https://ieeexplore.ieee.org/document/6386025). The 3D bipedal robot is designed to simulate a human. It has a torso (abdomen) with a pair of legs and arms. The legs each consist of two links, and so the arms (representing the knees and elbows respectively). The goal of the environment is to walk forward as fast as possible without falling over. ### Action Space The action space is a `Box(-1, 1, (17,), float32)`. An action represents the torques applied at the hinge joints. | Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | |-----|----------------------|---------------|----------------|---------------------------------------|-------|------| | 0 | Torque applied on the hinge in the y-coordinate of the abdomen | -0.4 | 0.4 | hip_1 (front_left_leg) | hinge | torque (N m) | | 1 | Torque applied on the hinge in the z-coordinate of the abdomen | -0.4 | 0.4 | angle_1 (front_left_leg) | hinge | torque (N m) | | 2 | Torque applied on the hinge in the x-coordinate of the abdomen | -0.4 | 0.4 | hip_2 (front_right_leg) | hinge | torque (N m) | | 3 | Torque applied on the rotor between torso/abdomen and the right hip (x-coordinate) | -0.4 | 0.4 | right_hip_x (right_thigh) | hinge | torque (N m) | | 4 | Torque applied on the rotor between torso/abdomen and the right hip (z-coordinate) | -0.4 | 0.4 | right_hip_z (right_thigh) | hinge | torque (N m) | | 5 | Torque applied on the rotor between torso/abdomen and the right hip (y-coordinate) | -0.4 | 0.4 | right_hip_y (right_thigh) | hinge | torque (N m) | | 6 | Torque applied on the rotor between the right hip/thigh and the right shin | -0.4 | 0.4 | right_knee | hinge | torque (N m) | | 7 | Torque applied on the rotor between torso/abdomen and the left hip (x-coordinate) | -0.4 | 0.4 | left_hip_x (left_thigh) | hinge | torque (N m) | | 8 | Torque applied on the rotor between torso/abdomen and the left hip (z-coordinate) | -0.4 | 0.4 
| left_hip_z (left_thigh) | hinge | torque (N m) | | 9 | Torque applied on the rotor between torso/abdomen and the left hip (y-coordinate) | -0.4 | 0.4 | left_hip_y (left_thigh) | hinge | torque (N m) | | 10 | Torque applied on the rotor between the left hip/thigh and the left shin | -0.4 | 0.4 | left_knee | hinge | torque (N m) | | 11 | Torque applied on the rotor between the torso and right upper arm (coordinate -1) | -0.4 | 0.4 | right_shoulder1 | hinge | torque (N m) | | 12 | Torque applied on the rotor between the torso and right upper arm (coordinate -2) | -0.4 | 0.4 | right_shoulder2 | hinge | torque (N m) | | 13 | Torque applied on the rotor between the right upper arm and right lower arm | -0.4 | 0.4 | right_elbow | hinge | torque (N m) | | 14 | Torque applied on the rotor between the torso and left upper arm (coordinate -1) | -0.4 | 0.4 | left_shoulder1 | hinge | torque (N m) | | 15 | Torque applied on the rotor between the torso and left upper arm (coordinate -2) | -0.4 | 0.4 | left_shoulder2 | hinge | torque (N m) | | 16 | Torque applied on the rotor between the left upper arm and left lower arm | -0.4 | 0.4 | left_elbow | hinge | torque (N m) | ### Observation Space Observations consist of positional values of different body parts of the Humanoid, followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities. By default, observations do not include the x- and y-coordinates of the torso. These may be included by passing `exclude_current_positions_from_observation=False` during construction. In that case, the observation space will have 378 dimensions where the first two dimensions represent the x- and y-coordinates of the torso. Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x- and y-coordinates will be returned in `info` with keys `"x_position"` and `"y_position"`, respectively. 
However, by default, the observation is a `ndarray` with shape `(376,)` where the elements correspond to the following: | Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit | | --- | --------------------------------------------------------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | -------------------------- | | 0 | z-coordinate of the torso (centre) | -Inf | Inf | root | free | position (m) | | 1 | x-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) | | 2 | y-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) | | 3 | z-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) | | 4 | w-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) | | 5 | z-angle of the abdomen (in lower_waist) | -Inf | Inf | abdomen_z | hinge | angle (rad) | | 6 | y-angle of the abdomen (in lower_waist) | -Inf | Inf | abdomen_y | hinge | angle (rad) | | 7 | x-angle of the abdomen (in pelvis) | -Inf | Inf | abdomen_x | hinge | angle (rad) | | 8 | x-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_x | hinge | angle (rad) | | 9 | z-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_z | hinge | angle (rad) | | 19 | y-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_y | hinge | angle (rad) | | 11 | angle between right hip and the right shin (in right_knee) | -Inf | Inf | right_knee | hinge | angle (rad) | | 12 | x-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_x | hinge | angle (rad) | | 13 | z-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_z | hinge | angle (rad) | | 14 | y-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_y | hinge | angle (rad) | | 15 | angle between 
left hip and the left shin (in left_knee) | -Inf | Inf | left_knee | hinge | angle (rad) | | 16 | coordinate-1 (multi-axis) angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1 | hinge | angle (rad) | | 17 | coordinate-2 (multi-axis) angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2 | hinge | angle (rad) | | 18 | angle between right upper arm and right_lower_arm | -Inf | Inf | right_elbow | hinge | angle (rad) | | 19 | coordinate-1 (multi-axis) angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder1 | hinge | angle (rad) | | 20 | coordinate-2 (multi-axis) angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder2 | hinge | angle (rad) | | 21 | angle between left upper arm and left_lower_arm | -Inf | Inf | left_elbow | hinge | angle (rad) | | 22 | x-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) | | 23 | y-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) | | 24 | z-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) | | 25 | x-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) | | 26 | y-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) | | 27 | z-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) | | 28 | z-coordinate of angular velocity of the abdomen (in lower_waist) | -Inf | Inf | abdomen_z | hinge | anglular velocity (rad/s) | | 29 | y-coordinate of angular velocity of the abdomen (in lower_waist) | -Inf | Inf | abdomen_y | hinge | anglular velocity (rad/s) | | 30 | x-coordinate of angular velocity of the abdomen (in pelvis) | -Inf | Inf | abdomen_x | hinge | aanglular velocity (rad/s) | | 31 | x-coordinate of the angular velocity of the angle between pelvis and right hip (in 
right_thigh) | -Inf | Inf | right_hip_x | hinge | anglular velocity (rad/s) | | 32 | z-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_z | hinge | anglular velocity (rad/s) | | 33 | y-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_y | hinge | anglular velocity (rad/s) | | 34 | angular velocity of the angle between right hip and the right shin (in right_knee) | -Inf | Inf | right_knee | hinge | anglular velocity (rad/s) | | 35 | x-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_x | hinge | anglular velocity (rad/s) | | 36 | z-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_z | hinge | anglular velocity (rad/s) | | 37 | y-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_y | hinge | anglular velocity (rad/s) | | 38 | angular velocity of the angle between left hip and the left shin (in left_knee) | -Inf | Inf | left_knee | hinge | anglular velocity (rad/s) | | 39 | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1 | hinge | anglular velocity (rad/s) | | 40 | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2 | hinge | anglular velocity (rad/s) | | 41 | angular velocity of the angle between right upper arm and right_lower_arm | -Inf | Inf | right_elbow | hinge | anglular velocity (rad/s) | | 42 | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder1 | hinge | anglular velocity (rad/s) | | 43 | coordinate-2 (multi-axis) of the angular velocity of the angle between 
torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder2 | hinge | anglular velocity (rad/s) | | 44 | angular velocitty of the angle between left upper arm and left_lower_arm | -Inf | Inf | left_elbow | hinge | anglular velocity (rad/s) | Additionally, after all the positional and velocity based values in the table, the observation contains (in order): - *cinert:* Mass and inertia of a single rigid body relative to the center of mass (this is an intermediate result of transition). It has shape 14*10 (*nbody * 10*) and hence adds to another 140 elements in the state space. - *cvel:* Center of mass based velocity. It has shape 14 * 6 (*nbody * 6*) and hence adds another 84 elements in the state space - *qfrc_actuator:* Constraint force generated as the actuator force. This has shape `(23,)` *(nv * 1)* and hence adds another 23 elements to the state space. - *cfrc_ext:* This is the center of mass based external force on the body. It has shape 14 * 6 (*nbody * 6*) and hence adds to another 84 elements in the state space. where *nbody* stands for the number of bodies in the robot and *nv* stands for the number of degrees of freedom (*= dim(qvel)*) The (x,y,z) coordinates are translational DOFs while the orientations are rotational DOFs expressed as quaternions. One can read more about free joints on the [Mujoco Documentation](https://mujoco.readthedocs.io/en/latest/XMLreference.html). **Note:** Humanoid-v4 environment no longer has the following contact forces issue. If using previous Humanoid versions from v4, there have been reported issues that using a Mujoco-Py version > 2.0 results in the contact forces always being 0. As such we recommend to use a Mujoco-Py version < 2.0 when using the Humanoid environment if you would like to report results with contact forces (if contact forces are not used in your experiments, you can use version > 2.0). 
### Rewards The reward consists of three parts: - *healthy_reward*: Every timestep that the humanoid is alive (see section Episode Termination for definition), it gets a reward of fixed value `healthy_reward` - *forward_reward*: A reward of walking forward which is measured as *`forward_reward_weight` * (average center of mass before action - average center of mass after action)/dt*. *dt* is the time between actions and is dependent on the frame_skip parameter (default is 5), where the frametime is 0.003 - making the default *dt = 5 * 0.003 = 0.015*. This reward would be positive if the humanoid walks forward (in positive x-direction). The calculation for the center of mass is defined in the `.py` file for the Humanoid. - *ctrl_cost*: A negative reward for penalising the humanoid if it has too large of a control force. If there are *nu* actuators/controls, then the control has shape `nu x 1`. It is measured as *`ctrl_cost_weight` * sum(control2)*. - *contact_cost*: A negative reward for penalising the humanoid if the external contact force is too large. It is calculated by clipping *`contact_cost_weight` * sum(external contact force2)* to the interval specified by `contact_cost_range`. The total reward returned is ***reward*** *=* *healthy_reward + forward_reward - ctrl_cost - contact_cost* and `info` will also contain the individual reward terms ### Starting State All observations start in state (0.0, 0.0, 1.4, 1.0, 0.0 ... 0.0) with a uniform noise in the range of [-`reset_noise_scale`, `reset_noise_scale`] added to the positional and velocity values (values in the table) for stochasticity. Note that the initial z coordinate is intentionally selected to be high, thereby indicating a standing up humanoid. The initial orientation is designed to make it face forward as well. ### Episode End The humanoid is said to be unhealthy if the z-position of the torso is no longer contained in the closed interval specified by the argument `healthy_z_range`. 
If `terminate_when_unhealthy=True` is passed during construction (which is the default), the episode ends when any of the following happens: 1. Truncation: The episode duration reaches a 1000 timesteps 3. Termination: The humanoid is unhealthy If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded. ### Arguments No additional arguments are currently supported in v2 and lower. ``` env = gym.make('Humanoid-v4') ``` v3 and v4 take gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. ``` env = gym.make('Humanoid-v4', ctrl_cost_weight=0.1, ....) ``` | Parameter | Type | Default | Description | | -------------------------------------------- | --------- | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | `xml_file` | **str** | `"humanoid.xml"` | Path to a MuJoCo model | | `forward_reward_weight` | **float** | `1.25` | Weight for _forward_reward_ term (see section on reward) | | `ctrl_cost_weight` | **float** | `0.1` | Weight for _ctrl_cost_ term (see section on reward) | | `contact_cost_weight` | **float** | `5e-7` | Weight for _contact_cost_ term (see section on reward) | | `healthy_reward` | **float** | `5.0` | Constant reward given if the humanoid is "healthy" after timestep | | `terminate_when_unhealthy` | **bool** | `True` | If true, issue a done signal if the z-coordinate of the torso is no longer in the `healthy_z_range` | | `healthy_z_range` | **tuple** | `(1.0, 2.0)` | The humanoid is considered healthy if the z-coordinate of the torso is in this range | | `reset_noise_scale` | **float** | `1e-2` | Scale of random perturbations of initial position and velocity (see section on Starting State) | | `exclude_current_positions_from_observation` | **bool** | `True` | Whether or not to omit the x- and y-coordinates from observations. 
Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |

    ### Version History

    * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3
    * v3: support for gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. rgb rendering comes from tracking camera (so agent does not run away from screen)
    * v2: All continuous control environments now use mujoco_py >= 1.50
    * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
    * v0: Initial versions release (1.0.0)
    """

    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 67,
    }

    def __init__(
        self,
        forward_reward_weight=1.25,
        ctrl_cost_weight=0.1,
        healthy_reward=5.0,
        terminate_when_unhealthy=True,
        healthy_z_range=(1.0, 2.0),
        reset_noise_scale=1e-2,
        exclude_current_positions_from_observation=True,
        **kwargs
    ):
        utils.EzPickle.__init__(
            self,
            forward_reward_weight,
            ctrl_cost_weight,
            healthy_reward,
            terminate_when_unhealthy,
            healthy_z_range,
            reset_noise_scale,
            exclude_current_positions_from_observation,
            **kwargs
        )

        self._forward_reward_weight = forward_reward_weight
        self._ctrl_cost_weight = ctrl_cost_weight
        self._healthy_reward = healthy_reward
        self._terminate_when_unhealthy = terminate_when_unhealthy
        self._healthy_z_range = healthy_z_range
        self._reset_noise_scale = reset_noise_scale
        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        # 376-dim observation without the torso x/y coordinates, 378-dim with them.
        if exclude_current_positions_from_observation:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
            )
        else:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(378,), dtype=np.float64
            )

        MujocoEnv.__init__(
            self, "humanoid.xml", 5, observation_space=observation_space, **kwargs
        )

    @property
    def healthy_reward(self):
        # Paid every step while healthy; also paid while unhealthy when
        # terminate_when_unhealthy is False (the `or` keeps the factor at 1.0).
        return (
            float(self.is_healthy or self._terminate_when_unhealthy)
            * self._healthy_reward
        )

    def control_cost(self, action):
        # NOTE(review): the cost is computed from the actuator controls
        # (self.data.ctrl), not from the `action` argument.
        control_cost = self._ctrl_cost_weight * np.sum(np.square(self.data.ctrl))
        return control_cost

    @property
    def is_healthy(self):
        # Healthy iff the torso z-coordinate stays inside healthy_z_range.
        min_z, max_z = self._healthy_z_range
        is_healthy = min_z < self.data.qpos[2] < max_z

        return is_healthy

    @property
    def terminated(self):
        terminated = (not self.is_healthy) if self._terminate_when_unhealthy else False
        return terminated

    def _get_obs(self):
        # Observation: joint positions (optionally without torso x/y), joint
        # velocities, body inertias, body velocities, actuator forces and
        # external contact forces.
        position = self.data.qpos.flat.copy()
        velocity = self.data.qvel.flat.copy()

        com_inertia = self.data.cinert.flat.copy()
        com_velocity = self.data.cvel.flat.copy()

        actuator_forces = self.data.qfrc_actuator.flat.copy()
        external_contact_forces = self.data.cfrc_ext.flat.copy()

        if self._exclude_current_positions_from_observation:
            position = position[2:]

        return np.concatenate(
            (
                position,
                velocity,
                com_inertia,
                com_velocity,
                actuator_forces,
                external_contact_forces,
            )
        )

    def step(self, action):
        # v4 reward: forward progress + alive bonus - control cost (no contact
        # cost term, unlike v3).
        xy_position_before = mass_center(self.model, self.data)
        self.do_simulation(action, self.frame_skip)
        xy_position_after = mass_center(self.model, self.data)

        xy_velocity = (xy_position_after - xy_position_before) / self.dt
        x_velocity, y_velocity = xy_velocity

        ctrl_cost = self.control_cost(action)

        forward_reward = self._forward_reward_weight * x_velocity
        healthy_reward = self.healthy_reward

        rewards = forward_reward + healthy_reward

        observation = self._get_obs()
        reward = rewards - ctrl_cost
        terminated = self.terminated
        info = {
            "reward_linvel": forward_reward,
            "reward_quadctrl": -ctrl_cost,
            "reward_alive": healthy_reward,
            "x_position": xy_position_after[0],
            "y_position": xy_position_after[1],
            "distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
            "x_velocity": x_velocity,
            "y_velocity": y_velocity,
            "forward_reward": forward_reward,
        }

        if self.render_mode == "human":
            self.render()
        return observation, reward, terminated, False, info

    def reset_model(self):
        # Start from init_qpos/init_qvel with uniform noise in
        # [-reset_noise_scale, reset_noise_scale] for stochasticity.
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nv
        )
        self.set_state(qpos, qvel)

        observation = self._get_obs()
        return observation

    def viewer_setup(self):
        # Apply the module-level DEFAULT_CAMERA_CONFIG to the viewer camera.
        assert self.viewer is not None
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)


================================================
FILE: gym/envs/mujoco/humanoidstandup.py
================================================
import numpy as np

from gym import utils
from gym.envs.mujoco import MuJocoPyEnv
from gym.spaces import Box


class HumanoidStandupEnv(MuJocoPyEnv, utils.EzPickle):
    # Humanoid stand-up task (mujoco_py based): the robot starts lying down and
    # is rewarded for raising its torso.
    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 67,
    }

    def __init__(self, **kwargs):
        observation_space = Box(
            low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64
        )
        MuJocoPyEnv.__init__(
            self,
            "humanoidstandup.xml",
            5,
            observation_space=observation_space,
            **kwargs
        )
        utils.EzPickle.__init__(self, **kwargs)

    def _get_obs(self):
        # Observation: joint positions with the torso x/y dropped, joint
        # velocities, body inertias, body velocities, actuator forces and
        # external contact forces.
        data = self.sim.data
        return np.concatenate(
            [
                data.qpos.flat[2:],
                data.qvel.flat,
                data.cinert.flat,
                data.cvel.flat,
                data.qfrc_actuator.flat,
                data.cfrc_ext.flat,
            ]
        )

    def step(self, a):
        self.do_simulation(a, self.frame_skip)
        pos_after = self.sim.data.qpos[2]
        data = self.sim.data
        # Upward reward: torso height divided by the simulator timestep.
        uph_cost = (pos_after - 0) / self.model.opt.timestep

        quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
        # Impact cost is capped at 10 so contact spikes cannot dominate the reward.
        quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum()
        quad_impact_cost = min(quad_impact_cost, 10)
        reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1

        if self.render_mode == "human":
            self.render()
        # The stand-up task never terminates on its own (always returns False).
        return (
            self._get_obs(),
            reward,
            False,
            False,
            dict(
                reward_linup=uph_cost,
                reward_quadctrl=-quad_ctrl_cost,
                reward_impact=-quad_impact_cost,
            ),
        )

    def reset_model(self):
        # Uniform noise of magnitude c around the initial pose and velocities.
        c = 0.01
        self.set_state(
            self.init_qpos
            + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
            self.init_qvel
            + self.np_random.uniform(
                low=-c,
                high=c,
                size=self.model.nv,
            ),
        )
        return self._get_obs()

    def viewer_setup(self):
        assert self.viewer is not None
        self.viewer.cam.trackbodyid = 1
        self.viewer.cam.distance = self.model.stat.extent * 1.0
        self.viewer.cam.lookat[2] = 0.8925
        self.viewer.cam.elevation = -20


================================================
FILE: gym/envs/mujoco/humanoidstandup_v4.py
================================================
import numpy as np

from gym import utils
from gym.envs.mujoco import MujocoEnv
from gym.spaces import Box


class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):
    """
    ### Description
    This environment is based on the environment introduced by Tassa, Erez and Todorov
    in ["Synthesis and stabilization of complex behaviors through online trajectory
    optimization"](https://ieeexplore.ieee.org/document/6386025).
    The 3D bipedal robot is designed to simulate a human. It has a torso (abdomen) with a
    pair of legs and arms. The legs each consist of two links, and so the arms (representing the
    knees and elbows respectively). The environment starts with the humanoid laying on the
    ground, and then the goal of the environment is to make the humanoid standup and then keep
    it standing by applying torques on the various hinges.

    ### Action Space
    The agent take a 17-element vector for actions.

    The action space is a continuous `(action, ...)` all in `[-1, 1]`, where `action`
    represents the numerical torques applied at the hinge joints.
| Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | | --- | ---------------------------------------------------------------------------------- | ----------- | ----------- | -------------------------------- | ----- | ------------ | | 0 | Torque applied on the hinge in the y-coordinate of the abdomen | -0.4 | 0.4 | hip_1 (front_left_leg) | hinge | torque (N m) | | 1 | Torque applied on the hinge in the z-coordinate of the abdomen | -0.4 | 0.4 | angle_1 (front_left_leg) | hinge | torque (N m) | | 2 | Torque applied on the hinge in the x-coordinate of the abdomen | -0.4 | 0.4 | hip_2 (front_right_leg) | hinge | torque (N m) | | 3 | Torque applied on the rotor between torso/abdomen and the right hip (x-coordinate) | -0.4 | 0.4 | right_hip_x (right_thigh) | hinge | torque (N m) | | 4 | Torque applied on the rotor between torso/abdomen and the right hip (z-coordinate) | -0.4 | 0.4 | right_hip_z (right_thigh) | hinge | torque (N m) | | 5 | Torque applied on the rotor between torso/abdomen and the right hip (y-coordinate) | -0.4 | 0.4 | right_hip_y (right_thigh) | hinge | torque (N m) | | 6 | Torque applied on the rotor between the right hip/thigh and the right shin | -0.4 | 0.4 | right_knee | hinge | torque (N m) | | 7 | Torque applied on the rotor between torso/abdomen and the left hip (x-coordinate) | -0.4 | 0.4 | left_hip_x (left_thigh) | hinge | torque (N m) | | 8 | Torque applied on the rotor between torso/abdomen and the left hip (z-coordinate) | -0.4 | 0.4 | left_hip_z (left_thigh) | hinge | torque (N m) | | 9 | Torque applied on the rotor between torso/abdomen and the left hip (y-coordinate) | -0.4 | 0.4 | left_hip_y (left_thigh) | hinge | torque (N m) | | 10 | Torque applied on the rotor between the left hip/thigh and the left shin | -0.4 | 0.4 | left_knee | hinge | torque (N m) | | 11 | Torque applied on the rotor between the torso and right upper arm (coordinate -1) | -0.4 | 0.4 | right_shoulder1 | hinge | torque (N m) 
| | 12 | Torque applied on the rotor between the torso and right upper arm (coordinate -2) | -0.4 | 0.4 | right_shoulder2 | hinge | torque (N m) | | 13 | Torque applied on the rotor between the right upper arm and right lower arm | -0.4 | 0.4 | right_elbow | hinge | torque (N m) | | 14 | Torque applied on the rotor between the torso and left upper arm (coordinate -1) | -0.4 | 0.4 | left_shoulder1 | hinge | torque (N m) | | 15 | Torque applied on the rotor between the torso and left upper arm (coordinate -2) | -0.4 | 0.4 | left_shoulder2 | hinge | torque (N m) | | 16 | Torque applied on the rotor between the left upper arm and left lower arm | -0.4 | 0.4 | left_elbow | hinge | torque (N m) | ### Observation Space The state space consists of positional values of different body parts of the Humanoid, followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities. **Note:** The x- and y-coordinates of the torso are being omitted to produce position-agnostic behavior in policies The observation is a `ndarray` with shape `(376,)` where the elements correspond to the following: | Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit | | --- | --------------------------------------------------------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | -------------------------- | | 0 | z-coordinate of the torso (centre) | -Inf | Inf | root | free | position (m) | | 1 | x-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) | | 2 | y-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) | | 3 | z-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) | | 4 | w-orientation of the torso (centre) | -Inf | Inf | root | free | angle (rad) | | 5 | z-angle of the abdomen (in lower_waist) | -Inf | Inf | abdomen_z | hinge | angle (rad) | | 6 | y-angle of the abdomen (in 
lower_waist) | -Inf | Inf | abdomen_y | hinge | angle (rad) | | 7 | x-angle of the abdomen (in pelvis) | -Inf | Inf | abdomen_x | hinge | angle (rad) | | 8 | x-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_x | hinge | angle (rad) | | 9 | z-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_z | hinge | angle (rad) | | 10 | y-coordinate of angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_y | hinge | angle (rad) | | 11 | angle between right hip and the right shin (in right_knee) | -Inf | Inf | right_knee | hinge | angle (rad) | | 12 | x-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_x | hinge | angle (rad) | | 13 | z-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_z | hinge | angle (rad) | | 14 | y-coordinate of angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_y | hinge | angle (rad) | | 15 | angle between left hip and the left shin (in left_knee) | -Inf | Inf | left_knee | hinge | angle (rad) | | 16 | coordinate-1 (multi-axis) angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1 | hinge | angle (rad) | | 17 | coordinate-2 (multi-axis) angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2 | hinge | angle (rad) | | 18 | angle between right upper arm and right_lower_arm | -Inf | Inf | right_elbow | hinge | angle (rad) | | 19 | coordinate-1 (multi-axis) angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder1 | hinge | angle (rad) | | 20 | coordinate-2 (multi-axis) angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder2 | hinge | angle (rad) | | 21 | angle between left upper arm and left_lower_arm | -Inf | Inf | left_elbow | hinge | angle (rad) | | 22 | x-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) 
| | 23 | y-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) | | 24 | z-coordinate velocity of the torso (centre) | -Inf | Inf | root | free | velocity (m/s) | | 25 | x-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) | | 26 | y-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) | | 27 | z-coordinate angular velocity of the torso (centre) | -Inf | Inf | root | free | anglular velocity (rad/s) | | 28 | z-coordinate of angular velocity of the abdomen (in lower_waist) | -Inf | Inf | abdomen_z | hinge | anglular velocity (rad/s) | | 29 | y-coordinate of angular velocity of the abdomen (in lower_waist) | -Inf | Inf | abdomen_y | hinge | anglular velocity (rad/s) | | 30 | x-coordinate of angular velocity of the abdomen (in pelvis) | -Inf | Inf | abdomen_x | hinge | aanglular velocity (rad/s) | | 31 | x-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_x | hinge | anglular velocity (rad/s) | | 32 | z-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_z | hinge | anglular velocity (rad/s) | | 33 | y-coordinate of the angular velocity of the angle between pelvis and right hip (in right_thigh) | -Inf | Inf | right_hip_y | hinge | anglular velocity (rad/s) | | 35 | angular velocity of the angle between right hip and the right shin (in right_knee) | -Inf | Inf | right_knee | hinge | anglular velocity (rad/s) | | 36 | x-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_x | hinge | anglular velocity (rad/s) | | 37 | z-coordinate of the angular velocity of the angle between pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_z | hinge | anglular velocity (rad/s) | | 38 | y-coordinate of the angular velocity of the angle between 
pelvis and left hip (in left_thigh) | -Inf | Inf | left_hip_y | hinge | anglular velocity (rad/s) | | 39 | angular velocity of the angle between left hip and the left shin (in left_knee) | -Inf | Inf | left_knee | hinge | anglular velocity (rad/s) | | 40 | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder1 | hinge | anglular velocity (rad/s) | | 41 | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and right arm (in right_upper_arm) | -Inf | Inf | right_shoulder2 | hinge | anglular velocity (rad/s) | | 42 | angular velocity of the angle between right upper arm and right_lower_arm | -Inf | Inf | right_elbow | hinge | anglular velocity (rad/s) | | 43 | coordinate-1 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder1 | hinge | anglular velocity (rad/s) | | 44 | coordinate-2 (multi-axis) of the angular velocity of the angle between torso and left arm (in left_upper_arm) | -Inf | Inf | left_shoulder2 | hinge | anglular velocity (rad/s) | | 45 | angular velocitty of the angle between left upper arm and left_lower_arm | -Inf | Inf | left_elbow | hinge | anglular velocity (rad/s) | Additionally, after all the positional and velocity based values in the table, the state_space consists of (in order): - *cinert:* Mass and inertia of a single rigid body relative to the center of mass (this is an intermediate result of transition). It has shape 14*10 (*nbody * 10*) and hence adds to another 140 elements in the state space. - *cvel:* Center of mass based velocity. It has shape 14 * 6 (*nbody * 6*) and hence adds another 84 elements in the state space - *qfrc_actuator:* Constraint force generated as the actuator force. This has shape `(23,)` *(nv * 1)* and hence adds another 23 elements to the state space. - *cfrc_ext:* This is the center of mass based external force on the body. 
    It has shape 14 * 6 (*nbody * 6*) and hence adds another 84 elements in the state space,
    where *nbody* stands for the number of bodies in the robot and *nv* stands for the number
    of degrees of freedom (*= dim(qvel)*).

    The (x,y,z) coordinates are translational DOFs while the orientations are rotational DOFs
    expressed as quaternions. One can read more about free joints on the
    [Mujoco Documentation](https://mujoco.readthedocs.io/en/latest/XMLreference.html).

    **Note:** The HumanoidStandup-v4 environment no longer has the following contact forces
    issue. If using HumanoidStandup versions prior to v4, there have been reported issues
    that using a Mujoco-Py version > 2.0 results in the contact forces always being 0. As
    such we recommend to use a Mujoco-Py version < 2.0 when using the Humanoid environment
    if you would like to report results with contact forces (if contact forces are not used
    in your experiments, you can use version > 2.0).

    ### Rewards
    The reward consists of three parts:
    - *uph_cost*: A reward for moving upward (in an attempt to stand up). This is not a
      relative reward which measures how much upward it has moved from the last timestep,
      but it is an absolute reward which measures how much upward the Humanoid has moved
      overall. It is measured as *(z coordinate after action - 0)/(atomic timestep)*, where
      *z coordinate after action* is index 0 in the state/index 2 in the table, and *atomic
      timestep* is the time for one frame of movement even though the simulation has a
      framerate of 5 (done in order to inflate rewards a little for faster learning).
    - *quad_ctrl_cost*: A negative reward for penalising the humanoid if it has too large
      of a control force. If there are *nu* actuators/controls, then the control has shape
      `nu x 1`. It is measured as *0.1 **x** sum(control^2)*.
    - *quad_impact_cost*: A negative reward for penalising the humanoid if the external
      contact force is too large. It is calculated as
      *min(0.5 * 0.000001 * sum(external contact force^2), 10)*.
The total reward returned is ***reward*** *=* *uph_cost + 1 - quad_ctrl_cost - quad_impact_cost* ### Starting State All observations start in state (0.0, 0.0, 0.105, 1.0, 0.0 ... 0.0) with a uniform noise in the range of [-0.01, 0.01] added to the positional and velocity values (values in the table) for stochasticity. Note that the initial z coordinate is intentionally selected to be low, thereby indicating a laying down humanoid. The initial orientation is designed to make it face forward as well. ### Episode End The episode ends when any of the following happens: 1. Truncation: The episode duration reaches a 1000 timesteps 2. Termination: Any of the state space values is no longer finite ### Arguments No additional arguments are currently supported. ``` env = gym.make('HumanoidStandup-v4') ``` There is no v3 for HumanoidStandup, unlike the robot environments where a v3 and beyond take gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. ### Version History * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 * v3: support for gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. rgb rendering comes from tracking camera (so agent does not run away from screen) * v2: All continuous control environments now use mujoco_py >= 1.50 * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. 
* v0: Initial versions release (1.0.0) """ metadata = { "render_modes": [ "human", "rgb_array", "depth_array", ], "render_fps": 67, } def __init__(self, **kwargs): observation_space = Box( low=-np.inf, high=np.inf, shape=(376,), dtype=np.float64 ) MujocoEnv.__init__( self, "humanoidstandup.xml", 5, observation_space=observation_space, **kwargs ) utils.EzPickle.__init__(self, **kwargs) def _get_obs(self): data = self.data return np.concatenate( [ data.qpos.flat[2:], data.qvel.flat, data.cinert.flat, data.cvel.flat, data.qfrc_actuator.flat, data.cfrc_ext.flat, ] ) def step(self, a): self.do_simulation(a, self.frame_skip) pos_after = self.data.qpos[2] data = self.data uph_cost = (pos_after - 0) / self.model.opt.timestep quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum() quad_impact_cost = 0.5e-6 * np.square(data.cfrc_ext).sum() quad_impact_cost = min(quad_impact_cost, 10) reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1 if self.render_mode == "human": self.render() return ( self._get_obs(), reward, False, False, dict( reward_linup=uph_cost, reward_quadctrl=-quad_ctrl_cost, reward_impact=-quad_impact_cost, ), ) def reset_model(self): c = 0.01 self.set_state( self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq), self.init_qvel + self.np_random.uniform( low=-c, high=c, size=self.model.nv, ), ) return self._get_obs() def viewer_setup(self): assert self.viewer is not None self.viewer.cam.trackbodyid = 1 self.viewer.cam.distance = self.model.stat.extent * 1.0 self.viewer.cam.lookat[2] = 0.8925 self.viewer.cam.elevation = -20 ================================================ FILE: gym/envs/mujoco/inverted_double_pendulum.py ================================================ import numpy as np from gym import utils from gym.envs.mujoco import MuJocoPyEnv from gym.spaces import Box class InvertedDoublePendulumEnv(MuJocoPyEnv, utils.EzPickle): metadata = { "render_modes": [ "human", "rgb_array", "depth_array", ], "render_fps": 20, } def 
__init__(self, **kwargs): observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64) MuJocoPyEnv.__init__( self, "inverted_double_pendulum.xml", 5, observation_space=observation_space, **kwargs ) utils.EzPickle.__init__(self, **kwargs) def step(self, action): self.do_simulation(action, self.frame_skip) ob = self._get_obs() x, _, y = self.sim.data.site_xpos[0] dist_penalty = 0.01 * x**2 + (y - 2) ** 2 v1, v2 = self.sim.data.qvel[1:3] vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2 alive_bonus = 10 r = alive_bonus - dist_penalty - vel_penalty terminated = bool(y <= 1) if self.render_mode == "human": self.render() return ob, r, terminated, False, {} def _get_obs(self): return np.concatenate( [ self.sim.data.qpos[:1], # cart x pos np.sin(self.sim.data.qpos[1:]), # link angles np.cos(self.sim.data.qpos[1:]), np.clip(self.sim.data.qvel, -10, 10), np.clip(self.sim.data.qfrc_constraint, -10, 10), ] ).ravel() def reset_model(self): self.set_state( self.init_qpos + self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq), self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1, ) return self._get_obs() def viewer_setup(self): assert self.viewer is not None v = self.viewer v.cam.trackbodyid = 0 v.cam.distance = self.model.stat.extent * 0.5 v.cam.lookat[2] = 0.12250000000000005 # v.model.stat.center[2] ================================================ FILE: gym/envs/mujoco/inverted_double_pendulum_v4.py ================================================ import numpy as np from gym import utils from gym.envs.mujoco import MujocoEnv from gym.spaces import Box class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle): """ ### Description This environment originates from control theory and builds on the cartpole environment based on the work done by Barto, Sutton, and Anderson in ["Neuronlike adaptive elements that can solve difficult learning control problems"](https://ieeexplore.ieee.org/document/6313077), powered by the Mujoco physics simulator 
- allowing for more complex experiments (such as varying the effects of gravity or constraints). This environment involves a cart that can moved linearly, with a pole fixed on it and a second pole fixed on the other end of the first one (leaving the second pole as the only one with one free end). The cart can be pushed left or right, and the goal is to balance the second pole on top of the first pole, which is in turn on top of the cart, by applying continuous forces on the cart. ### Action Space The agent take a 1-element vector for actions. The action space is a continuous `(action)` in `[-1, 1]`, where `action` represents the numerical force applied to the cart (with magnitude representing the amount of force and sign representing the direction) | Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | |-----|---------------------------|-------------|-------------|----------------------------------|-------|-----------| | 0 | Force applied on the cart | -1 | 1 | slider | slide | Force (N) | ### Observation Space The state space consists of positional values of different body parts of the pendulum system, followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities. 
The observation is a `ndarray` with shape `(11,)` where the elements correspond to the following: | Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit | | --- | ----------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------ | | 0 | position of the cart along the linear surface | -Inf | Inf | slider | slide | position (m) | | 1 | sine of the angle between the cart and the first pole | -Inf | Inf | sin(hinge) | hinge | unitless | | 2 | sine of the angle between the two poles | -Inf | Inf | sin(hinge2) | hinge | unitless | | 3 | cosine of the angle between the cart and the first pole | -Inf | Inf | cos(hinge) | hinge | unitless | | 4 | cosine of the angle between the two poles | -Inf | Inf | cos(hinge2) | hinge | unitless | | 5 | velocity of the cart | -Inf | Inf | slider | slide | velocity (m/s) | | 6 | angular velocity of the angle between the cart and the first pole | -Inf | Inf | hinge | hinge | angular velocity (rad/s) | | 7 | angular velocity of the angle between the two poles | -Inf | Inf | hinge2 | hinge | angular velocity (rad/s) | | 8 | constraint force - 1 | -Inf | Inf | | | Force (N) | | 9 | constraint force - 2 | -Inf | Inf | | | Force (N) | | 10 | constraint force - 3 | -Inf | Inf | | | Force (N) | There is physical contact between the robots and their environment - and Mujoco attempts at getting realisitic physics simulations for the possible physical contact dynamics by aiming for physical accuracy and computational efficiency. There is one constraint force for contacts for each degree of freedom (3). The approach and handling of constraints by Mujoco is unique to the simulator and is based on their research. 
    One can find more information in their
    [*documentation*](https://mujoco.readthedocs.io/en/latest/computation.html) or in their
    paper ["Analytically-invertible dynamics with contacts and constraints: Theory and
    implementation in MuJoCo"](https://homes.cs.washington.edu/~todorov/papers/TodorovICRA14.pdf).

    ### Rewards

    The reward consists of two parts:
    - *alive_bonus*: The goal is to make the second inverted pendulum stand upright (within
      a certain angle limit) as long as possible - as such a reward of +10 is awarded for
      each timestep that the second pole is upright.
    - *distance_penalty*: This reward is a measure of how far the *tip* of the second
      pendulum (the only free end) moves, and it is calculated as
      *0.01 * x^2 + (y - 2)^2*, where *x* is the x-coordinate of the tip and *y* is the
      y-coordinate of the tip of the second pole.
    - *velocity_penalty*: A negative reward for penalising the agent if it moves too fast
      *0.001 * v1^2 + 0.005 * v2^2*

    The total reward returned is ***reward*** *=* *alive_bonus - distance_penalty - velocity_penalty*

    ### Starting State
    All observations start in state
    (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) with a uniform noise in the
    range of [-0.1, 0.1] added to the positional values (cart position and pole angles) and
    standard normal force with a standard deviation of 0.1 added to the velocity values for
    stochasticity.

    ### Episode End
    The episode ends when any of the following happens:

    1. Truncation: The episode duration reaches 1000 timesteps.
    2. Termination: Any of the state space values is no longer finite.
    3. Termination: The y_coordinate of the tip of the second pole *is less than or equal*
       to 1. The maximum standing height of the system is 1.196 m when all the parts are
       perpendicularly vertical on top of each other.

    ### Arguments

    No additional arguments are currently supported.
``` env = gym.make('InvertedDoublePendulum-v4') ``` There is no v3 for InvertedPendulum, unlike the robot environments where a v3 and beyond take gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. ### Version History * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 * v3: support for gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. rgb rendering comes from tracking camera (so agent does not run away from screen) * v2: All continuous control environments now use mujoco_py >= 1.50 * v1: max_time_steps raised to 1000 for robot based tasks (including inverted pendulum) * v0: Initial versions release (1.0.0) """ metadata = { "render_modes": [ "human", "rgb_array", "depth_array", ], "render_fps": 20, } def __init__(self, **kwargs): observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64) MujocoEnv.__init__( self, "inverted_double_pendulum.xml", 5, observation_space=observation_space, **kwargs ) utils.EzPickle.__init__(self, **kwargs) def step(self, action): self.do_simulation(action, self.frame_skip) ob = self._get_obs() x, _, y = self.data.site_xpos[0] dist_penalty = 0.01 * x**2 + (y - 2) ** 2 v1, v2 = self.data.qvel[1:3] vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2 alive_bonus = 10 r = alive_bonus - dist_penalty - vel_penalty terminated = bool(y <= 1) if self.render_mode == "human": self.render() return ob, r, terminated, False, {} def _get_obs(self): return np.concatenate( [ self.data.qpos[:1], # cart x pos np.sin(self.data.qpos[1:]), # link angles np.cos(self.data.qpos[1:]), np.clip(self.data.qvel, -10, 10), np.clip(self.data.qfrc_constraint, -10, 10), ] ).ravel() def reset_model(self): self.set_state( self.init_qpos + self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq), self.init_qvel + self.np_random.standard_normal(self.model.nv) * 0.1, ) return self._get_obs() def viewer_setup(self): assert self.viewer is not None v = self.viewer v.cam.trackbodyid = 0 
v.cam.distance = self.model.stat.extent * 0.5 v.cam.lookat[2] = 0.12250000000000005 # v.model.stat.center[2] ================================================ FILE: gym/envs/mujoco/inverted_pendulum.py ================================================ import numpy as np from gym import utils from gym.envs.mujoco import MuJocoPyEnv from gym.spaces import Box class InvertedPendulumEnv(MuJocoPyEnv, utils.EzPickle): metadata = { "render_modes": [ "human", "rgb_array", "depth_array", ], "render_fps": 25, } def __init__(self, **kwargs): utils.EzPickle.__init__(self, **kwargs) observation_space = Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64) MuJocoPyEnv.__init__( self, "inverted_pendulum.xml", 2, observation_space=observation_space, **kwargs ) def step(self, a): reward = 1.0 self.do_simulation(a, self.frame_skip) ob = self._get_obs() terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2)) if self.render_mode == "human": self.render() return ob, reward, terminated, False, {} def reset_model(self): qpos = self.init_qpos + self.np_random.uniform( size=self.model.nq, low=-0.01, high=0.01 ) qvel = self.init_qvel + self.np_random.uniform( size=self.model.nv, low=-0.01, high=0.01 ) self.set_state(qpos, qvel) return self._get_obs() def _get_obs(self): return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel() def viewer_setup(self): assert self.viewer is not None self.viewer.cam.trackbodyid = 0 self.viewer.cam.distance = self.model.stat.extent ================================================ FILE: gym/envs/mujoco/inverted_pendulum_v4.py ================================================ import numpy as np from gym import utils from gym.envs.mujoco import MujocoEnv from gym.spaces import Box class InvertedPendulumEnv(MujocoEnv, utils.EzPickle): """ ### Description This environment is the cartpole environment based on the work done by Barto, Sutton, and Anderson in ["Neuronlike adaptive elements that can solve difficult learning control 
problems"](https://ieeexplore.ieee.org/document/6313077), just like in the classic environments but now powered by the Mujoco physics simulator - allowing for more complex experiments (such as varying the effects of gravity). This environment involves a cart that can moved linearly, with a pole fixed on it at one end and having another end free. The cart can be pushed left or right, and the goal is to balance the pole on the top of the cart by applying forces on the cart. ### Action Space The agent take a 1-element vector for actions. The action space is a continuous `(action)` in `[-3, 3]`, where `action` represents the numerical force applied to the cart (with magnitude representing the amount of force and sign representing the direction) | Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | |-----|---------------------------|-------------|-------------|----------------------------------|-------|-----------| | 0 | Force applied on the cart | -3 | 3 | slider | slide | Force (N) | ### Observation Space The state space consists of positional values of different body parts of the pendulum system, followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities. 
    The observation is a `ndarray` with shape `(4,)` where the elements correspond to the following:

    | Num | Observation                                   | Min  | Max | Name (in corresponding XML file) | Joint | Unit                     |
    | --- | --------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------ |
    | 0   | position of the cart along the linear surface | -Inf | Inf | slider                           | slide | position (m)             |
    | 1   | vertical angle of the pole on the cart        | -Inf | Inf | hinge                            | hinge | angle (rad)              |
    | 2   | linear velocity of the cart                   | -Inf | Inf | slider                           | slide | velocity (m/s)           |
    | 3   | angular velocity of the pole on the cart      | -Inf | Inf | hinge                            | hinge | angular velocity (rad/s) |

    ### Rewards

    The goal is to make the inverted pendulum stand upright (within a certain angle limit)
    as long as possible - as such a reward of +1 is awarded for each timestep that the pole
    is upright.

    ### Starting State
    All observations start in state (0.0, 0.0, 0.0, 0.0) with a uniform noise in the range
    of [-0.01, 0.01] added to the values for stochasticity.

    ### Episode End
    The episode ends when any of the following happens:

    1. Truncation: The episode duration reaches 1000 timesteps.
    2. Termination: Any of the state space values is no longer finite.
    3. Termination: The absolute value of the vertical angle between the pole and the cart
       is greater than 0.2 radians.

    ### Arguments

    No additional arguments are currently supported.

    ```
    env = gym.make('InvertedPendulum-v4')
    ```

    There is no v3 for InvertedPendulum, unlike the robot environments where a v3 and
    beyond take gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc.

    ### Version History

    * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3
    * v3: support for gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc.
    rgb rendering comes from tracking camera (so agent does not run away from screen)
    * v2: All continuous control environments now use mujoco_py >= 1.50
    * v1: max_time_steps raised to 1000 for robot based tasks (including inverted pendulum)
    * v0: Initial versions release (1.0.0)
    """

    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 25,
    }

    def __init__(self, **kwargs):
        utils.EzPickle.__init__(self, **kwargs)
        observation_space = Box(low=-np.inf, high=np.inf, shape=(4,), dtype=np.float64)
        MujocoEnv.__init__(
            self,
            "inverted_pendulum.xml",
            2,
            observation_space=observation_space,
            **kwargs
        )

    def step(self, a):
        # Constant +1 reward for every step the pole stays upright.
        reward = 1.0
        self.do_simulation(a, self.frame_skip)
        ob = self._get_obs()
        # Terminate on non-finite state or pole angle beyond 0.2 rad.
        terminated = bool(not np.isfinite(ob).all() or (np.abs(ob[1]) > 0.2))
        if self.render_mode == "human":
            self.render()
        return ob, reward, terminated, False, {}

    def reset_model(self):
        # Initial state plus small uniform noise on positions and velocities.
        qpos = self.init_qpos + self.np_random.uniform(
            size=self.model.nq, low=-0.01, high=0.01
        )
        qvel = self.init_qvel + self.np_random.uniform(
            size=self.model.nv, low=-0.01, high=0.01
        )
        self.set_state(qpos, qvel)
        return self._get_obs()

    def _get_obs(self):
        # Observation is [cart position, pole angle, cart velocity, pole angular velocity].
        return np.concatenate([self.data.qpos, self.data.qvel]).ravel()

    def viewer_setup(self):
        assert self.viewer is not None
        v = self.viewer
        v.cam.trackbodyid = 0
        v.cam.distance = self.model.stat.extent


================================================
FILE: gym/envs/mujoco/mujoco_env.py
================================================
from os import path
from typing import Optional, Union

import numpy as np

import gym
from gym import error, logger, spaces
from gym.spaces import Space

# mujoco_py (legacy bindings) is optional; record the import error for a
# descriptive message when a MuJocoPyEnv subclass is actually instantiated.
try:
    import mujoco_py
except ImportError as e:
    MUJOCO_PY_IMPORT_ERROR = e
else:
    MUJOCO_PY_IMPORT_ERROR = None

# Same pattern for the new official `mujoco` bindings.
try:
    import mujoco
except ImportError as e:
    MUJOCO_IMPORT_ERROR = e
else:
    MUJOCO_IMPORT_ERROR = None

DEFAULT_SIZE = 480  # default render width/height in pixels


class BaseMujocoEnv(gym.Env):
    """Superclass for all MuJoCo environments."""

    def __init__(
        self,
        model_path,
        frame_skip,
        observation_space: Space,
        render_mode: Optional[str] = None,
        width: int = DEFAULT_SIZE,
        height: int = DEFAULT_SIZE,
        camera_id: Optional[int] = None,
        camera_name: Optional[str] = None,
    ):
        """Set up the simulation from an XML model and validate env metadata.

        model_path: absolute path, or a filename resolved against the bundled
        assets directory. frame_skip: simulation steps per env step.
        """
        if model_path.startswith("/"):
            self.fullpath = model_path
        else:
            self.fullpath = path.join(path.dirname(__file__), "assets", model_path)
        if not path.exists(self.fullpath):
            raise OSError(f"File {self.fullpath} does not exist")

        self.width = width
        self.height = height
        # Subclass hook creates self.model / self.data before they are read below.
        self._initialize_simulation()  # may use width and height

        # Snapshot of the model's initial state, used by reset_model in subclasses.
        self.init_qpos = self.data.qpos.ravel().copy()
        self.init_qvel = self.data.qvel.ravel().copy()
        self._viewers = {}

        self.frame_skip = frame_skip

        self.viewer = None

        # Subclasses must declare exactly these modes and a render_fps
        # consistent with dt = timestep * frame_skip.
        assert self.metadata["render_modes"] == [
            "human",
            "rgb_array",
            "depth_array",
        ], self.metadata["render_modes"]
        assert (
            int(np.round(1.0 / self.dt)) == self.metadata["render_fps"]
        ), f'Expected value: {int(np.round(1.0 / self.dt))}, Actual value: {self.metadata["render_fps"]}'

        self.observation_space = observation_space
        self._set_action_space()

        self.render_mode = render_mode
        self.camera_name = camera_name
        self.camera_id = camera_id

    def _set_action_space(self):
        # Action space is derived from the model's actuator control ranges.
        bounds = self.model.actuator_ctrlrange.copy().astype(np.float32)
        low, high = bounds.T
        self.action_space = spaces.Box(low=low, high=high, dtype=np.float32)
        return self.action_space

    # methods to override:
    # ----------------------------
    def reset_model(self):
        """
        Reset the robot degrees of freedom (qpos and qvel).
        Implement this in each subclass.
        """
        raise NotImplementedError

    def viewer_setup(self):
        """
        This method is called when the viewer is initialized.
        Optionally implement this method, if you need to tinker with camera position and so forth.
        """

    def _initialize_simulation(self):
        """
        Initialize MuJoCo simulation data structures mjModel and mjData.
        """
        raise NotImplementedError

    def _reset_simulation(self):
        """
        Reset MuJoCo simulation data structures, mjModel and mjData.
        """
        raise NotImplementedError

    def _step_mujoco_simulation(self, ctrl, n_frames):
        """
        Step over the MuJoCo simulation.
        """
        raise NotImplementedError

    def render(self):
        """
        Render a frame from the MuJoCo simulation as specified by the render_mode.
        """
        raise NotImplementedError

    # -----------------------------
    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ):
        """Seed the RNG, reset the simulation and return (observation, info)."""
        super().reset(seed=seed)

        self._reset_simulation()

        ob = self.reset_model()
        if self.render_mode == "human":
            self.render()
        return ob, {}

    def set_state(self, qpos, qvel):
        """
        Set the joints position qpos and velocity qvel of the model.
        Override this method depending on the MuJoCo bindings used.
        """
        assert qpos.shape == (self.model.nq,) and qvel.shape == (self.model.nv,)

    @property
    def dt(self):
        # Effective env timestep: one env step advances frame_skip sim steps.
        return self.model.opt.timestep * self.frame_skip

    def do_simulation(self, ctrl, n_frames):
        """
        Step the simulation n number of frames and applying a control action.
        """
        # Check control input is contained in the action space
        if np.array(ctrl).shape != self.action_space.shape:
            raise ValueError("Action dimension mismatch")
        self._step_mujoco_simulation(ctrl, n_frames)

    def close(self):
        # Drops viewer references; subclasses override to close actual windows.
        if self.viewer is not None:
            self.viewer = None
            self._viewers = {}

    def get_body_com(self, body_name):
        """Return the cartesian position of a body frame"""
        raise NotImplementedError

    def state_vector(self):
        """Return the position and velocity joint states of the model"""
        return np.concatenate([self.data.qpos.flat, self.data.qvel.flat])


class MuJocoPyEnv(BaseMujocoEnv):
    # Concrete base for environments using the legacy mujoco_py bindings.
    def __init__(
        self,
        model_path: str,
        frame_skip: int,
        observation_space: Space,
        render_mode: Optional[str] = None,
        width: int = DEFAULT_SIZE,
        height: int = DEFAULT_SIZE,
        camera_id: Optional[int] = None,
        camera_name: Optional[str] = None,
    ):
        # Fail fast with the original import error if mujoco_py is unavailable.
        if MUJOCO_PY_IMPORT_ERROR is not None:
            raise error.DependencyNotInstalled(
                f"{MUJOCO_PY_IMPORT_ERROR}. 
(HINT: you need to install mujoco_py, and also perform the setup instructions here: https://github.com/openai/mujoco-py/.)" ) logger.warn( "This version of the mujoco environments depends " "on the mujoco-py bindings, which are no longer maintained " "and may stop working. Please upgrade to the v4 versions of " "the environments (which depend on the mujoco python bindings instead), unless " "you are trying to precisely replicate previous works)." ) super().__init__( model_path, frame_skip, observation_space, render_mode, width, height, camera_id, camera_name, ) def _initialize_simulation(self): self.model = mujoco_py.load_model_from_path(self.fullpath) self.sim = mujoco_py.MjSim(self.model) self.data = self.sim.data def _reset_simulation(self): self.sim.reset() def set_state(self, qpos, qvel): super().set_state(qpos, qvel) state = self.sim.get_state() state = mujoco_py.MjSimState(state.time, qpos, qvel, state.act, state.udd_state) self.sim.set_state(state) self.sim.forward() def _step_mujoco_simulation(self, ctrl, n_frames): self.sim.data.ctrl[:] = ctrl for _ in range(n_frames): self.sim.step() def render(self): if self.render_mode is None: gym.logger.warn( "You are calling render method without specifying any render mode. " "You can specify the render_mode at initialization, " f'e.g. gym("{self.spec.id}", render_mode="rgb_array")' ) return width, height = self.width, self.height camera_name, camera_id = self.camera_name, self.camera_id if self.render_mode in {"rgb_array", "depth_array"}: if camera_id is not None and camera_name is not None: raise ValueError( "Both `camera_id` and `camera_name` cannot be" " specified at the same time." 
                )

                no_camera_specified = camera_name is None and camera_id is None
                if no_camera_specified:
                    # Default to the model's "track" camera when none was given.
                    camera_name = "track"

                # NOTE(review): the membership test is duplicated — the outer
                # condition already requires
                # `camera_name in self.model._camera_name2id`, so the nested
                # `if` is always true when reached. Behavior is unchanged, but
                # one of the two checks is redundant.
                if camera_id is None and camera_name in self.model._camera_name2id:
                    if camera_name in self.model._camera_name2id:
                        camera_id = self.model.camera_name2id(camera_name)

                self._get_viewer(self.render_mode).render(
                    width, height, camera_id=camera_id
                )

        if self.render_mode == "rgb_array":
            data = self._get_viewer(self.render_mode).read_pixels(
                width, height, depth=False
            )
            # original image is upside-down, so flip it
            return data[::-1, :, :]
        elif self.render_mode == "depth_array":
            self._get_viewer(self.render_mode).render(width, height)
            # Extract depth part of the read_pixels() tuple
            data = self._get_viewer(self.render_mode).read_pixels(
                width, height, depth=True
            )[1]
            # original image is upside-down, so flip it
            return data[::-1, :]
        elif self.render_mode == "human":
            self._get_viewer(self.render_mode).render()

    def _get_viewer(
        self, mode
    ) -> Union["mujoco_py.MjViewer", "mujoco_py.MjRenderContextOffscreen"]:
        # Lazily create and cache one viewer per render mode in self._viewers;
        # viewer_setup() is invoked exactly once per new viewer.
        self.viewer = self._viewers.get(mode)
        if self.viewer is None:
            if mode == "human":
                self.viewer = mujoco_py.MjViewer(self.sim)

            elif mode in {"rgb_array", "depth_array"}:
                self.viewer = mujoco_py.MjRenderContextOffscreen(self.sim, -1)
            else:
                raise AttributeError(
                    f"Unknown mode: {mode}, expected modes: {self.metadata['render_modes']}"
                )

            self.viewer_setup()
            self._viewers[mode] = self.viewer
        return self.viewer

    def get_body_com(self, body_name):
        """Return the cartesian position of a body frame (mujoco-py accessor)."""
        return self.data.get_body_xpos(body_name)


class MujocoEnv(BaseMujocoEnv):
    """Superclass for MuJoCo environments."""

    def __init__(
        self,
        model_path,
        frame_skip,
        observation_space: Space,
        render_mode: Optional[str] = None,
        width: int = DEFAULT_SIZE,
        height: int = DEFAULT_SIZE,
        camera_id: Optional[int] = None,
        camera_name: Optional[str] = None,
    ):
        if MUJOCO_IMPORT_ERROR is not None:
            raise error.DependencyNotInstalled(
                f"{MUJOCO_IMPORT_ERROR}.
(HINT: you need to install mujoco)" ) super().__init__( model_path, frame_skip, observation_space, render_mode, width, height, camera_id, camera_name, ) def _initialize_simulation(self): self.model = mujoco.MjModel.from_xml_path(self.fullpath) # MjrContext will copy model.vis.global_.off* to con.off* self.model.vis.global_.offwidth = self.width self.model.vis.global_.offheight = self.height self.data = mujoco.MjData(self.model) def _reset_simulation(self): mujoco.mj_resetData(self.model, self.data) def set_state(self, qpos, qvel): super().set_state(qpos, qvel) self.data.qpos[:] = np.copy(qpos) self.data.qvel[:] = np.copy(qvel) if self.model.na == 0: self.data.act[:] = None mujoco.mj_forward(self.model, self.data) def _step_mujoco_simulation(self, ctrl, n_frames): self.data.ctrl[:] = ctrl mujoco.mj_step(self.model, self.data, nstep=self.frame_skip) # As of MuJoCo 2.0, force-related quantities like cacc are not computed # unless there's a force sensor in the model. # See https://github.com/openai/gym/issues/1541 mujoco.mj_rnePostConstraint(self.model, self.data) def render(self): if self.render_mode is None: gym.logger.warn( "You are calling render method without specifying any render mode. " "You can specify the render_mode at initialization, " f'e.g. gym("{self.spec.id}", render_mode="rgb_array")' ) return if self.render_mode in { "rgb_array", "depth_array", }: camera_id = self.camera_id camera_name = self.camera_name if camera_id is not None and camera_name is not None: raise ValueError( "Both `camera_id` and `camera_name` cannot be" " specified at the same time." 
) no_camera_specified = camera_name is None and camera_id is None if no_camera_specified: camera_name = "track" if camera_id is None: camera_id = mujoco.mj_name2id( self.model, mujoco.mjtObj.mjOBJ_CAMERA, camera_name, ) self._get_viewer(self.render_mode).render(camera_id=camera_id) if self.render_mode == "rgb_array": data = self._get_viewer(self.render_mode).read_pixels(depth=False) # original image is upside-down, so flip it return data[::-1, :, :] elif self.render_mode == "depth_array": self._get_viewer(self.render_mode).render() # Extract depth part of the read_pixels() tuple data = self._get_viewer(self.render_mode).read_pixels(depth=True)[1] # original image is upside-down, so flip it return data[::-1, :] elif self.render_mode == "human": self._get_viewer(self.render_mode).render() def close(self): if self.viewer is not None: self.viewer.close() super().close() def _get_viewer( self, mode ) -> Union[ "gym.envs.mujoco.mujoco_rendering.Viewer", "gym.envs.mujoco.mujoco_rendering.RenderContextOffscreen", ]: self.viewer = self._viewers.get(mode) if self.viewer is None: if mode == "human": from gym.envs.mujoco.mujoco_rendering import Viewer self.viewer = Viewer(self.model, self.data) elif mode in {"rgb_array", "depth_array"}: from gym.envs.mujoco.mujoco_rendering import RenderContextOffscreen self.viewer = RenderContextOffscreen(self.model, self.data) else: raise AttributeError( f"Unexpected mode: {mode}, expected modes: {self.metadata['render_modes']}" ) self.viewer_setup() self._viewers[mode] = self.viewer return self.viewer def get_body_com(self, body_name): return self.data.body(body_name).xpos ================================================ FILE: gym/envs/mujoco/mujoco_rendering.py ================================================ import collections import os import time from threading import Lock import glfw import imageio import mujoco import numpy as np def _import_egl(width, height): from mujoco.egl import GLContext return GLContext(width, height) def 
_import_glfw(width, height): from mujoco.glfw import GLContext return GLContext(width, height) def _import_osmesa(width, height): from mujoco.osmesa import GLContext return GLContext(width, height) _ALL_RENDERERS = collections.OrderedDict( [ ("glfw", _import_glfw), ("egl", _import_egl), ("osmesa", _import_osmesa), ] ) class RenderContext: """Render context superclass for offscreen and window rendering.""" def __init__(self, model, data, offscreen=True): self.model = model self.data = data self.offscreen = offscreen self.offwidth = model.vis.global_.offwidth self.offheight = model.vis.global_.offheight max_geom = 1000 mujoco.mj_forward(self.model, self.data) self.scn = mujoco.MjvScene(self.model, max_geom) self.cam = mujoco.MjvCamera() self.vopt = mujoco.MjvOption() self.pert = mujoco.MjvPerturb() self.con = mujoco.MjrContext(self.model, mujoco.mjtFontScale.mjFONTSCALE_150) self._markers = [] self._overlays = {} self._init_camera() self._set_mujoco_buffers() def _set_mujoco_buffers(self): if self.offscreen: mujoco.mjr_setBuffer(mujoco.mjtFramebuffer.mjFB_OFFSCREEN, self.con) if self.con.currentBuffer != mujoco.mjtFramebuffer.mjFB_OFFSCREEN: raise RuntimeError("Offscreen rendering not supported") else: mujoco.mjr_setBuffer(mujoco.mjtFramebuffer.mjFB_WINDOW, self.con) if self.con.currentBuffer != mujoco.mjtFramebuffer.mjFB_WINDOW: raise RuntimeError("Window rendering not supported") def render(self, camera_id=None, segmentation=False): width, height = self.offwidth, self.offheight rect = mujoco.MjrRect(left=0, bottom=0, width=width, height=height) if camera_id is not None: if camera_id == -1: self.cam.type = mujoco.mjtCamera.mjCAMERA_FREE else: self.cam.type = mujoco.mjtCamera.mjCAMERA_FIXED self.cam.fixedcamid = camera_id mujoco.mjv_updateScene( self.model, self.data, self.vopt, self.pert, self.cam, mujoco.mjtCatBit.mjCAT_ALL, self.scn, ) if segmentation: self.scn.flags[mujoco.mjtRndFlag.mjRND_SEGMENT] = 1 self.scn.flags[mujoco.mjtRndFlag.mjRND_IDCOLOR] = 1 for 
marker_params in self._markers: self._add_marker_to_scene(marker_params) mujoco.mjr_render(rect, self.scn, self.con) for gridpos, (text1, text2) in self._overlays.items(): mujoco.mjr_overlay( mujoco.mjtFontScale.mjFONTSCALE_150, gridpos, rect, text1.encode(), text2.encode(), self.con, ) if segmentation: self.scn.flags[mujoco.mjtRndFlag.mjRND_SEGMENT] = 0 self.scn.flags[mujoco.mjtRndFlag.mjRND_IDCOLOR] = 0 def read_pixels(self, depth=True, segmentation=False): width, height = self.offwidth, self.offheight rect = mujoco.MjrRect(left=0, bottom=0, width=width, height=height) rgb_arr = np.zeros(3 * rect.width * rect.height, dtype=np.uint8) depth_arr = np.zeros(rect.width * rect.height, dtype=np.float32) mujoco.mjr_readPixels(rgb_arr, depth_arr, rect, self.con) rgb_img = rgb_arr.reshape(rect.height, rect.width, 3) ret_img = rgb_img if segmentation: seg_img = ( rgb_img[:, :, 0] + rgb_img[:, :, 1] * (2**8) + rgb_img[:, :, 2] * (2**16) ) seg_img[seg_img >= (self.scn.ngeom + 1)] = 0 seg_ids = np.full((self.scn.ngeom + 1, 2), fill_value=-1, dtype=np.int32) for i in range(self.scn.ngeom): geom = self.scn.geoms[i] if geom.segid != -1: seg_ids[geom.segid + 1, 0] = geom.objtype seg_ids[geom.segid + 1, 1] = geom.objid ret_img = seg_ids[seg_img] if depth: depth_img = depth_arr.reshape(rect.height, rect.width) return (ret_img, depth_img) else: return ret_img def _init_camera(self): self.cam.type = mujoco.mjtCamera.mjCAMERA_FREE self.cam.fixedcamid = -1 for i in range(3): self.cam.lookat[i] = np.median(self.data.geom_xpos[:, i]) self.cam.distance = self.model.stat.extent def add_overlay(self, gridpos: int, text1: str, text2: str): """Overlays text on the scene.""" if gridpos not in self._overlays: self._overlays[gridpos] = ["", ""] self._overlays[gridpos][0] += text1 + "\n" self._overlays[gridpos][1] += text2 + "\n" def add_marker(self, **marker_params): self._markers.append(marker_params) def _add_marker_to_scene(self, marker): if self.scn.ngeom >= self.scn.maxgeom: raise 
RuntimeError("Ran out of geoms. maxgeom: %d" % self.scn.maxgeom) g = self.scn.geoms[self.scn.ngeom] # default values. g.dataid = -1 g.objtype = mujoco.mjtObj.mjOBJ_UNKNOWN g.objid = -1 g.category = mujoco.mjtCatBit.mjCAT_DECOR g.texid = -1 g.texuniform = 0 g.texrepeat[0] = 1 g.texrepeat[1] = 1 g.emission = 0 g.specular = 0.5 g.shininess = 0.5 g.reflectance = 0 g.type = mujoco.mjtGeom.mjGEOM_BOX g.size[:] = np.ones(3) * 0.1 g.mat[:] = np.eye(3) g.rgba[:] = np.ones(4) for key, value in marker.items(): if isinstance(value, (int, float, mujoco._enums.mjtGeom)): setattr(g, key, value) elif isinstance(value, (tuple, list, np.ndarray)): attr = getattr(g, key) attr[:] = np.asarray(value).reshape(attr.shape) elif isinstance(value, str): assert key == "label", "Only label is a string in mjtGeom." if value is None: g.label[0] = 0 else: g.label = value elif hasattr(g, key): raise ValueError( "mjtGeom has attr {} but type {} is invalid".format( key, type(value) ) ) else: raise ValueError("mjtGeom doesn't have field %s" % key) self.scn.ngeom += 1 def close(self): """Override close in your rendering subclass to perform any necessary cleanup after env.close() is called. 
""" pass class RenderContextOffscreen(RenderContext): """Offscreen rendering class with opengl context.""" def __init__(self, model, data): # We must make GLContext before MjrContext width = model.vis.global_.offwidth height = model.vis.global_.offheight self._get_opengl_backend(width, height) self.opengl_context.make_current() super().__init__(model, data, offscreen=True) def _get_opengl_backend(self, width, height): backend = os.environ.get("MUJOCO_GL") if backend is not None: try: self.opengl_context = _ALL_RENDERERS[backend](width, height) except KeyError: raise RuntimeError( "Environment variable {} must be one of {!r}: got {!r}.".format( "MUJOCO_GL", _ALL_RENDERERS.keys(), backend ) ) else: for name, _ in _ALL_RENDERERS.items(): try: self.opengl_context = _ALL_RENDERERS[name](width, height) backend = name break except: # noqa:E722 pass if backend is None: raise RuntimeError( "No OpenGL backend could be imported. Attempting to create a " "rendering context will result in a RuntimeError." 
) class Viewer(RenderContext): """Class for window rendering in all MuJoCo environments.""" def __init__(self, model, data): self._gui_lock = Lock() self._button_left_pressed = False self._button_right_pressed = False self._last_mouse_x = 0 self._last_mouse_y = 0 self._paused = False self._transparent = False self._contacts = False self._render_every_frame = True self._image_idx = 0 self._image_path = "/tmp/frame_%07d.png" self._time_per_render = 1 / 60.0 self._run_speed = 1.0 self._loop_count = 0 self._advance_by_one_step = False self._hide_menu = False # glfw init glfw.init() width, height = glfw.get_video_mode(glfw.get_primary_monitor()).size self.window = glfw.create_window(width // 2, height // 2, "mujoco", None, None) glfw.make_context_current(self.window) glfw.swap_interval(1) framebuffer_width, framebuffer_height = glfw.get_framebuffer_size(self.window) window_width, _ = glfw.get_window_size(self.window) self._scale = framebuffer_width * 1.0 / window_width # set callbacks glfw.set_cursor_pos_callback(self.window, self._cursor_pos_callback) glfw.set_mouse_button_callback(self.window, self._mouse_button_callback) glfw.set_scroll_callback(self.window, self._scroll_callback) glfw.set_key_callback(self.window, self._key_callback) # get viewport self.viewport = mujoco.MjrRect(0, 0, framebuffer_width, framebuffer_height) super().__init__(model, data, offscreen=False) def _key_callback(self, window, key, scancode, action, mods): if action != glfw.RELEASE: return # Switch cameras elif key == glfw.KEY_TAB: self.cam.fixedcamid += 1 self.cam.type = mujoco.mjtCamera.mjCAMERA_FIXED if self.cam.fixedcamid >= self.model.ncam: self.cam.fixedcamid = -1 self.cam.type = mujoco.mjtCamera.mjCAMERA_FREE # Pause simulation elif key == glfw.KEY_SPACE and self._paused is not None: self._paused = not self._paused # Advances simulation by one step. 
elif key == glfw.KEY_RIGHT and self._paused is not None: self._advance_by_one_step = True self._paused = True # Slows down simulation elif key == glfw.KEY_S: self._run_speed /= 2.0 # Speeds up simulation elif key == glfw.KEY_F: self._run_speed *= 2.0 # Turn off / turn on rendering every frame. elif key == glfw.KEY_D: self._render_every_frame = not self._render_every_frame # Capture screenshot elif key == glfw.KEY_T: img = np.zeros( ( glfw.get_framebuffer_size(self.window)[1], glfw.get_framebuffer_size(self.window)[0], 3, ), dtype=np.uint8, ) mujoco.mjr_readPixels(img, None, self.viewport, self.con) imageio.imwrite(self._image_path % self._image_idx, np.flipud(img)) self._image_idx += 1 # Display contact forces elif key == glfw.KEY_C: self._contacts = not self._contacts self.vopt.flags[mujoco.mjtVisFlag.mjVIS_CONTACTPOINT] = self._contacts self.vopt.flags[mujoco.mjtVisFlag.mjVIS_CONTACTFORCE] = self._contacts # Display coordinate frames elif key == glfw.KEY_E: self.vopt.frame = 1 - self.vopt.frame # Hide overlay menu elif key == glfw.KEY_H: self._hide_menu = not self._hide_menu # Make transparent elif key == glfw.KEY_R: self._transparent = not self._transparent if self._transparent: self.model.geom_rgba[:, 3] /= 5.0 else: self.model.geom_rgba[:, 3] *= 5.0 # Geom group visibility elif key in (glfw.KEY_0, glfw.KEY_1, glfw.KEY_2, glfw.KEY_3, glfw.KEY_4): self.vopt.geomgroup[key - glfw.KEY_0] ^= 1 # Quit if key == glfw.KEY_ESCAPE: print("Pressed ESC") print("Quitting.") glfw.destroy_window(self.window) glfw.terminate() def _cursor_pos_callback(self, window, xpos, ypos): if not (self._button_left_pressed or self._button_right_pressed): return mod_shift = ( glfw.get_key(window, glfw.KEY_LEFT_SHIFT) == glfw.PRESS or glfw.get_key(window, glfw.KEY_RIGHT_SHIFT) == glfw.PRESS ) if self._button_right_pressed: action = ( mujoco.mjtMouse.mjMOUSE_MOVE_H if mod_shift else mujoco.mjtMouse.mjMOUSE_MOVE_V ) elif self._button_left_pressed: action = ( mujoco.mjtMouse.mjMOUSE_ROTATE_H if 
mod_shift else mujoco.mjtMouse.mjMOUSE_ROTATE_V ) else: action = mujoco.mjtMouse.mjMOUSE_ZOOM dx = int(self._scale * xpos) - self._last_mouse_x dy = int(self._scale * ypos) - self._last_mouse_y width, height = glfw.get_framebuffer_size(window) with self._gui_lock: mujoco.mjv_moveCamera( self.model, action, dx / height, dy / height, self.scn, self.cam ) self._last_mouse_x = int(self._scale * xpos) self._last_mouse_y = int(self._scale * ypos) def _mouse_button_callback(self, window, button, act, mods): self._button_left_pressed = ( glfw.get_mouse_button(window, glfw.MOUSE_BUTTON_LEFT) == glfw.PRESS ) self._button_right_pressed = ( glfw.get_mouse_button(window, glfw.MOUSE_BUTTON_RIGHT) == glfw.PRESS ) x, y = glfw.get_cursor_pos(window) self._last_mouse_x = int(self._scale * x) self._last_mouse_y = int(self._scale * y) def _scroll_callback(self, window, x_offset, y_offset): with self._gui_lock: mujoco.mjv_moveCamera( self.model, mujoco.mjtMouse.mjMOUSE_ZOOM, 0, -0.05 * y_offset, self.scn, self.cam, ) def _create_overlay(self): topleft = mujoco.mjtGridPos.mjGRID_TOPLEFT bottomleft = mujoco.mjtGridPos.mjGRID_BOTTOMLEFT if self._render_every_frame: self.add_overlay(topleft, "", "") else: self.add_overlay( topleft, "Run speed = %.3f x real time" % self._run_speed, "[S]lower, [F]aster", ) self.add_overlay( topleft, "Ren[d]er every frame", "On" if self._render_every_frame else "Off" ) self.add_overlay( topleft, "Switch camera (#cams = %d)" % (self.model.ncam + 1), "[Tab] (camera ID = %d)" % self.cam.fixedcamid, ) self.add_overlay(topleft, "[C]ontact forces", "On" if self._contacts else "Off") self.add_overlay(topleft, "T[r]ansparent", "On" if self._transparent else "Off") if self._paused is not None: if not self._paused: self.add_overlay(topleft, "Stop", "[Space]") else: self.add_overlay(topleft, "Start", "[Space]") self.add_overlay( topleft, "Advance simulation by one step", "[right arrow]" ) self.add_overlay( topleft, "Referenc[e] frames", "On" if self.vopt.frame == 1 else 
"Off" ) self.add_overlay(topleft, "[H]ide Menu", "") if self._image_idx > 0: fname = self._image_path % (self._image_idx - 1) self.add_overlay(topleft, "Cap[t]ure frame", "Saved as %s" % fname) else: self.add_overlay(topleft, "Cap[t]ure frame", "") self.add_overlay(topleft, "Toggle geomgroup visibility", "0-4") self.add_overlay(bottomleft, "FPS", "%d%s" % (1 / self._time_per_render, "")) self.add_overlay( bottomleft, "Solver iterations", str(self.data.solver_iter + 1) ) self.add_overlay( bottomleft, "Step", str(round(self.data.time / self.model.opt.timestep)) ) self.add_overlay(bottomleft, "timestep", "%.5f" % self.model.opt.timestep) def render(self): # mjv_updateScene, mjr_render, mjr_overlay def update(): # fill overlay items self._create_overlay() render_start = time.time() if self.window is None: return elif glfw.window_should_close(self.window): glfw.destroy_window(self.window) glfw.terminate() self.viewport.width, self.viewport.height = glfw.get_framebuffer_size( self.window ) with self._gui_lock: # update scene mujoco.mjv_updateScene( self.model, self.data, self.vopt, mujoco.MjvPerturb(), self.cam, mujoco.mjtCatBit.mjCAT_ALL.value, self.scn, ) # marker items for marker in self._markers: self._add_marker_to_scene(marker) # render mujoco.mjr_render(self.viewport, self.scn, self.con) # overlay items if not self._hide_menu: for gridpos, [t1, t2] in self._overlays.items(): mujoco.mjr_overlay( mujoco.mjtFontScale.mjFONTSCALE_150, gridpos, self.viewport, t1, t2, self.con, ) glfw.swap_buffers(self.window) glfw.poll_events() self._time_per_render = 0.9 * self._time_per_render + 0.1 * ( time.time() - render_start ) # clear overlay self._overlays.clear() if self._paused: while self._paused: update() if self._advance_by_one_step: self._advance_by_one_step = False break else: self._loop_count += self.model.opt.timestep / ( self._time_per_render * self._run_speed ) if self._render_every_frame: self._loop_count = 1 while self._loop_count > 0: update() self._loop_count -= 
1 # clear markers self._markers[:] = [] def close(self): glfw.destroy_window(self.window) glfw.terminate() ================================================ FILE: gym/envs/mujoco/pusher.py ================================================ import numpy as np from gym import utils from gym.envs.mujoco import MuJocoPyEnv from gym.spaces import Box class PusherEnv(MuJocoPyEnv, utils.EzPickle): metadata = { "render_modes": [ "human", "rgb_array", "depth_array", ], "render_fps": 20, } def __init__(self, **kwargs): utils.EzPickle.__init__(self, **kwargs) observation_space = Box(low=-np.inf, high=np.inf, shape=(23,), dtype=np.float64) MuJocoPyEnv.__init__( self, "pusher.xml", 5, observation_space=observation_space, **kwargs ) def step(self, a): vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm") vec_2 = self.get_body_com("object") - self.get_body_com("goal") reward_near = -np.linalg.norm(vec_1) reward_dist = -np.linalg.norm(vec_2) reward_ctrl = -np.square(a).sum() reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near self.do_simulation(a, self.frame_skip) if self.render_mode == "human": self.render() ob = self._get_obs() return ( ob, reward, False, False, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl), ) def viewer_setup(self): assert self.viewer is not None self.viewer.cam.trackbodyid = -1 self.viewer.cam.distance = 4.0 def reset_model(self): qpos = self.init_qpos self.goal_pos = np.asarray([0, 0]) while True: self.cylinder_pos = np.concatenate( [ self.np_random.uniform(low=-0.3, high=0, size=1), self.np_random.uniform(low=-0.2, high=0.2, size=1), ] ) if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17: break qpos[-4:-2] = self.cylinder_pos qpos[-2:] = self.goal_pos qvel = self.init_qvel + self.np_random.uniform( low=-0.005, high=0.005, size=self.model.nv ) qvel[-4:] = 0 self.set_state(qpos, qvel) return self._get_obs() def _get_obs(self): return np.concatenate( [ self.sim.data.qpos.flat[:7], self.sim.data.qvel.flat[:7], 
self.get_body_com("tips_arm"), self.get_body_com("object"), self.get_body_com("goal"), ] ) ================================================ FILE: gym/envs/mujoco/pusher_v4.py ================================================ import numpy as np from gym import utils from gym.envs.mujoco import MujocoEnv from gym.spaces import Box class PusherEnv(MujocoEnv, utils.EzPickle): """ ### Description "Pusher" is a multi-jointed robot arm which is very similar to that of a human. The goal is to move a target cylinder (called *object*) to a goal position using the robot's end effector (called *fingertip*). The robot consists of shoulder, elbow, forearm, and wrist joints. ### Action Space The action space is a `Box(-2, 2, (7,), float32)`. An action `(a, b)` represents the torques applied at the hinge joints. | Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | |-----|--------------------------------------------------------------------|-------------|-------------|----------------------------------|-------|--------------| | 0 | Rotation of the panning the shoulder | -2 | 2 | r_shoulder_pan_joint | hinge | torque (N m) | | 1 | Rotation of the shoulder lifting joint | -2 | 2 | r_shoulder_lift_joint | hinge | torque (N m) | | 2 | Rotation of the shoulder rolling joint | -2 | 2 | r_upper_arm_roll_joint | hinge | torque (N m) | | 3 | Rotation of hinge joint that flexed the elbow | -2 | 2 | r_elbow_flex_joint | hinge | torque (N m) | | 4 | Rotation of hinge that rolls the forearm | -2 | 2 | r_forearm_roll_joint | hinge | torque (N m) | | 5 | Rotation of flexing the wrist | -2 | 2 | r_wrist_flex_joint | hinge | torque (N m) | | 6 | Rotation of rolling the wrist | -2 | 2 | r_wrist_roll_joint | hinge | torque (N m) | ### Observation Space Observations consist of - Angle of rotational joints on the pusher - Angular velocities of rotational joints on the pusher - The coordinates of the fingertip of the pusher - The coordinates of the object to be 
moved - The coordinates of the goal position The observation is a `ndarray` with shape `(23,)` where the elements correspond to the table below. An analogy can be drawn to a human arm in order to help understand the state space, with the words flex and roll meaning the same as human joints. | Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit | | --- | -------------------------------------------------------- | ---- | --- | -------------------------------- | -------- | ------------------------ | | 0 | Rotation of the panning the shoulder | -Inf | Inf | r_shoulder_pan_joint | hinge | angle (rad) | | 1 | Rotation of the shoulder lifting joint | -Inf | Inf | r_shoulder_lift_joint | hinge | angle (rad) | | 2 | Rotation of the shoulder rolling joint | -Inf | Inf | r_upper_arm_roll_joint | hinge | angle (rad) | | 3 | Rotation of hinge joint that flexed the elbow | -Inf | Inf | r_elbow_flex_joint | hinge | angle (rad) | | 4 | Rotation of hinge that rolls the forearm | -Inf | Inf | r_forearm_roll_joint | hinge | angle (rad) | | 5 | Rotation of flexing the wrist | -Inf | Inf | r_wrist_flex_joint | hinge | angle (rad) | | 6 | Rotation of rolling the wrist | -Inf | Inf | r_wrist_roll_joint | hinge | angle (rad) | | 7 | Rotational velocity of the panning the shoulder | -Inf | Inf | r_shoulder_pan_joint | hinge | angular velocity (rad/s) | | 8 | Rotational velocity of the shoulder lifting joint | -Inf | Inf | r_shoulder_lift_joint | hinge | angular velocity (rad/s) | | 9 | Rotational velocity of the shoulder rolling joint | -Inf | Inf | r_upper_arm_roll_joint | hinge | angular velocity (rad/s) | | 10 | Rotational velocity of hinge joint that flexed the elbow | -Inf | Inf | r_elbow_flex_joint | hinge | angular velocity (rad/s) | | 11 | Rotational velocity of hinge that rolls the forearm | -Inf | Inf | r_forearm_roll_joint | hinge | angular velocity (rad/s) | | 12 | Rotational velocity of flexing the wrist | -Inf | Inf | r_wrist_flex_joint | hinge | 
angular velocity (rad/s) | | 13 | Rotational velocity of rolling the wrist | -Inf | Inf | r_wrist_roll_joint | hinge | angular velocity (rad/s) | | 14 | x-coordinate of the fingertip of the pusher | -Inf | Inf | tips_arm | slide | position (m) | | 15 | y-coordinate of the fingertip of the pusher | -Inf | Inf | tips_arm | slide | position (m) | | 16 | z-coordinate of the fingertip of the pusher | -Inf | Inf | tips_arm | slide | position (m) | | 17 | x-coordinate of the object to be moved | -Inf | Inf | object (obj_slidex) | slide | position (m) | | 18 | y-coordinate of the object to be moved | -Inf | Inf | object (obj_slidey) | slide | position (m) | | 19 | z-coordinate of the object to be moved | -Inf | Inf | object | cylinder | position (m) | | 20 | x-coordinate of the goal position of the object | -Inf | Inf | goal (goal_slidex) | slide | position (m) | | 21 | y-coordinate of the goal position of the object | -Inf | Inf | goal (goal_slidey) | slide | position (m) | | 22 | z-coordinate of the goal position of the object | -Inf | Inf | goal | sphere | position (m) | ### Rewards The reward consists of two parts: - *reward_near *: This reward is a measure of how far the *fingertip* of the pusher (the unattached end) is from the object, with a more negative value assigned for when the pusher's *fingertip* is further away from the target. It is calculated as the negative vector norm of (position of the fingertip - position of target), or *-norm("fingertip" - "target")*. - *reward_dist *: This reward is a measure of how far the object is from the target goal position, with a more negative value assigned for object is further away from the target. It is calculated as the negative vector norm of (position of the object - position of goal), or *-norm("object" - "target")*. - *reward_control*: A negative reward for penalising the pusher if it takes actions that are too large. It is measured as the negative squared Euclidean norm of the action, i.e. as *- sum(action2)*. 
The total reward returned is ***reward*** *=* *reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near* Unlike other environments, Pusher does not allow you to specify weights for the individual reward terms. However, `info` does contain the keys *reward_dist* and *reward_ctrl*. Thus, if you'd like to weight the terms, you should create a wrapper that computes the weighted reward from `info`. ### Starting State All pusher (not including object and goal) states start in (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0). A uniform noise in the range [-0.005, 0.005] is added to the velocity attributes only. The velocities of the object and goal are permanently set to 0. The object's x-position is selected uniformly between [-0.3, 0] while the y-position is selected uniformly between [-0.2, 0.2], and this process is repeated until the vector norm between the object's (x,y) position and origin is not greater than 0.17. The goal always have the same position of (0.45, -0.05, -0.323). The default framerate is 5 with each frame lasting for 0.01, giving rise to a *dt = 5 * 0.01 = 0.05* ### Episode End The episode ends when any of the following happens: 1. Truncation: The episode duration reaches a 100 timesteps. 2. Termination: Any of the state space values is no longer finite. ### Arguments No additional arguments are currently supported (in v2 and lower), but modifications can be made to the XML file in the assets folder (or by changing the path to a modified XML file in another folder).. ``` env = gym.make('Pusher-v4') ``` There is no v3 for Pusher, unlike the robot environments where a v3 and beyond take gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. ### Version History * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 * v2: All continuous control environments now use mujoco_py >= 1.50 * v1: max_time_steps raised to 1000 for robot based tasks (not including reacher, which has a max_time_steps of 50). 
Added reward_threshold to environments.
    * v0: Initial versions release (1.0.0)
    """

    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 20,
    }

    def __init__(self, **kwargs):
        """Build the Pusher model ("pusher.xml") with frame_skip=5 and an
        unbounded 23-dimensional float64 observation space."""
        utils.EzPickle.__init__(self, **kwargs)
        observation_space = Box(low=-np.inf, high=np.inf, shape=(23,), dtype=np.float64)
        MujocoEnv.__init__(
            self, "pusher.xml", 5, observation_space=observation_space, **kwargs
        )

    def step(self, a):
        """Advance the simulation one control step with action `a`.

        Returns (observation, reward, terminated=False, truncated=False, info),
        with the unweighted `reward_dist` and `reward_ctrl` terms in `info`.
        """
        # Distances are taken between body centers-of-mass.
        vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
        vec_2 = self.get_body_com("object") - self.get_body_com("goal")

        reward_near = -np.linalg.norm(vec_1)  # fingertip -> object
        reward_dist = -np.linalg.norm(vec_2)  # object -> goal
        reward_ctrl = -np.square(a).sum()  # action-magnitude penalty
        reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near

        # NOTE(review): the reward above is computed from the state *before*
        # this do_simulation call, i.e. it reflects the previous state paired
        # with the current action.
        self.do_simulation(a, self.frame_skip)
        if self.render_mode == "human":
            self.render()

        ob = self._get_obs()
        return (
            ob,
            reward,
            False,  # terminated: Pusher never terminates on its own
            False,  # truncated: handled by the TimeLimit wrapper
            dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl),
        )

    def viewer_setup(self):
        # Free camera (no tracked body), pulled back to show the whole scene.
        assert self.viewer is not None
        self.viewer.cam.trackbodyid = -1
        self.viewer.cam.distance = 4.0

    def reset_model(self):
        """Reset the arm to its initial pose and rejection-sample the object's
        (x, y) until it is more than 0.17 from the goal position (0, 0)."""
        qpos = self.init_qpos

        self.goal_pos = np.asarray([0, 0])
        while True:
            self.cylinder_pos = np.concatenate(
                [
                    self.np_random.uniform(low=-0.3, high=0, size=1),
                    self.np_random.uniform(low=-0.2, high=0.2, size=1),
                ]
            )
            if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
                break

        # Last four position coordinates are object (x, y) then goal (x, y).
        qpos[-4:-2] = self.cylinder_pos
        qpos[-2:] = self.goal_pos
        qvel = self.init_qvel + self.np_random.uniform(
            low=-0.005, high=0.005, size=self.model.nv
        )
        qvel[-4:] = 0  # object and goal start at rest
        self.set_state(qpos, qvel)
        return self._get_obs()

    def _get_obs(self):
        # 7 joint angles + 7 joint velocities + three 3-D body positions = 23.
        return np.concatenate(
            [
                self.data.qpos.flat[:7],
                self.data.qvel.flat[:7],
                self.get_body_com("tips_arm"),
                self.get_body_com("object"),
                self.get_body_com("goal"),
            ]
        )


================================================
FILE: gym/envs/mujoco/reacher.py
================================================
import numpy as np

from gym import utils
from gym.envs.mujoco import MuJocoPyEnv
from gym.spaces import Box
class ReacherEnv(MuJocoPyEnv, utils.EzPickle):
    """Two-link planar reacher (mujoco_py backend): move the fingertip to a
    randomly placed target."""

    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 50,
    }

    def __init__(self, **kwargs):
        utils.EzPickle.__init__(self, **kwargs)
        observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
        MuJocoPyEnv.__init__(
            self, "reacher.xml", 2, observation_space=observation_space, **kwargs
        )

    def step(self, a):
        """One control step; reward = -|fingertip - target| - |a|^2.

        Returns (obs, reward, terminated=False, truncated=False, info) with the
        unweighted reward terms exposed in `info`.
        """
        vec = self.get_body_com("fingertip") - self.get_body_com("target")
        reward_dist = -np.linalg.norm(vec)
        reward_ctrl = -np.square(a).sum()
        reward = reward_dist + reward_ctrl

        # NOTE(review): reward uses the pre-step state; the simulation only
        # advances after it has been computed.
        self.do_simulation(a, self.frame_skip)
        if self.render_mode == "human":
            self.render()

        ob = self._get_obs()
        return (
            ob,
            reward,
            False,  # never terminates; episode end comes from TimeLimit
            False,
            dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl),
        )

    def viewer_setup(self):
        assert self.viewer is not None
        self.viewer.cam.trackbodyid = 0  # track the model's root body

    def reset_model(self):
        """Perturb all joints uniformly in [-0.1, 0.1]; rejection-sample a
        target inside the disk of radius 0.2 around the origin."""
        qpos = (
            self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq)
            + self.init_qpos
        )
        while True:
            self.goal = self.np_random.uniform(low=-0.2, high=0.2, size=2)
            if np.linalg.norm(self.goal) < 0.2:
                break
        qpos[-2:] = self.goal  # last two position coords hold the target (x, y)
        qvel = self.init_qvel + self.np_random.uniform(
            low=-0.005, high=0.005, size=self.model.nv
        )
        qvel[-2:] = 0  # target does not move
        self.set_state(qpos, qvel)
        return self._get_obs()

    def _get_obs(self):
        # cos/sin of the two arm angles, target xy, arm angular velocities,
        # and the fingertip-to-target displacement (11 values total).
        theta = self.sim.data.qpos.flat[:2]
        return np.concatenate(
            [
                np.cos(theta),
                np.sin(theta),
                self.sim.data.qpos.flat[2:],
                self.sim.data.qvel.flat[:2],
                self.get_body_com("fingertip") - self.get_body_com("target"),
            ]
        )


================================================
FILE: gym/envs/mujoco/reacher_v4.py
================================================
import numpy as np

from gym import utils
from gym.envs.mujoco import MujocoEnv
from gym.spaces import Box


class ReacherEnv(MujocoEnv, utils.EzPickle):
    """
    ### Description
    "Reacher" is a two-jointed robot arm. The goal is to move the robot's end effector (called *fingertip*) close to a
    target that is spawned at a random position.
### Action Space The action space is a `Box(-1, 1, (2,), float32)`. An action `(a, b)` represents the torques applied at the hinge joints. | Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | |-----|---------------------------------------------------------------------------------|-------------|-------------|--------------------------|-------|------| | 0 | Torque applied at the first hinge (connecting the link to the point of fixture) | -1 | 1 | joint0 | hinge | torque (N m) | | 1 | Torque applied at the second hinge (connecting the two links) | -1 | 1 | joint1 | hinge | torque (N m) | ### Observation Space Observations consist of - The cosine of the angles of the two arms - The sine of the angles of the two arms - The coordinates of the target - The angular velocities of the arms - The vector between the target and the reacher's fingertip (3 dimensional with the last element being 0) The observation is a `ndarray` with shape `(11,)` where the elements correspond to the following: | Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit | | --- | ---------------------------------------------------------------------------------------------- | ---- | --- | -------------------------------- | ----- | ------------------------ | | 0 | cosine of the angle of the first arm | -Inf | Inf | cos(joint0) | hinge | unitless | | 1 | cosine of the angle of the second arm | -Inf | Inf | cos(joint1) | hinge | unitless | | 2 | sine of the angle of the first arm | -Inf | Inf | cos(joint0) | hinge | unitless | | 3 | sine of the angle of the second arm | -Inf | Inf | cos(joint1) | hinge | unitless | | 4 | x-coordinate of the target | -Inf | Inf | target_x | slide | position (m) | | 5 | y-coordinate of the target | -Inf | Inf | target_y | slide | position (m) | | 6 | angular velocity of the first arm | -Inf | Inf | joint0 | hinge | angular velocity (rad/s) | | 7 | angular velocity of the second arm | -Inf | Inf | joint1 | 
hinge | angular velocity (rad/s) | | 8 | x-value of position_fingertip - position_target | -Inf | Inf | NA | slide | position (m) | | 9 | y-value of position_fingertip - position_target | -Inf | Inf | NA | slide | position (m) | | 10 | z-value of position_fingertip - position_target (0 since reacher is 2d and z is same for both) | -Inf | Inf | NA | slide | position (m) | Most Gym environments just return the positions and velocity of the joints in the `.xml` file as the state of the environment. However, in reacher the state is created by combining only certain elements of the position and velocity, and performing some function transformations on them. If one is to read the `.xml` for reacher then they will find 4 joints: | Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit | |-----|-----------------------------|----------|----------|----------------------------------|-------|--------------------| | 0 | angle of the first arm | -Inf | Inf | joint0 | hinge | angle (rad) | | 1 | angle of the second arm | -Inf | Inf | joint1 | hinge | angle (rad) | | 2 | x-coordinate of the target | -Inf | Inf | target_x | slide | position (m) | | 3 | y-coordinate of the target | -Inf | Inf | target_y | slide | position (m) | ### Rewards The reward consists of two parts: - *reward_distance*: This reward is a measure of how far the *fingertip* of the reacher (the unattached end) is from the target, with a more negative value assigned for when the reacher's *fingertip* is further away from the target. It is calculated as the negative vector norm of (position of the fingertip - position of target), or *-norm("fingertip" - "target")*. - *reward_control*: A negative reward for penalising the walker if it takes actions that are too large. It is measured as the negative squared Euclidean norm of the action, i.e. as *- sum(action2)*. 
The total reward returned is ***reward*** *=* *reward_distance + reward_control* Unlike other environments, Reacher does not allow you to specify weights for the individual reward terms. However, `info` does contain the keys *reward_dist* and *reward_ctrl*. Thus, if you'd like to weight the terms, you should create a wrapper that computes the weighted reward from `info`. ### Starting State All observations start in state (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) with a noise added for stochasticity. A uniform noise in the range [-0.1, 0.1] is added to the positional attributes, while the target position is selected uniformly at random in a disk of radius 0.2 around the origin. Independent, uniform noise in the range of [-0.005, 0.005] is added to the velocities, and the last element ("fingertip" - "target") is calculated at the end once everything is set. The default setting has a framerate of 2 and a *dt = 2 * 0.01 = 0.02* ### Episode End The episode ends when any of the following happens: 1. Truncation: The episode duration reaches a 50 timesteps (with a new random target popping up if the reacher's fingertip reaches it before 50 timesteps) 2. Termination: Any of the state space values is no longer finite. ### Arguments No additional arguments are currently supported (in v2 and lower), but modifications can be made to the XML file in the assets folder (or by changing the path to a modified XML file in another folder).. ``` env = gym.make('Reacher-v4') ``` There is no v3 for Reacher, unlike the robot environments where a v3 and beyond take gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc. ### Version History * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3 * v2: All continuous control environments now use mujoco_py >= 1.50 * v1: max_time_steps raised to 1000 for robot based tasks (not including reacher, which has a max_time_steps of 50). Added reward_threshold to environments. 
* v0: Initial versions release (1.0.0)
    """

    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 50,
    }

    def __init__(self, **kwargs):
        utils.EzPickle.__init__(self, **kwargs)
        observation_space = Box(low=-np.inf, high=np.inf, shape=(11,), dtype=np.float64)
        MujocoEnv.__init__(
            self, "reacher.xml", 2, observation_space=observation_space, **kwargs
        )

    def step(self, a):
        """One control step; reward = -|fingertip - target| - |a|^2.

        Returns (obs, reward, terminated=False, truncated=False, info) with the
        unweighted reward terms exposed in `info`.
        """
        vec = self.get_body_com("fingertip") - self.get_body_com("target")
        reward_dist = -np.linalg.norm(vec)
        reward_ctrl = -np.square(a).sum()
        reward = reward_dist + reward_ctrl

        # NOTE(review): reward is computed from the pre-step state.
        self.do_simulation(a, self.frame_skip)
        if self.render_mode == "human":
            self.render()

        ob = self._get_obs()
        return (
            ob,
            reward,
            False,  # never terminates; episode end comes from TimeLimit
            False,
            dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl),
        )

    def viewer_setup(self):
        assert self.viewer is not None
        self.viewer.cam.trackbodyid = 0

    def reset_model(self):
        """Perturb joints in [-0.1, 0.1]; rejection-sample a target within the
        disk of radius 0.2 around the origin."""
        qpos = (
            self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq)
            + self.init_qpos
        )
        while True:
            self.goal = self.np_random.uniform(low=-0.2, high=0.2, size=2)
            if np.linalg.norm(self.goal) < 0.2:
                break
        qpos[-2:] = self.goal
        qvel = self.init_qvel + self.np_random.uniform(
            low=-0.005, high=0.005, size=self.model.nv
        )
        qvel[-2:] = 0  # target is static
        self.set_state(qpos, qvel)
        return self._get_obs()

    def _get_obs(self):
        # cos/sin of the arm angles, target xy, arm angular velocities, and
        # the fingertip-to-target displacement (11 values).
        theta = self.data.qpos.flat[:2]
        return np.concatenate(
            [
                np.cos(theta),
                np.sin(theta),
                self.data.qpos.flat[2:],
                self.data.qvel.flat[:2],
                self.get_body_com("fingertip") - self.get_body_com("target"),
            ]
        )


================================================
FILE: gym/envs/mujoco/swimmer.py
================================================
import numpy as np

from gym import utils
from gym.envs.mujoco import MuJocoPyEnv
from gym.spaces import Box


class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
    """Three-link swimmer (mujoco_py backend): maximize forward (+x) speed."""

    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 25,
    }

    def __init__(self, **kwargs):
        observation_space = Box(low=-np.inf, high=np.inf, shape=(8,), dtype=np.float64)
        MuJocoPyEnv.__init__(
            self, "swimmer.xml", 4, observation_space=observation_space, **kwargs
        )
        utils.EzPickle.__init__(self, **kwargs)

    def step(self, a):
        """Reward = forward x-velocity minus a small fixed control cost."""
        ctrl_cost_coeff = 0.0001
        xposbefore = self.sim.data.qpos[0]
        self.do_simulation(a, self.frame_skip)
        xposafter = self.sim.data.qpos[0]

        reward_fwd = (xposafter - xposbefore) / self.dt
        reward_ctrl = -ctrl_cost_coeff * np.square(a).sum()
        reward = reward_fwd + reward_ctrl
        ob = self._get_obs()
        if self.render_mode == "human":
            self.render()

        return (
            ob,
            reward,
            False,  # no termination condition; TimeLimit truncates
            False,
            dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl),
        )

    def _get_obs(self):
        # Drop the global (x, y) position: joint angles + all velocities.
        qpos = self.sim.data.qpos
        qvel = self.sim.data.qvel
        return np.concatenate([qpos.flat[2:], qvel.flat])

    def reset_model(self):
        self.set_state(
            self.init_qpos
            + self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq),
            self.init_qvel
            + self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nv),
        )
        return self._get_obs()


================================================
FILE: gym/envs/mujoco/swimmer_v3.py
================================================
__credits__ = ["Rushiv Arora"]

import numpy as np

from gym import utils
from gym.envs.mujoco import MuJocoPyEnv
from gym.spaces import Box

DEFAULT_CAMERA_CONFIG = {}


class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
    """Configurable swimmer (mujoco_py backend); see the v4 docstring for the
    full environment description."""

    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 25,
    }

    def __init__(
        self,
        xml_file="swimmer.xml",
        forward_reward_weight=1.0,
        ctrl_cost_weight=1e-4,
        reset_noise_scale=0.1,
        exclude_current_positions_from_observation=True,
        **kwargs
    ):
        utils.EzPickle.__init__(
            self,
            xml_file,
            forward_reward_weight,
            ctrl_cost_weight,
            reset_noise_scale,
            exclude_current_positions_from_observation,
            **kwargs
        )

        self._forward_reward_weight = forward_reward_weight
        self._ctrl_cost_weight = ctrl_cost_weight
        self._reset_noise_scale = reset_noise_scale
        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        # 8 observations when the global (x, y) is excluded, 10 otherwise.
        if exclude_current_positions_from_observation:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(8,), dtype=np.float64
            )
        else:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(10,), dtype=np.float64
            )

        MuJocoPyEnv.__init__(
            self, xml_file, 4, observation_space=observation_space, **kwargs
        )

    def control_cost(self, action):
        # Quadratic action penalty scaled by the configured weight.
        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
        return control_cost

    def step(self, action):
        xy_position_before = self.sim.data.qpos[0:2].copy()
        self.do_simulation(action, self.frame_skip)
        xy_position_after = self.sim.data.qpos[0:2].copy()

        xy_velocity = (xy_position_after - xy_position_before) / self.dt
        x_velocity, y_velocity = xy_velocity

        forward_reward = self._forward_reward_weight * x_velocity

        ctrl_cost = self.control_cost(action)

        observation = self._get_obs()
        reward = forward_reward - ctrl_cost
        info = {
            "reward_fwd": forward_reward,
            "reward_ctrl": -ctrl_cost,
            "x_position": xy_position_after[0],
            "y_position": xy_position_after[1],
            "distance_from_origin": np.linalg.norm(xy_position_after, ord=2),
            "x_velocity": x_velocity,
            "y_velocity": y_velocity,
            "forward_reward": forward_reward,
        }

        if self.render_mode == "human":
            self.render()

        return observation, reward, False, False, info

    def _get_obs(self):
        position = self.sim.data.qpos.flat.copy()
        velocity = self.sim.data.qvel.flat.copy()

        if self._exclude_current_positions_from_observation:
            position = position[2:]

        observation = np.concatenate([position, velocity]).ravel()
        return observation

    def reset_model(self):
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nv
        )

        self.set_state(qpos, qvel)

        observation = self._get_obs()
        return observation

    def viewer_setup(self):
        assert self.viewer is not None
        # Apply the (possibly array-valued) camera defaults.
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)


================================================
FILE: gym/envs/mujoco/swimmer_v4.py
================================================
__credits__ = ["Rushiv Arora"]

import numpy as np

from gym import utils
from gym.envs.mujoco import MujocoEnv
from gym.spaces import Box

DEFAULT_CAMERA_CONFIG = {}


class SwimmerEnv(MujocoEnv, utils.EzPickle):
    """
    ### Description
    This environment corresponds to the Swimmer environment described in Rémi Coulom's PhD thesis
    ["Reinforcement Learning Using Neural Networks, with Applications to Motor Control"](https://tel.archives-ouvertes.fr/tel-00003985/document).
    The environment aims to increase the number of independent state and control variables as compared to
    the classic control environments. The swimmers consist of three or more segments ('***links***') and one
    less articulation joints ('***rotors***') - one rotor joint connecting exactly two links to form a linear chain.
    The swimmer is suspended in a two dimensional pool and always starts in the same position (subject to some deviation
    drawn from an uniform distribution), and the goal is to move as fast as possible towards the right by applying torque
    on the rotors and using the fluids friction.

    ### Notes

    The problem parameters are:
    Problem parameters:
    * *n*: number of body parts
    * *mi*: mass of part *i* (*i* ∈ {1...n})
    * *li*: length of part *i* (*i* ∈ {1...n})
    * *k*: viscous-friction coefficient

    While the default environment has *n* = 3, *li* = 0.1, and *k* = 0.1. It is possible to pass a custom MuJoCo XML file
    during construction to increase the number of links, or to tweak any of the parameters.

    ### Action Space
    The action space is a `Box(-1, 1, (2,), float32)`.
An action represents the torques applied between *links* | Num | Action | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit | |-----|------------------------------------|-------------|-------------|----------------------------------|-------|--------------| | 0 | Torque applied on the first rotor | -1 | 1 | motor1_rot | hinge | torque (N m) | | 1 | Torque applied on the second rotor | -1 | 1 | motor2_rot | hinge | torque (N m) | ### Observation Space By default, observations consists of: * θi: angle of part *i* with respect to the *x* axis * θi': its derivative with respect to time (angular velocity) In the default case, observations do not include the x- and y-coordinates of the front tip. These may be included by passing `exclude_current_positions_from_observation=False` during construction. Then, the observation space will have 10 dimensions where the first two dimensions represent the x- and y-coordinates of the front tip. Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x- and y-coordinates will be returned in `info` with keys `"x_position"` and `"y_position"`, respectively. 
By default, the observation is a `ndarray` with shape `(8,)` where the elements correspond to the following: | Num | Observation | Min | Max | Name (in corresponding XML file) | Joint | Unit | | --- | ------------------------------------ | ---- | --- | -------------------------------- | ----- | ------------------------ | | 0 | angle of the front tip | -Inf | Inf | free_body_rot | hinge | angle (rad) | | 1 | angle of the first rotor | -Inf | Inf | motor1_rot | hinge | angle (rad) | | 2 | angle of the second rotor | -Inf | Inf | motor2_rot | hinge | angle (rad) | | 3 | velocity of the tip along the x-axis | -Inf | Inf | slider1 | slide | velocity (m/s) | | 4 | velocity of the tip along the y-axis | -Inf | Inf | slider2 | slide | velocity (m/s) | | 5 | angular velocity of front tip | -Inf | Inf | free_body_rot | hinge | angular velocity (rad/s) | | 6 | angular velocity of first rotor | -Inf | Inf | motor1_rot | hinge | angular velocity (rad/s) | | 7 | angular velocity of second rotor | -Inf | Inf | motor2_rot | hinge | angular velocity (rad/s) | ### Rewards The reward consists of two parts: - *forward_reward*: A reward of moving forward which is measured as *`forward_reward_weight` * (x-coordinate before action - x-coordinate after action)/dt*. *dt* is the time between actions and is dependent on the frame_skip parameter (default is 4), where the frametime is 0.01 - making the default *dt = 4 * 0.01 = 0.04*. This reward would be positive if the swimmer swims right as desired. - *ctrl_cost*: A cost for penalising the swimmer if it takes actions that are too large. 
It is measured as *`ctrl_cost_weight` * sum(action²)* where *`ctrl_cost_weight`* is a parameter set for the control and has a default value of 1e-4. The total reward returned is ***reward*** *=* *forward_reward - ctrl_cost* and `info` will also contain the individual reward terms ### Starting State All observations start in state (0,0,0,0,0,0,0,0) with uniform noise in the range of [-`reset_noise_scale`, `reset_noise_scale`] added to the initial state for stochasticity.
rgb rendering comes from tracking camera (so agent does not run away from screen) * v2: All continuous control environments now use mujoco_py >= 1.50 * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments. * v0: Initial versions release (1.0.0) """ metadata = { "render_modes": [ "human", "rgb_array", "depth_array", ], "render_fps": 25, } def __init__( self, forward_reward_weight=1.0, ctrl_cost_weight=1e-4, reset_noise_scale=0.1, exclude_current_positions_from_observation=True, **kwargs ): utils.EzPickle.__init__( self, forward_reward_weight, ctrl_cost_weight, reset_noise_scale, exclude_current_positions_from_observation, **kwargs ) self._forward_reward_weight = forward_reward_weight self._ctrl_cost_weight = ctrl_cost_weight self._reset_noise_scale = reset_noise_scale self._exclude_current_positions_from_observation = ( exclude_current_positions_from_observation ) if exclude_current_positions_from_observation: observation_space = Box( low=-np.inf, high=np.inf, shape=(8,), dtype=np.float64 ) else: observation_space = Box( low=-np.inf, high=np.inf, shape=(10,), dtype=np.float64 ) MujocoEnv.__init__( self, "swimmer.xml", 4, observation_space=observation_space, **kwargs ) def control_cost(self, action): control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) return control_cost def step(self, action): xy_position_before = self.data.qpos[0:2].copy() self.do_simulation(action, self.frame_skip) xy_position_after = self.data.qpos[0:2].copy() xy_velocity = (xy_position_after - xy_position_before) / self.dt x_velocity, y_velocity = xy_velocity forward_reward = self._forward_reward_weight * x_velocity ctrl_cost = self.control_cost(action) observation = self._get_obs() reward = forward_reward - ctrl_cost info = { "reward_fwd": forward_reward, "reward_ctrl": -ctrl_cost, "x_position": xy_position_after[0], "y_position": xy_position_after[1], "distance_from_origin": np.linalg.norm(xy_position_after, ord=2), "x_velocity": 
x_velocity, "y_velocity": y_velocity, "forward_reward": forward_reward, } if self.render_mode == "human": self.render() return observation, reward, False, False, info def _get_obs(self): position = self.data.qpos.flat.copy() velocity = self.data.qvel.flat.copy() if self._exclude_current_positions_from_observation: position = position[2:] observation = np.concatenate([position, velocity]).ravel() return observation def reset_model(self): noise_low = -self._reset_noise_scale noise_high = self._reset_noise_scale qpos = self.init_qpos + self.np_random.uniform( low=noise_low, high=noise_high, size=self.model.nq ) qvel = self.init_qvel + self.np_random.uniform( low=noise_low, high=noise_high, size=self.model.nv ) self.set_state(qpos, qvel) observation = self._get_obs() return observation def viewer_setup(self): assert self.viewer is not None for key, value in DEFAULT_CAMERA_CONFIG.items(): if isinstance(value, np.ndarray): getattr(self.viewer.cam, key)[:] = value else: setattr(self.viewer.cam, key, value) ================================================ FILE: gym/envs/mujoco/walker2d.py ================================================ import numpy as np from gym import utils from gym.envs.mujoco import MuJocoPyEnv from gym.spaces import Box class Walker2dEnv(MuJocoPyEnv, utils.EzPickle): metadata = { "render_modes": [ "human", "rgb_array", "depth_array", ], "render_fps": 125, } def __init__(self, **kwargs): observation_space = Box(low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64) MuJocoPyEnv.__init__( self, "walker2d.xml", 4, observation_space=observation_space, **kwargs ) utils.EzPickle.__init__(self, **kwargs) def step(self, a): posbefore = self.sim.data.qpos[0] self.do_simulation(a, self.frame_skip) posafter, height, ang = self.sim.data.qpos[0:3] alive_bonus = 1.0 reward = (posafter - posbefore) / self.dt reward += alive_bonus reward -= 1e-3 * np.square(a).sum() terminated = not (height > 0.8 and height < 2.0 and ang > -1.0 and ang < 1.0) ob = self._get_obs() 
if self.render_mode == "human": self.render() return ob, reward, terminated, False, {} def _get_obs(self): qpos = self.sim.data.qpos qvel = self.sim.data.qvel return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel() def reset_model(self): self.set_state( self.init_qpos + self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nq), self.init_qvel + self.np_random.uniform(low=-0.005, high=0.005, size=self.model.nv), ) return self._get_obs() def viewer_setup(self): assert self.viewer is not None self.viewer.cam.trackbodyid = 2 self.viewer.cam.distance = self.model.stat.extent * 0.5 self.viewer.cam.lookat[2] = 1.15 self.viewer.cam.elevation = -20 ================================================ FILE: gym/envs/mujoco/walker2d_v3.py ================================================ import numpy as np from gym import utils from gym.envs.mujoco import MuJocoPyEnv from gym.spaces import Box DEFAULT_CAMERA_CONFIG = { "trackbodyid": 2, "distance": 4.0, "lookat": np.array((0.0, 0.0, 1.15)), "elevation": -20.0, } class Walker2dEnv(MuJocoPyEnv, utils.EzPickle): metadata = { "render_modes": [ "human", "rgb_array", "depth_array", ], "render_fps": 125, } def __init__( self, xml_file="walker2d.xml", forward_reward_weight=1.0, ctrl_cost_weight=1e-3, healthy_reward=1.0, terminate_when_unhealthy=True, healthy_z_range=(0.8, 2.0), healthy_angle_range=(-1.0, 1.0), reset_noise_scale=5e-3, exclude_current_positions_from_observation=True, **kwargs ): utils.EzPickle.__init__( self, xml_file, forward_reward_weight, ctrl_cost_weight, healthy_reward, terminate_when_unhealthy, healthy_z_range, healthy_angle_range, reset_noise_scale, exclude_current_positions_from_observation, **kwargs ) self._forward_reward_weight = forward_reward_weight self._ctrl_cost_weight = ctrl_cost_weight self._healthy_reward = healthy_reward self._terminate_when_unhealthy = terminate_when_unhealthy self._healthy_z_range = healthy_z_range self._healthy_angle_range = healthy_angle_range 
self._reset_noise_scale = reset_noise_scale self._exclude_current_positions_from_observation = ( exclude_current_positions_from_observation ) if exclude_current_positions_from_observation: observation_space = Box( low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64 ) else: observation_space = Box( low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64 ) MuJocoPyEnv.__init__( self, xml_file, 4, observation_space=observation_space, **kwargs ) @property def healthy_reward(self): return ( float(self.is_healthy or self._terminate_when_unhealthy) * self._healthy_reward ) def control_cost(self, action): control_cost = self._ctrl_cost_weight * np.sum(np.square(action)) return control_cost @property def is_healthy(self): z, angle = self.sim.data.qpos[1:3] min_z, max_z = self._healthy_z_range min_angle, max_angle = self._healthy_angle_range healthy_z = min_z < z < max_z healthy_angle = min_angle < angle < max_angle is_healthy = healthy_z and healthy_angle return is_healthy @property def terminated(self): terminated = not self.is_healthy if self._terminate_when_unhealthy else False return terminated def _get_obs(self): position = self.sim.data.qpos.flat.copy() velocity = np.clip(self.sim.data.qvel.flat.copy(), -10, 10) if self._exclude_current_positions_from_observation: position = position[1:] observation = np.concatenate((position, velocity)).ravel() return observation def step(self, action): x_position_before = self.sim.data.qpos[0] self.do_simulation(action, self.frame_skip) x_position_after = self.sim.data.qpos[0] x_velocity = (x_position_after - x_position_before) / self.dt ctrl_cost = self.control_cost(action) forward_reward = self._forward_reward_weight * x_velocity healthy_reward = self.healthy_reward rewards = forward_reward + healthy_reward costs = ctrl_cost observation = self._get_obs() reward = rewards - costs terminated = self.terminated info = { "x_position": x_position_after, "x_velocity": x_velocity, } if self.render_mode == "human": self.render() return 
observation, reward, terminated, False, info def reset_model(self): noise_low = -self._reset_noise_scale noise_high = self._reset_noise_scale qpos = self.init_qpos + self.np_random.uniform( low=noise_low, high=noise_high, size=self.model.nq ) qvel = self.init_qvel + self.np_random.uniform( low=noise_low, high=noise_high, size=self.model.nv ) self.set_state(qpos, qvel) observation = self._get_obs() return observation def viewer_setup(self): assert self.viewer is not None for key, value in DEFAULT_CAMERA_CONFIG.items(): if isinstance(value, np.ndarray): getattr(self.viewer.cam, key)[:] = value else: setattr(self.viewer.cam, key, value) ================================================ FILE: gym/envs/mujoco/walker2d_v4.py ================================================ import numpy as np from gym import utils from gym.envs.mujoco import MujocoEnv from gym.spaces import Box DEFAULT_CAMERA_CONFIG = { "trackbodyid": 2, "distance": 4.0, "lookat": np.array((0.0, 0.0, 1.15)), "elevation": -20.0, } class Walker2dEnv(MujocoEnv, utils.EzPickle): """ ### Description This environment builds on the hopper environment based on the work done by Erez, Tassa, and Todorov in ["Infinite Horizon Model Predictive Control for Nonlinear Periodic Tasks"](http://www.roboticsproceedings.org/rss07/p10.pdf) by adding another set of legs making it possible for the robot to walker forward instead of hop. Like other Mujoco environments, this environment aims to increase the number of independent state and control variables as compared to the classic control environments. The walker is a two-dimensional two-legged figure that consist of four main body parts - a single torso at the top (with the two legs splitting after the torso), two thighs in the middle below the torso, two legs in the bottom below the thighs, and two feet attached to the legs on which the entire body rests. 
The goal is to coordinate both sets of feet, legs, and thighs to move in the forward (right)
    direction by applying torques on the six hinges connecting the six body parts.

    ### Action Space
    The action space is a `Box(-1, 1, (6,), float32)`. An action represents the torques applied at the hinge joints.

    | Num | Action                                 | Control Min | Control Max | Name (in corresponding XML file) | Joint | Unit         |
    |-----|----------------------------------------|-------------|-------------|----------------------------------|-------|--------------|
    | 0   | Torque applied on the thigh rotor      | -1          | 1           | thigh_joint                      | hinge | torque (N m) |
    | 1   | Torque applied on the leg rotor        | -1          | 1           | leg_joint                        | hinge | torque (N m) |
    | 2   | Torque applied on the foot rotor       | -1          | 1           | foot_joint                       | hinge | torque (N m) |
    | 3   | Torque applied on the left thigh rotor | -1          | 1           | thigh_left_joint                 | hinge | torque (N m) |
    | 4   | Torque applied on the left leg rotor   | -1          | 1           | leg_left_joint                   | hinge | torque (N m) |
    | 5   | Torque applied on the left foot rotor  | -1          | 1           | foot_left_joint                  | hinge | torque (N m) |

    ### Observation Space
    Observations consist of positional values of different body parts of the walker,
    followed by the velocities of those individual parts (their derivatives) with all the positions ordered before all the velocities.

    By default, observations do not include the x-coordinate of the top. It may
    be included by passing `exclude_current_positions_from_observation=False` during construction.
    In that case, the observation space will have 18 dimensions where the first dimension
    represents the x-coordinate of the top of the walker.
    Regardless of whether `exclude_current_positions_from_observation` was set to true or false, the x-coordinate
    of the top will be returned in `info` with key `"x_position"`.
By default, observation is a `ndarray` with shape `(17,)` where the elements correspond to the following:

    | Num | Observation                                      | Min  | Max | Name (in corresponding XML file) | Joint | Unit                     |
    | --- | ------------------------------------------------ | ---- | --- | -------------------------------- | ----- | ------------------------ |
    | 0   | z-coordinate of the top (height of walker)       | -Inf | Inf | rootz (torso)                    | slide | position (m)             |
    | 1   | angle of the top                                 | -Inf | Inf | rooty (torso)                    | hinge | angle (rad)              |
    | 2   | angle of the thigh joint                         | -Inf | Inf | thigh_joint                      | hinge | angle (rad)              |
    | 3   | angle of the leg joint                           | -Inf | Inf | leg_joint                        | hinge | angle (rad)              |
    | 4   | angle of the foot joint                          | -Inf | Inf | foot_joint                       | hinge | angle (rad)              |
    | 5   | angle of the left thigh joint                    | -Inf | Inf | thigh_left_joint                 | hinge | angle (rad)              |
    | 6   | angle of the left leg joint                      | -Inf | Inf | leg_left_joint                   | hinge | angle (rad)              |
    | 7   | angle of the left foot joint                     | -Inf | Inf | foot_left_joint                  | hinge | angle (rad)              |
    | 8   | velocity of the x-coordinate of the top          | -Inf | Inf | rootx                            | slide | velocity (m/s)           |
    | 9   | velocity of the z-coordinate (height) of the top | -Inf | Inf | rootz                            | slide | velocity (m/s)           |
    | 10  | angular velocity of the angle of the top         | -Inf | Inf | rooty                            | hinge | angular velocity (rad/s) |
    | 11  | angular velocity of the thigh hinge              | -Inf | Inf | thigh_joint                      | hinge | angular velocity (rad/s) |
    | 12  | angular velocity of the leg hinge                | -Inf | Inf | leg_joint                        | hinge | angular velocity (rad/s) |
    | 13  | angular velocity of the foot hinge               | -Inf | Inf | foot_joint                       | hinge | angular velocity (rad/s) |
    | 14  | angular velocity of the left thigh hinge         | -Inf | Inf | thigh_left_joint                 | hinge | angular velocity (rad/s) |
    | 15  | angular velocity of the left leg hinge           | -Inf | Inf | leg_left_joint                   | hinge | angular velocity (rad/s) |
    | 16  | angular velocity of the left foot hinge          | -Inf | Inf | foot_left_joint                  | hinge | angular velocity (rad/s) |

    ### Rewards
    The reward
consists of three parts:
    - *healthy_reward*: Every timestep that the walker is alive, it receives a fixed reward of value `healthy_reward`,
    - *forward_reward*: A reward of walking forward which is measured as
    *`forward_reward_weight` * (x-coordinate after action - x-coordinate before action)/dt*. *dt* is the time
    between actions and is dependent on the frame_skip parameter (default is 4), where the frametime is 0.002 - making
    the default *dt = 4 * 0.002 = 0.008*. This reward would be positive if the walker walks forward (right) as desired.
    - *ctrl_cost*: A cost for penalising the walker if it takes actions that are too large. It is measured as
    *`ctrl_cost_weight` * sum(action^2)* where *`ctrl_cost_weight`* is a parameter set for the control and has a
    default value of 0.001

    The total reward returned is ***reward*** *=* *healthy_reward bonus + forward_reward - ctrl_cost* and `info` will also contain the individual reward terms

    ### Starting State
    All observations start in state
    (0.0, 1.25, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
    with a uniform noise in the range of [-`reset_noise_scale`, `reset_noise_scale`] added to the values for stochasticity.

    ### Episode End
    The walker is said to be unhealthy if any of the following happens:

    1. Any of the state space values is no longer finite
    2. The height of the walker is ***not*** in the closed interval specified by `healthy_z_range`
    3. The absolute value of the angle (`observation[2]` if `exclude_current_positions_from_observation=False`, else `observation[1]`) is ***not*** in the closed interval specified by `healthy_angle_range`

    If `terminate_when_unhealthy=True` is passed during construction (which is the default),
    the episode ends when any of the following happens:

    1. Truncation: The episode duration reaches a 1000 timesteps
    2. Termination: The walker is unhealthy

    If `terminate_when_unhealthy=False` is passed, the episode is ended only when 1000 timesteps are exceeded.
### Arguments

    No additional arguments are currently supported in v2 and lower.

    ```
    env = gym.make('Walker2d-v4')
    ```

    v3 and beyond take gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc.

    ```
    env = gym.make('Walker2d-v4', ctrl_cost_weight=0.1, ....)
    ```

    | Parameter                                    | Type      | Default          | Description                                                                                                                                                       |
    | -------------------------------------------- | --------- | ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
    | `xml_file`                                   | **str**   | `"walker2d.xml"` | Path to a MuJoCo model                                                                                                                                            |
    | `forward_reward_weight`                      | **float** | `1.0`            | Weight for _forward_reward_ term (see section on reward)                                                                                                          |
    | `ctrl_cost_weight`                           | **float** | `1e-3`           | Weight for _ctrl_cost_ term (see section on reward)                                                                                                               |
    | `healthy_reward`                             | **float** | `1.0`            | Constant reward given if the walker is "healthy" after timestep                                                                                                   |
    | `terminate_when_unhealthy`                   | **bool**  | `True`           | If true, issue a done signal if the z-coordinate of the walker is no longer healthy                                                                               |
    | `healthy_z_range`                            | **tuple** | `(0.8, 2)`       | The z-coordinate of the top of the walker must be in this range to be considered healthy                                                                          |
    | `healthy_angle_range`                        | **tuple** | `(-1, 1)`        | The angle must be in this range to be considered healthy                                                                                                          |
    | `reset_noise_scale`                          | **float** | `5e-3`           | Scale of random perturbations of initial position and velocity (see section on Starting State)                                                                    |
    | `exclude_current_positions_from_observation` | **bool**  | `True`           | Whether or not to omit the x-coordinate from observations. Excluding the position can serve as an inductive bias to induce position-agnostic behavior in policies |

    ### Version History

    * v4: all mujoco environments now use the mujoco bindings in mujoco>=2.1.3
    * v3: support for gym.make kwargs such as xml_file, ctrl_cost_weight, reset_noise_scale etc.
rgb rendering comes from tracking camera (so agent does not run away from screen)
    * v2: All continuous control environments now use mujoco_py >= 1.50
    * v1: max_time_steps raised to 1000 for robot based tasks. Added reward_threshold to environments.
    * v0: Initial versions release (1.0.0)
    """

    metadata = {
        "render_modes": [
            "human",
            "rgb_array",
            "depth_array",
        ],
        "render_fps": 125,
    }

    def __init__(
        self,
        forward_reward_weight=1.0,
        ctrl_cost_weight=1e-3,
        healthy_reward=1.0,
        terminate_when_unhealthy=True,
        healthy_z_range=(0.8, 2.0),
        healthy_angle_range=(-1.0, 1.0),
        reset_noise_scale=5e-3,
        exclude_current_positions_from_observation=True,
        **kwargs
    ):
        # EzPickle records the constructor arguments so the env can be pickled.
        utils.EzPickle.__init__(
            self,
            forward_reward_weight,
            ctrl_cost_weight,
            healthy_reward,
            terminate_when_unhealthy,
            healthy_z_range,
            healthy_angle_range,
            reset_noise_scale,
            exclude_current_positions_from_observation,
            **kwargs
        )

        self._forward_reward_weight = forward_reward_weight
        self._ctrl_cost_weight = ctrl_cost_weight
        self._healthy_reward = healthy_reward
        self._terminate_when_unhealthy = terminate_when_unhealthy
        self._healthy_z_range = healthy_z_range
        self._healthy_angle_range = healthy_angle_range
        self._reset_noise_scale = reset_noise_scale
        self._exclude_current_positions_from_observation = (
            exclude_current_positions_from_observation
        )

        # 17-dim observation when the x-position is excluded, 18-dim otherwise
        # (see _get_obs, which drops qpos[0] in the excluded case).
        if exclude_current_positions_from_observation:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(17,), dtype=np.float64
            )
        else:
            observation_space = Box(
                low=-np.inf, high=np.inf, shape=(18,), dtype=np.float64
            )

        # frame_skip=4: each `step` advances the simulation 4 physics frames.
        MujocoEnv.__init__(
            self, "walker2d.xml", 4, observation_space=observation_space, **kwargs
        )

    @property
    def healthy_reward(self):
        # Bonus paid while healthy; when terminate_when_unhealthy is False the
        # `or` makes the bonus unconditional (paid every step).
        return (
            float(self.is_healthy or self._terminate_when_unhealthy)
            * self._healthy_reward
        )

    def control_cost(self, action):
        # Quadratic penalty on the actuator torques.
        control_cost = self._ctrl_cost_weight * np.sum(np.square(action))
        return control_cost

    @property
    def is_healthy(self):
        # qpos[1] is the torso height (z) and qpos[2] the torso angle; both
        # must lie strictly inside their configured ranges.
        z, angle = self.data.qpos[1:3]

        min_z, max_z = self._healthy_z_range
        min_angle, max_angle = self._healthy_angle_range

        healthy_z = min_z < z < max_z
        healthy_angle = min_angle < angle < max_angle
        is_healthy = healthy_z and healthy_angle

        return is_healthy

    @property
    def terminated(self):
        # Termination is only triggered when terminate_when_unhealthy is set.
        terminated = not self.is_healthy if self._terminate_when_unhealthy else False
        return terminated

    def _get_obs(self):
        position = self.data.qpos.flat.copy()
        # Velocities are clipped to [-10, 10] to bound the observation.
        velocity = np.clip(self.data.qvel.flat.copy(), -10, 10)

        if self._exclude_current_positions_from_observation:
            # Drop the global x-position (qpos[0]).
            position = position[1:]

        observation = np.concatenate((position, velocity)).ravel()
        return observation

    def step(self, action):
        x_position_before = self.data.qpos[0]
        self.do_simulation(action, self.frame_skip)
        x_position_after = self.data.qpos[0]
        # Forward velocity estimated by finite differences over one env step.
        x_velocity = (x_position_after - x_position_before) / self.dt

        ctrl_cost = self.control_cost(action)

        forward_reward = self._forward_reward_weight * x_velocity
        healthy_reward = self.healthy_reward

        rewards = forward_reward + healthy_reward
        costs = ctrl_cost

        observation = self._get_obs()
        reward = rewards - costs
        terminated = self.terminated
        info = {
            "x_position": x_position_after,
            "x_velocity": x_velocity,
        }

        if self.render_mode == "human":
            self.render()
        return observation, reward, terminated, False, info

    def reset_model(self):
        noise_low = -self._reset_noise_scale
        noise_high = self._reset_noise_scale

        # Uniform noise around the initial state for stochastic resets.
        qpos = self.init_qpos + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nq
        )
        qvel = self.init_qvel + self.np_random.uniform(
            low=noise_low, high=noise_high, size=self.model.nv
        )

        self.set_state(qpos, qvel)

        observation = self._get_obs()
        return observation

    def viewer_setup(self):
        assert self.viewer is not None
        # Copy the default camera configuration onto the viewer camera;
        # ndarray values (e.g. lookat) are assigned element-wise in place.
        for key, value in DEFAULT_CAMERA_CONFIG.items():
            if isinstance(value, np.ndarray):
                getattr(self.viewer.cam, key)[:] = value
            else:
                setattr(self.viewer.cam, key, value)


================================================
FILE: gym/envs/registration.py
================================================
import contextlib
import copy
import difflib
import importlib
import importlib.util import re import sys import warnings from dataclasses import dataclass, field from typing import ( Callable, Dict, List, Optional, Sequence, SupportsFloat, Tuple, Union, overload, ) import numpy as np from gym.wrappers import ( AutoResetWrapper, HumanRendering, OrderEnforcing, RenderCollection, TimeLimit, ) from gym.wrappers.compatibility import EnvCompatibility from gym.wrappers.env_checker import PassiveEnvChecker if sys.version_info < (3, 10): import importlib_metadata as metadata # type: ignore else: import importlib.metadata as metadata if sys.version_info >= (3, 8): from typing import Literal else: from typing_extensions import Literal from gym import Env, error, logger ENV_ID_RE = re.compile( r"^(?:(?P[\w:-]+)\/)?(?:(?P[\w:.-]+?))(?:-v(?P\d+))?$" ) def load(name: str) -> callable: """Loads an environment with name and returns an environment creation function Args: name: The environment name Returns: Calls the environment constructor """ mod_name, attr_name = name.split(":") mod = importlib.import_module(mod_name) fn = getattr(mod, attr_name) return fn def parse_env_id(id: str) -> Tuple[Optional[str], str, Optional[int]]: """Parse environment ID string format. This format is true today, but it's *not* an official spec. [namespace/](env-name)-v(version) env-name is group 1, version is group 2 2016-10-31: We're experimentally expanding the environment ID format to include an optional namespace. Args: id: The environment id to parse Returns: A tuple of environment namespace, environment name and version number Raises: Error: If the environment id does not a valid environment regex """ match = ENV_ID_RE.fullmatch(id) if not match: raise error.Error( f"Malformed environment ID: {id}." f"(Currently all IDs must be of the form [namespace/](env-name)-v(version). 
(namespace is optional))" ) namespace, name, version = match.group("namespace", "name", "version") if version is not None: version = int(version) return namespace, name, version def get_env_id(ns: Optional[str], name: str, version: Optional[int]) -> str: """Get the full env ID given a name and (optional) version and namespace. Inverse of :meth:`parse_env_id`. Args: ns: The environment namespace name: The environment name version: The environment version Returns: The environment id """ full_name = name if version is not None: full_name += f"-v{version}" if ns is not None: full_name = ns + "/" + full_name return full_name @dataclass class EnvSpec: """A specification for creating environments with `gym.make`. * id: The string used to create the environment with `gym.make` * entry_point: The location of the environment to create from * reward_threshold: The reward threshold for completing the environment. * nondeterministic: If the observation of an environment cannot be repeated with the same initial state, random number generator state and actions. 
* max_episode_steps: The max number of steps that the environment can take before truncation * order_enforce: If to enforce the order of `reset` before `step` and `render` functions * autoreset: If to automatically reset the environment on episode end * disable_env_checker: If to disable the environment checker wrapper in `gym.make`, by default False (runs the environment checker) * kwargs: Additional keyword arguments passed to the environments through `gym.make` """ id: str entry_point: Union[Callable, str] # Environment attributes reward_threshold: Optional[float] = field(default=None) nondeterministic: bool = field(default=False) # Wrappers max_episode_steps: Optional[int] = field(default=None) order_enforce: bool = field(default=True) autoreset: bool = field(default=False) disable_env_checker: bool = field(default=False) apply_api_compatibility: bool = field(default=False) # Environment arguments kwargs: dict = field(default_factory=dict) # post-init attributes namespace: Optional[str] = field(init=False) name: str = field(init=False) version: Optional[int] = field(init=False) def __post_init__(self): # Initialize namespace, name, version self.namespace, self.name, self.version = parse_env_id(self.id) def make(self, **kwargs) -> Env: # For compatibility purposes return make(self, **kwargs) def _check_namespace_exists(ns: Optional[str]): """Check if a namespace exists. If it doesn't, print a helpful error message.""" if ns is None: return namespaces = { spec_.namespace for spec_ in registry.values() if spec_.namespace is not None } if ns in namespaces: return suggestion = ( difflib.get_close_matches(ns, namespaces, n=1) if len(namespaces) > 0 else None ) suggestion_msg = ( f"Did you mean: `{suggestion[0]}`?" if suggestion else f"Have you installed the proper package for {ns}?" ) raise error.NamespaceNotFound(f"Namespace {ns} not found. {suggestion_msg}") def _check_name_exists(ns: Optional[str], name: str): """Check if an env exists in a namespace. 
If it doesn't, print a helpful error message.""" _check_namespace_exists(ns) names = {spec_.name for spec_ in registry.values() if spec_.namespace == ns} if name in names: return suggestion = difflib.get_close_matches(name, names, n=1) namespace_msg = f" in namespace {ns}" if ns else "" suggestion_msg = f"Did you mean: `{suggestion[0]}`?" if suggestion else "" raise error.NameNotFound( f"Environment {name} doesn't exist{namespace_msg}. {suggestion_msg}" ) def _check_version_exists(ns: Optional[str], name: str, version: Optional[int]): """Check if an env version exists in a namespace. If it doesn't, print a helpful error message. This is a complete test whether an environment identifier is valid, and will provide the best available hints. Args: ns: The environment namespace name: The environment space version: The environment version Raises: DeprecatedEnv: The environment doesn't exist but a default version does VersionNotFound: The ``version`` used doesn't exist DeprecatedEnv: Environment version is deprecated """ if get_env_id(ns, name, version) in registry: return _check_name_exists(ns, name) if version is None: return message = f"Environment version `v{version}` for environment `{get_env_id(ns, name, None)}` doesn't exist." env_specs = [ spec_ for spec_ in registry.values() if spec_.namespace == ns and spec_.name == name ] env_specs = sorted(env_specs, key=lambda spec_: int(spec_.version or -1)) default_spec = [spec_ for spec_ in env_specs if spec_.version is None] if default_spec: message += f" It provides the default version {default_spec[0].id}`." 
if len(env_specs) == 1: raise error.DeprecatedEnv(message) # Process possible versioned environments versioned_specs = [spec_ for spec_ in env_specs if spec_.version is not None] latest_spec = max(versioned_specs, key=lambda spec: spec.version, default=None) # type: ignore if latest_spec is not None and version > latest_spec.version: version_list_msg = ", ".join(f"`v{spec_.version}`" for spec_ in env_specs) message += f" It provides versioned environments: [ {version_list_msg} ]." raise error.VersionNotFound(message) if latest_spec is not None and version < latest_spec.version: raise error.DeprecatedEnv( f"Environment version v{version} for `{get_env_id(ns, name, None)}` is deprecated. " f"Please use `{latest_spec.id}` instead." ) def find_highest_version(ns: Optional[str], name: str) -> Optional[int]: version: List[int] = [ spec_.version for spec_ in registry.values() if spec_.namespace == ns and spec_.name == name and spec_.version is not None ] return max(version, default=None) def load_env_plugins(entry_point: str = "gym.envs") -> None: # Load third-party environments for plugin in metadata.entry_points(group=entry_point): # Python 3.8 doesn't support plugin.module, plugin.attr # So we'll have to try and parse this ourselves module, attr = None, None try: module, attr = plugin.module, plugin.attr # type: ignore ## error: Cannot access member "attr" for type "EntryPoint" except AttributeError: if ":" in plugin.value: module, attr = plugin.value.split(":", maxsplit=1) else: module, attr = plugin.value, None except Exception as e: warnings.warn( f"While trying to load plugin `{plugin}` from {entry_point}, an exception occurred: {e}" ) module, attr = None, None finally: if attr is None: raise error.Error( f"Gym environment plugin `{module}` must specify a function to execute, not a root module" ) context = namespace(plugin.name) if plugin.name.startswith("__") and plugin.name.endswith("__"): # `__internal__` is an artifact of the plugin system when # the root 
namespace had an allow-list. The allow-list is now # removed and plugins can register environments in the root # namespace with the `__root__` magic key. if plugin.name == "__root__" or plugin.name == "__internal__": context = contextlib.nullcontext() else: logger.warn( f"The environment namespace magic key `{plugin.name}` is unsupported. " "To register an environment at the root namespace you should specify the `__root__` namespace." ) with context: fn = plugin.load() try: fn() except Exception as e: logger.warn(str(e)) # fmt: off @overload def make(id: str, **kwargs) -> Env: ... @overload def make(id: EnvSpec, **kwargs) -> Env: ... # Classic control # ---------------------------------------- @overload def make(id: Literal["CartPole-v0", "CartPole-v1"], **kwargs) -> Env[np.ndarray, Union[np.ndarray, int]]: ... @overload def make(id: Literal["MountainCar-v0"], **kwargs) -> Env[np.ndarray, Union[np.ndarray, int]]: ... @overload def make(id: Literal["MountainCarContinuous-v0"], **kwargs) -> Env[np.ndarray, Union[np.ndarray, Sequence[SupportsFloat]]]: ... @overload def make(id: Literal["Pendulum-v1"], **kwargs) -> Env[np.ndarray, Union[np.ndarray, Sequence[SupportsFloat]]]: ... @overload def make(id: Literal["Acrobot-v1"], **kwargs) -> Env[np.ndarray, Union[np.ndarray, int]]: ... # Box2d # ---------------------------------------- @overload def make(id: Literal["LunarLander-v2", "LunarLanderContinuous-v2"], **kwargs) -> Env[np.ndarray, Union[np.ndarray, int]]: ... @overload def make(id: Literal["BipedalWalker-v3", "BipedalWalkerHardcore-v3"], **kwargs) -> Env[np.ndarray, Union[np.ndarray, Sequence[SupportsFloat]]]: ... @overload def make(id: Literal["CarRacing-v2"], **kwargs) -> Env[np.ndarray, Union[np.ndarray, Sequence[SupportsFloat]]]: ... # Toy Text # ---------------------------------------- @overload def make(id: Literal["Blackjack-v1"], **kwargs) -> Env[np.ndarray, Union[np.ndarray, int]]: ... 
@overload def make(id: Literal["FrozenLake-v1", "FrozenLake8x8-v1"], **kwargs) -> Env[np.ndarray, Union[np.ndarray, int]]: ... @overload def make(id: Literal["CliffWalking-v0"], **kwargs) -> Env[np.ndarray, Union[np.ndarray, int]]: ... @overload def make(id: Literal["Taxi-v3"], **kwargs) -> Env[np.ndarray, Union[np.ndarray, int]]: ... # Mujoco # ---------------------------------------- @overload def make(id: Literal[ "Reacher-v2", "Reacher-v4", "Pusher-v2", "Pusher-v4", "InvertedPendulum-v2", "InvertedPendulum-v4", "InvertedDoublePendulum-v2", "InvertedDoublePendulum-v4", "HalfCheetah-v2", "HalfCheetah-v3", "HalfCheetah-v4", "Hopper-v2", "Hopper-v3", "Hopper-v4", "Swimmer-v2", "Swimmer-v3", "Swimmer-v4", "Walker2d-v2", "Walker2d-v3", "Walker2d-v4", "Ant-v2", "Ant-v3", "Ant-v4", "HumanoidStandup-v2", "HumanoidStandup-v4", "Humanoid-v2", "Humanoid-v3", "Humanoid-v4", ], **kwargs) -> Env[np.ndarray, np.ndarray]: ... # fmt: on # Global registry of environments. Meant to be accessed through `register` and `make` registry: Dict[str, EnvSpec] = {} current_namespace: Optional[str] = None def _check_spec_register(spec: EnvSpec): """Checks whether the spec is valid to be registered. Helper function for `register`.""" global registry latest_versioned_spec = max( ( spec_ for spec_ in registry.values() if spec_.namespace == spec.namespace and spec_.name == spec.name and spec_.version is not None ), key=lambda spec_: int(spec_.version), # type: ignore default=None, ) unversioned_spec = next( ( spec_ for spec_ in registry.values() if spec_.namespace == spec.namespace and spec_.name == spec.name and spec_.version is None ), None, ) if unversioned_spec is not None and spec.version is not None: raise error.RegistrationError( "Can't register the versioned environment " f"`{spec.id}` when the unversioned environment " f"`{unversioned_spec.id}` of the same name already exists." 
) elif latest_versioned_spec is not None and spec.version is None: raise error.RegistrationError( "Can't register the unversioned environment " f"`{spec.id}` when the versioned environment " f"`{latest_versioned_spec.id}` of the same name " f"already exists. Note: the default behavior is " f"that `gym.make` with the unversioned environment " f"will return the latest versioned environment" ) # Public API @contextlib.contextmanager def namespace(ns: str): global current_namespace old_namespace = current_namespace current_namespace = ns yield current_namespace = old_namespace def register( id: str, entry_point: Union[Callable, str], reward_threshold: Optional[float] = None, nondeterministic: bool = False, max_episode_steps: Optional[int] = None, order_enforce: bool = True, autoreset: bool = False, disable_env_checker: bool = False, apply_api_compatibility: bool = False, **kwargs, ): """Register an environment with gym. The `id` parameter corresponds to the name of the environment, with the syntax as follows: `(namespace)/(env_name)-v(version)` where `namespace` is optional. It takes arbitrary keyword arguments, which are passed to the `EnvSpec` constructor. Args: id: The environment id entry_point: The entry point for creating the environment reward_threshold: The reward threshold considered to have learnt an environment nondeterministic: If the environment is nondeterministic (even with knowledge of the initial seed and all actions) max_episode_steps: The maximum number of episodes steps before truncation. Used by the Time Limit wrapper. order_enforce: If to enable the order enforcer wrapper to ensure users run functions in the correct order autoreset: If to add the autoreset wrapper such that reset does not need to be called. disable_env_checker: If to disable the environment checker for the environment. Recommended to False. apply_api_compatibility: If to apply the `StepAPICompatibility` wrapper. 
**kwargs: arbitrary keyword arguments which are passed to the environment constructor """ global registry, current_namespace ns, name, version = parse_env_id(id) if current_namespace is not None: if ( kwargs.get("namespace") is not None and kwargs.get("namespace") != current_namespace ): logger.warn( f"Custom namespace `{kwargs.get('namespace')}` is being overridden by namespace `{current_namespace}`. " f"If you are developing a plugin you shouldn't specify a namespace in `register` calls. " "The namespace is specified through the entry point package metadata." ) ns_id = current_namespace else: ns_id = ns full_id = get_env_id(ns_id, name, version) new_spec = EnvSpec( id=full_id, entry_point=entry_point, reward_threshold=reward_threshold, nondeterministic=nondeterministic, max_episode_steps=max_episode_steps, order_enforce=order_enforce, autoreset=autoreset, disable_env_checker=disable_env_checker, apply_api_compatibility=apply_api_compatibility, **kwargs, ) _check_spec_register(new_spec) if new_spec.id in registry: logger.warn(f"Overriding environment {new_spec.id} already in registry.") registry[new_spec.id] = new_spec def make( id: Union[str, EnvSpec], max_episode_steps: Optional[int] = None, autoreset: bool = False, apply_api_compatibility: Optional[bool] = None, disable_env_checker: Optional[bool] = None, **kwargs, ) -> Env: """Create an environment according to the given ID. To find all available environments use `gym.envs.registry.keys()` for all valid ids. Args: id: Name of the environment. Optionally, a module to import can be included, eg. 'module:Env-v0' max_episode_steps: Maximum length of an episode (TimeLimit wrapper). autoreset: Whether to automatically reset the environment after each episode (AutoResetWrapper). apply_api_compatibility: Whether to wrap the environment with the `StepAPICompatibility` wrapper that converts the environment step from a done bool to return termination and truncation bools. 
By default, the argument is None to which the environment specification `apply_api_compatibility` is used which defaults to False. Otherwise, the value of `apply_api_compatibility` is used. If `True`, the wrapper is applied otherwise, the wrapper is not applied. disable_env_checker: If to run the env checker, None will default to the environment specification `disable_env_checker` (which is by default False, running the environment checker), otherwise will run according to this parameter (`True` = not run, `False` = run) kwargs: Additional arguments to pass to the environment constructor. Returns: An instance of the environment. Raises: Error: If the ``id`` doesn't exist then an error is raised """ if isinstance(id, EnvSpec): spec_ = id else: module, id = (None, id) if ":" not in id else id.split(":") if module is not None: try: importlib.import_module(module) except ModuleNotFoundError as e: raise ModuleNotFoundError( f"{e}. Environment registration via importing a module failed. " f"Check whether '{module}' contains env registration and can be imported." ) spec_ = registry.get(id) ns, name, version = parse_env_id(id) latest_version = find_highest_version(ns, name) if ( version is not None and latest_version is not None and latest_version > version ): logger.warn( f"The environment {id} is out of date. You should consider " f"upgrading to version `v{latest_version}`." ) if version is None and latest_version is not None: version = latest_version new_env_id = get_env_id(ns, name, version) spec_ = registry.get(new_env_id) logger.warn( f"Using the latest versioned environment `{new_env_id}` " f"instead of the unversioned environment `{id}`." 
) if spec_ is None: _check_version_exists(ns, name, version) raise error.Error(f"No registered env with id: {id}") _kwargs = spec_.kwargs.copy() _kwargs.update(kwargs) if spec_.entry_point is None: raise error.Error(f"{spec_.id} registered but entry_point is not specified") elif callable(spec_.entry_point): env_creator = spec_.entry_point else: # Assume it's a string env_creator = load(spec_.entry_point) mode = _kwargs.get("render_mode") apply_human_rendering = False apply_render_collection = False # If we have access to metadata we check that "render_mode" is valid and see if the HumanRendering wrapper needs to be applied if mode is not None and hasattr(env_creator, "metadata"): assert isinstance( env_creator.metadata, dict ), f"Expect the environment creator ({env_creator}) metadata to be dict, actual type: {type(env_creator.metadata)}" if "render_modes" in env_creator.metadata: render_modes = env_creator.metadata["render_modes"] if not isinstance(render_modes, Sequence): logger.warn( f"Expects the environment metadata render_modes to be a Sequence (tuple or list), actual type: {type(render_modes)}" ) # Apply the `HumanRendering` wrapper, if the mode=="human" but "human" not in render_modes if ( mode == "human" and "human" not in render_modes and ("rgb_array" in render_modes or "rgb_array_list" in render_modes) ): logger.warn( "You are trying to use 'human' rendering for an environment that doesn't natively support it. " "The HumanRendering wrapper is being applied to your environment." 
) apply_human_rendering = True if "rgb_array" in render_modes: _kwargs["render_mode"] = "rgb_array" else: _kwargs["render_mode"] = "rgb_array_list" elif ( mode not in render_modes and mode.endswith("_list") and mode[: -len("_list")] in render_modes ): _kwargs["render_mode"] = mode[: -len("_list")] apply_render_collection = True elif mode not in render_modes: logger.warn( f"The environment is being initialised with mode ({mode}) that is not in the possible render_modes ({render_modes})." ) else: logger.warn( f"The environment creator metadata doesn't include `render_modes`, contains: {list(env_creator.metadata.keys())}" ) if apply_api_compatibility is True or ( apply_api_compatibility is None and spec_.apply_api_compatibility is True ): # If we use the compatibility layer, we treat the render mode explicitly and don't pass it to the env creator render_mode = _kwargs.pop("render_mode", None) else: render_mode = None try: env = env_creator(**_kwargs) except TypeError as e: if ( str(e).find("got an unexpected keyword argument 'render_mode'") >= 0 and apply_human_rendering ): raise error.Error( f"You passed render_mode='human' although {id} doesn't implement human-rendering natively. " "Gym tried to apply the HumanRendering wrapper but it looks like your environment is using the old " "rendering API, which is not supported by the HumanRendering wrapper." 
            )
        else:
            # Not the human-rendering TypeError case: re-raise the original error.
            raise e

    # Copies the environment creation specification and kwargs to add to the environment specification details
    spec_ = copy.deepcopy(spec_)
    spec_.kwargs = _kwargs
    env.unwrapped.spec = spec_

    # NOTE: wrapper order below matters — each wrapper is applied outside the
    # previous one, so e.g. the TimeLimit counter wraps the checked env.

    # Add step API wrapper
    if apply_api_compatibility is True or (
        apply_api_compatibility is None and spec_.apply_api_compatibility is True
    ):
        env = EnvCompatibility(env, render_mode)

    # Run the environment checker as the lowest level wrapper
    if disable_env_checker is False or (
        disable_env_checker is None and spec_.disable_env_checker is False
    ):
        env = PassiveEnvChecker(env)

    # Add the order enforcing wrapper
    if spec_.order_enforce:
        env = OrderEnforcing(env)

    # Add the time limit wrapper: an explicit `max_episode_steps` argument
    # takes precedence over the value registered on the spec.
    if max_episode_steps is not None:
        env = TimeLimit(env, max_episode_steps)
    elif spec_.max_episode_steps is not None:
        env = TimeLimit(env, spec_.max_episode_steps)

    # Add the autoreset wrapper
    if autoreset:
        env = AutoResetWrapper(env)

    # Add human rendering wrapper
    if apply_human_rendering:
        env = HumanRendering(env)
    elif apply_render_collection:
        env = RenderCollection(env)

    return env


def spec(env_id: str) -> EnvSpec:
    """Retrieve the spec for the given environment from the global registry.

    Raises ``error.Error`` (after a more specific version check) when no
    environment with ``env_id`` is registered.
    """
    spec_ = registry.get(env_id)
    if spec_ is None:
        ns, name, version = parse_env_id(env_id)
        # May raise a more precise error (e.g. version mismatch) than the
        # generic one below.
        _check_version_exists(ns, name, version)
        raise error.Error(f"No registered env with id: {env_id}")
    else:
        assert isinstance(spec_, EnvSpec)
        return spec_


================================================
FILE: gym/envs/toy_text/__init__.py
================================================
from gym.envs.toy_text.blackjack import BlackjackEnv
from gym.envs.toy_text.cliffwalking import CliffWalkingEnv
from gym.envs.toy_text.frozen_lake import FrozenLakeEnv
from gym.envs.toy_text.taxi import TaxiEnv


================================================
FILE: gym/envs/toy_text/blackjack.py
================================================
import os
from typing import Optional

import numpy as np

import gym
from gym import spaces
from
gym.error import DependencyNotInstalled


def cmp(a, b):
    # Three-way comparison: +1.0 if a > b, -1.0 if a < b, 0.0 on a tie.
    return float(a > b) - float(a < b)


# 1 = Ace, 2-10 = Number cards, Jack/Queen/King = 10
deck = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10]


def draw_card(np_random):
    # Uniform draw with replacement (infinite-deck assumption).
    return int(np_random.choice(deck))


def draw_hand(np_random):
    # A starting hand is two independent card draws.
    return [draw_card(np_random), draw_card(np_random)]


def usable_ace(hand):  # Does this hand have a usable ace?
    # An ace is "usable" when counting it as 11 does not bust the hand.
    return 1 in hand and sum(hand) + 10 <= 21


def sum_hand(hand):  # Return current hand total
    if usable_ace(hand):
        return sum(hand) + 10
    return sum(hand)


def is_bust(hand):  # Is this hand a bust?
    return sum_hand(hand) > 21


def score(hand):  # What is the score of this hand (0 if bust)
    return 0 if is_bust(hand) else sum_hand(hand)


def is_natural(hand):  # Is this hand a natural blackjack?
    # A natural is exactly an ace plus a ten-valued card.
    return sorted(hand) == [1, 10]


class BlackjackEnv(gym.Env):
    """
    Blackjack is a card game where the goal is to beat the dealer by obtaining cards
    that sum to closer to 21 (without going over 21) than the dealers cards.

    ### Description
    Card Values:

    - Face cards (Jack, Queen, King) have a point value of 10.
    - Aces can either count as 11 (called a 'usable ace') or 1.
    - Numerical cards (2-9) have a value equal to their number.

    This game is played with an infinite deck (or with replacement).
    The game starts with the dealer having one face up and one face down card,
    while the player has two face up cards.

    The player can request additional cards (hit, action=1) until they decide to stop (stick, action=0)
    or exceed 21 (bust, immediate loss).
    After the player sticks, the dealer reveals their facedown card, and draws until
    their sum is 17 or greater.
    If the dealer goes bust, the player wins.
    If neither the player nor the dealer busts, the outcome (win, lose, draw) is decided
    by whose sum is closer to 21.

    ### Action Space
    There are two actions: stick (0), and hit (1).

    ### Observation Space
    The observation consists of a 3-tuple containing: the player's current sum,
    the value of the dealer's one showing card (1-10 where 1 is ace),
    and whether the player holds a usable ace (0 or 1).

    This environment corresponds to the version of the blackjack problem
    described in Example 5.1 in Reinforcement Learning: An Introduction
    by Sutton and Barto (http://incompleteideas.net/book/the-book-2nd.html).

    ### Rewards
    - win game: +1
    - lose game: -1
    - draw game: 0
    - win game with natural blackjack:
    +1.5 (if natural is True)
    +1 (if natural is False)

    ### Arguments

    ```
    gym.make('Blackjack-v1', natural=False, sab=False)
    ```

    `natural=False`: Whether to give an additional reward for
    starting with a natural blackjack, i.e. starting with an ace and ten (sum is 21).

    `sab=False`: Whether to follow the exact rules outlined in the book by
    Sutton and Barto. If `sab` is `True`, the keyword argument `natural` will be ignored.
    If the player achieves a natural blackjack and the dealer does not, the player
    will win (i.e. get a reward of +1). The reverse rule does not apply.
    If both the player and the dealer get a natural, it will be a draw (i.e. reward 0).

    ### Version History
    * v0: Initial versions release (1.0.0)
    """

    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": 4,
    }

    def __init__(self, render_mode: Optional[str] = None, natural=False, sab=False):
        self.action_space = spaces.Discrete(2)
        # (player sum, dealer showing card, usable ace flag)
        self.observation_space = spaces.Tuple(
            (spaces.Discrete(32), spaces.Discrete(11), spaces.Discrete(2))
        )

        # Flag to payout 1.5 on a "natural" blackjack win, like casino rules
        # Ref: http://www.bicyclecards.com/how-to-play/blackjack/
        self.natural = natural

        # Flag for full agreement with the (Sutton and Barto, 2018) definition.
Overrides self.natural self.sab = sab self.render_mode = render_mode def step(self, action): assert self.action_space.contains(action) if action: # hit: add a card to players hand and return self.player.append(draw_card(self.np_random)) if is_bust(self.player): terminated = True reward = -1.0 else: terminated = False reward = 0.0 else: # stick: play out the dealers hand, and score terminated = True while sum_hand(self.dealer) < 17: self.dealer.append(draw_card(self.np_random)) reward = cmp(score(self.player), score(self.dealer)) if self.sab and is_natural(self.player) and not is_natural(self.dealer): # Player automatically wins. Rules consistent with S&B reward = 1.0 elif ( not self.sab and self.natural and is_natural(self.player) and reward == 1.0 ): # Natural gives extra points, but doesn't autowin. Legacy implementation reward = 1.5 if self.render_mode == "human": self.render() return self._get_obs(), reward, terminated, False, {} def _get_obs(self): return (sum_hand(self.player), self.dealer[0], usable_ace(self.player)) def reset( self, seed: Optional[int] = None, options: Optional[dict] = None, ): super().reset(seed=seed) self.dealer = draw_hand(self.np_random) self.player = draw_hand(self.np_random) _, dealer_card_value, _ = self._get_obs() suits = ["C", "D", "H", "S"] self.dealer_top_card_suit = self.np_random.choice(suits) if dealer_card_value == 1: self.dealer_top_card_value_str = "A" elif dealer_card_value == 10: self.dealer_top_card_value_str = self.np_random.choice(["J", "Q", "K"]) else: self.dealer_top_card_value_str = str(dealer_card_value) if self.render_mode == "human": self.render() return self._get_obs(), {} def render(self): if self.render_mode is None: gym.logger.warn( "You are calling render method without specifying any render mode. " "You can specify the render_mode at initialization, " f'e.g. 
gym("{self.spec.id}", render_mode="rgb_array")' ) return try: import pygame except ImportError: raise DependencyNotInstalled( "pygame is not installed, run `pip install gym[toy_text]`" ) player_sum, dealer_card_value, usable_ace = self._get_obs() screen_width, screen_height = 600, 500 card_img_height = screen_height // 3 card_img_width = int(card_img_height * 142 / 197) spacing = screen_height // 20 bg_color = (7, 99, 36) white = (255, 255, 255) if not hasattr(self, "screen"): pygame.init() if self.render_mode == "human": pygame.display.init() self.screen = pygame.display.set_mode((screen_width, screen_height)) else: pygame.font.init() self.screen = pygame.Surface((screen_width, screen_height)) if not hasattr(self, "clock"): self.clock = pygame.time.Clock() self.screen.fill(bg_color) def get_image(path): cwd = os.path.dirname(__file__) image = pygame.image.load(os.path.join(cwd, path)) return image def get_font(path, size): cwd = os.path.dirname(__file__) font = pygame.font.Font(os.path.join(cwd, path), size) return font small_font = get_font( os.path.join("font", "Minecraft.ttf"), screen_height // 15 ) dealer_text = small_font.render( "Dealer: " + str(dealer_card_value), True, white ) dealer_text_rect = self.screen.blit(dealer_text, (spacing, spacing)) def scale_card_img(card_img): return pygame.transform.scale(card_img, (card_img_width, card_img_height)) dealer_card_img = scale_card_img( get_image( os.path.join( "img", f"{self.dealer_top_card_suit}{self.dealer_top_card_value_str}.png", ) ) ) dealer_card_rect = self.screen.blit( dealer_card_img, ( screen_width // 2 - card_img_width - spacing // 2, dealer_text_rect.bottom + spacing, ), ) hidden_card_img = scale_card_img(get_image(os.path.join("img", "Card.png"))) self.screen.blit( hidden_card_img, ( screen_width // 2 + spacing // 2, dealer_text_rect.bottom + spacing, ), ) player_text = small_font.render("Player", True, white) player_text_rect = self.screen.blit( player_text, (spacing, dealer_card_rect.bottom + 1.5 
* spacing) ) large_font = get_font(os.path.join("font", "Minecraft.ttf"), screen_height // 6) player_sum_text = large_font.render(str(player_sum), True, white) player_sum_text_rect = self.screen.blit( player_sum_text, ( screen_width // 2 - player_sum_text.get_width() // 2, player_text_rect.bottom + spacing, ), ) if usable_ace: usable_ace_text = small_font.render("usable ace", True, white) self.screen.blit( usable_ace_text, ( screen_width // 2 - usable_ace_text.get_width() // 2, player_sum_text_rect.bottom + spacing // 2, ), ) if self.render_mode == "human": pygame.event.pump() pygame.display.update() self.clock.tick(self.metadata["render_fps"]) else: return np.transpose( np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2) ) def close(self): if hasattr(self, "screen"): import pygame pygame.display.quit() pygame.quit() # Pixel art from Mariia Khmelnytska (https://www.123rf.com/photo_104453049_stock-vector-pixel-art-playing-cards-standart-deck-vector-set.html) ================================================ FILE: gym/envs/toy_text/cliffwalking.py ================================================ from contextlib import closing from io import StringIO from os import path from typing import Optional import numpy as np from gym import Env, logger, spaces from gym.envs.toy_text.utils import categorical_sample from gym.error import DependencyNotInstalled UP = 0 RIGHT = 1 DOWN = 2 LEFT = 3 class CliffWalkingEnv(Env): """ This is a simple implementation of the Gridworld Cliff reinforcement learning task. Adapted from Example 6.6 (page 106) from [Reinforcement Learning: An Introduction by Sutton and Barto](http://incompleteideas.net/book/bookdraft2018jan1.pdf). 
    With inspiration from:
    [https://github.com/dennybritz/reinforcement-learning/blob/master/lib/envs/cliff_walking.py](https://github.com/dennybritz/reinforcement-learning/blob/master/lib/envs/cliff_walking.py)

    ### Description
    The board is a 4x12 matrix, with (using NumPy matrix indexing):

    - [3, 0] as the start at bottom-left
    - [3, 11] as the goal at bottom-right
    - [3, 1..10] as the cliff at bottom-center

    If the agent steps on the cliff, it returns to the start.
    An episode terminates when the agent reaches the goal.

    ### Actions
    There are 4 discrete deterministic actions:

    - 0: move up
    - 1: move right
    - 2: move down
    - 3: move left

    ### Observations
    There are 3x12 + 1 possible states. In fact, the agent cannot be at the cliff, nor at
    the goal (as this results in the end of the episode). It remains all the positions
    of the first 3 rows plus the bottom-left cell.
    The observation is simply the current position encoded as
    [flattened index](https://numpy.org/doc/stable/reference/generated/numpy.unravel_index.html).

    ### Reward
    Each time step incurs -1 reward, and stepping into the cliff incurs -100 reward.

    ### Arguments

    ```
    gym.make('CliffWalking-v0')
    ```

    ### Version History
    - v0: Initial version release
    """

    metadata = {
        "render_modes": ["human", "rgb_array", "ansi"],
        "render_fps": 4,
    }

    def __init__(self, render_mode: Optional[str] = None):
        # Grid is 4 rows x 12 columns; states are flattened row-major indices.
        self.shape = (4, 12)
        self.start_state_index = np.ravel_multi_index((3, 0), self.shape)

        self.nS = np.prod(self.shape)
        self.nA = 4

        # Cliff Location
        self._cliff = np.zeros(self.shape, dtype=bool)
        self._cliff[3, 1:-1] = True

        # Calculate transition probabilities and rewards.
        # self.P[s][a] is a list of (probability, next_state, reward, terminated)
        # tuples; transitions here are deterministic (single entry, prob 1.0).
        self.P = {}
        for s in range(self.nS):
            position = np.unravel_index(s, self.shape)
            self.P[s] = {a: [] for a in range(self.nA)}
            self.P[s][UP] = self._calculate_transition_prob(position, [-1, 0])
            self.P[s][RIGHT] = self._calculate_transition_prob(position, [0, 1])
            self.P[s][DOWN] = self._calculate_transition_prob(position, [1, 0])
            self.P[s][LEFT] = self._calculate_transition_prob(position, [0, -1])

        # Calculate initial state distribution
        # We always start in state (3, 0)
        self.initial_state_distrib = np.zeros(self.nS)
        self.initial_state_distrib[self.start_state_index] = 1.0

        self.observation_space = spaces.Discrete(self.nS)
        self.action_space = spaces.Discrete(self.nA)

        self.render_mode = render_mode

        # pygame utils (created lazily in _render_gui)
        self.cell_size = (60, 60)
        self.window_size = (
            self.shape[1] * self.cell_size[1],
            self.shape[0] * self.cell_size[0],
        )
        self.window_surface = None
        self.clock = None
        self.elf_images = None
        self.start_img = None
        self.goal_img = None
        self.cliff_img = None
        self.mountain_bg_img = None
        self.near_cliff_img = None
        self.tree_img = None

    def _limit_coordinates(self, coord: np.ndarray) -> np.ndarray:
        """Prevent the agent from falling out of the grid world."""
        # Clamp row then column to the board boundaries.
        coord[0] = min(coord[0], self.shape[0] - 1)
        coord[0] = max(coord[0], 0)
        coord[1] = min(coord[1], self.shape[1] - 1)
        coord[1] = max(coord[1], 0)
        return coord

    def _calculate_transition_prob(self, current, delta):
        """Determine the outcome for an action. Transition Prob is always 1.0.

        Args:
            current: Current position on the grid as (row, col)
            delta: Change in position for transition

        Returns:
            Tuple of ``(1.0, new_state, reward, terminated)``
        """
        new_position = np.array(current) + np.array(delta)
        new_position = self._limit_coordinates(new_position).astype(int)
        new_state = np.ravel_multi_index(tuple(new_position), self.shape)
        # Stepping into the cliff sends the agent back to the start (not terminal).
        if self._cliff[tuple(new_position)]:
            return [(1.0, self.start_state_index, -100, False)]

        terminal_state = (self.shape[0] - 1, self.shape[1] - 1)
        is_terminated = tuple(new_position) == terminal_state
        return [(1.0, new_state, -1, is_terminated)]

    def step(self, a):
        # Deterministic dynamics: the single transition entry is always chosen.
        transitions = self.P[self.s][a]
        i = categorical_sample([t[0] for t in transitions], self.np_random)
        p, s, r, t = transitions[i]
        self.s = s
        self.lastaction = a

        if self.render_mode == "human":
            self.render()
        return (int(s), r, t, False, {"prob": p})

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        super().reset(seed=seed)
        self.s = categorical_sample(self.initial_state_distrib, self.np_random)
        self.lastaction = None

        if self.render_mode == "human":
            self.render()
        return int(self.s), {"prob": 1}

    def render(self):
        if self.render_mode is None:
            logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
            )
        elif self.render_mode == "ansi":
            return self._render_text()
        else:
            return self._render_gui(self.render_mode)

    def _render_gui(self, mode):
        try:
            import pygame
        except ImportError:
            raise DependencyNotInstalled(
                "pygame is not installed, run `pip install gym[toy_text]`"
            )
        if self.window_surface is None:
            pygame.init()

            if mode == "human":
                pygame.display.init()
                pygame.display.set_caption("CliffWalking")
                self.window_surface = pygame.display.set_mode(self.window_size)
            else:  # rgb_array
                self.window_surface = pygame.Surface(self.window_size)
        if self.clock is None:
            self.clock = pygame.time.Clock()
        # Lazy-load every sprite once; order of elf images matches action ids
        # (UP, RIGHT, DOWN, LEFT).
        if self.elf_images is None:
            hikers = [
                path.join(path.dirname(__file__), "img/elf_up.png"),
                path.join(path.dirname(__file__), "img/elf_right.png"),
                path.join(path.dirname(__file__), "img/elf_down.png"),
                path.join(path.dirname(__file__), "img/elf_left.png"),
            ]
            self.elf_images = [
                pygame.transform.scale(pygame.image.load(f_name), self.cell_size)
                for f_name in hikers
            ]
        if self.start_img is None:
            file_name = path.join(path.dirname(__file__), "img/stool.png")
            self.start_img = pygame.transform.scale(
                pygame.image.load(file_name), self.cell_size
            )
        if self.goal_img is None:
            file_name = path.join(path.dirname(__file__), "img/cookie.png")
            self.goal_img = pygame.transform.scale(
                pygame.image.load(file_name), self.cell_size
            )
        if self.mountain_bg_img is None:
            bg_imgs = [
                path.join(path.dirname(__file__), "img/mountain_bg1.png"),
                path.join(path.dirname(__file__), "img/mountain_bg2.png"),
            ]
            self.mountain_bg_img = [
                pygame.transform.scale(pygame.image.load(f_name), self.cell_size)
                for f_name in bg_imgs
            ]
        if self.near_cliff_img is None:
            near_cliff_imgs = [
                path.join(path.dirname(__file__), "img/mountain_near-cliff1.png"),
                path.join(path.dirname(__file__), "img/mountain_near-cliff2.png"),
            ]
            self.near_cliff_img = [
                pygame.transform.scale(pygame.image.load(f_name), self.cell_size)
                for f_name in near_cliff_imgs
            ]
        if self.cliff_img is None:
            file_name = path.join(path.dirname(__file__), "img/mountain_cliff.png")
            self.cliff_img = pygame.transform.scale(
                pygame.image.load(file_name), self.cell_size
            )

        for s in range(self.nS):
            row, col = np.unravel_index(s, self.shape)
            pos = (col * self.cell_size[0], row * self.cell_size[1])
            # Checkerboard background: alternates with row/column parity.
            check_board_mask = row % 2 ^ col % 2
            self.window_surface.blit(self.mountain_bg_img[check_board_mask], pos)

            if self._cliff[row, col]:
                self.window_surface.blit(self.cliff_img, pos)
            if row < self.shape[0] - 1 and self._cliff[row + 1, col]:
                self.window_surface.blit(self.near_cliff_img[check_board_mask], pos)
            if s == self.start_state_index:
                self.window_surface.blit(self.start_img, pos)
            if s == self.nS - 1:
                self.window_surface.blit(self.goal_img, pos)
            if s == self.s:
                # Draw the agent slightly above the cell centre.
                elf_pos = (pos[0], pos[1] - 0.1 * self.cell_size[1])
                last_action = self.lastaction if self.lastaction is not None else 2
                self.window_surface.blit(self.elf_images[last_action], elf_pos)

        if mode == "human":
            pygame.event.pump()
            pygame.display.update()
            self.clock.tick(self.metadata["render_fps"])
        else:  # rgb_array
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(self.window_surface)), axes=(1, 0, 2)
            )

    def _render_text(self):
        outfile = StringIO()

        for s in range(self.nS):
            position = np.unravel_index(s, self.shape)
            if self.s == s:
                output = " x "
            # Print terminal state
            elif position == (3, 11):
                output = " T "
            elif self._cliff[position]:
                output = " C "
            else:
                output = " o "

            if position[1] == 0:
                output = output.lstrip()
            if position[1] == self.shape[1] - 1:
                output = output.rstrip()
                output += "\n"

            outfile.write(output)
        outfile.write("\n")

        with closing(outfile):
            return outfile.getvalue()


================================================
FILE: gym/envs/toy_text/frozen_lake.py
================================================
from contextlib import closing
from io import StringIO
from os import path
from typing import List, Optional

import numpy as np

from gym import Env, logger, spaces, utils
from gym.envs.toy_text.utils import categorical_sample
from gym.error import DependencyNotInstalled LEFT = 0 DOWN = 1 RIGHT = 2 UP = 3 MAPS = { "4x4": ["SFFF", "FHFH", "FFFH", "HFFG"], "8x8": [ "SFFFFFFF", "FFFFFFFF", "FFFHFFFF", "FFFFFHFF", "FFFHFFFF", "FHHFFFHF", "FHFFHFHF", "FFFHFFFG", ], } # DFS to check that it's a valid path. def is_valid(board: List[List[str]], max_size: int) -> bool: frontier, discovered = [], set() frontier.append((0, 0)) while frontier: r, c = frontier.pop() if not (r, c) in discovered: discovered.add((r, c)) directions = [(1, 0), (0, 1), (-1, 0), (0, -1)] for x, y in directions: r_new = r + x c_new = c + y if r_new < 0 or r_new >= max_size or c_new < 0 or c_new >= max_size: continue if board[r_new][c_new] == "G": return True if board[r_new][c_new] != "H": frontier.append((r_new, c_new)) return False def generate_random_map(size: int = 8, p: float = 0.8) -> List[str]: """Generates a random valid map (one that has a path from start to goal) Args: size: size of each side of the grid p: probability that a tile is frozen Returns: A random valid map """ valid = False board = [] # initialize to make pyright happy while not valid: p = min(1, p) board = np.random.choice(["F", "H"], (size, size), p=[p, 1 - p]) board[0][0] = "S" board[-1][-1] = "G" valid = is_valid(board, size) return ["".join(x) for x in board] class FrozenLakeEnv(Env): """ Frozen lake involves crossing a frozen lake from Start(S) to Goal(G) without falling into any Holes(H) by walking over the Frozen(F) lake. The agent may not always move in the intended direction due to the slippery nature of the frozen lake. ### Action Space The agent takes a 1-element vector for actions. The action space is `(dir)`, where `dir` decides direction to move in which can be: - 0: LEFT - 1: DOWN - 2: RIGHT - 3: UP ### Observation Space The observation is a value representing the agent's current position as current_row * nrows + current_col (where both the row and col start at 0). 
For example, the goal position in the 4x4 map can be calculated as follows: 3 * 4 + 3 = 15. The number of possible observations is dependent on the size of the map. For example, the 4x4 map has 16 possible observations. ### Rewards Reward schedule: - Reach goal(G): +1 - Reach hole(H): 0 - Reach frozen(F): 0 ### Arguments ``` gym.make('FrozenLake-v1', desc=None, map_name="4x4", is_slippery=True) ``` `desc`: Used to specify custom map for frozen lake. For example, desc=["SFFF", "FHFH", "FFFH", "HFFG"]. A random generated map can be specified by calling the function `generate_random_map`. For example, ``` from gym.envs.toy_text.frozen_lake import generate_random_map gym.make('FrozenLake-v1', desc=generate_random_map(size=8)) ``` `map_name`: ID to use any of the preloaded maps. "4x4":[ "SFFF", "FHFH", "FFFH", "HFFG" ] "8x8": [ "SFFFFFFF", "FFFFFFFF", "FFFHFFFF", "FFFFFHFF", "FFFHFFFF", "FHHFFFHF", "FHFFHFHF", "FFFHFFFG", ] `is_slippery`: True/False. If True will move in intended direction with probability of 1/3 else will move in either perpendicular direction with equal probability of 1/3 in both directions. 
For example, if action is left and is_slippery is True, then: - P(move left)=1/3 - P(move up)=1/3 - P(move down)=1/3 ### Version History * v1: Bug fixes to rewards * v0: Initial versions release (1.0.0) """ metadata = { "render_modes": ["human", "ansi", "rgb_array"], "render_fps": 4, } def __init__( self, render_mode: Optional[str] = None, desc=None, map_name="4x4", is_slippery=True, ): if desc is None and map_name is None: desc = generate_random_map() elif desc is None: desc = MAPS[map_name] self.desc = desc = np.asarray(desc, dtype="c") self.nrow, self.ncol = nrow, ncol = desc.shape self.reward_range = (0, 1) nA = 4 nS = nrow * ncol self.initial_state_distrib = np.array(desc == b"S").astype("float64").ravel() self.initial_state_distrib /= self.initial_state_distrib.sum() self.P = {s: {a: [] for a in range(nA)} for s in range(nS)} def to_s(row, col): return row * ncol + col def inc(row, col, a): if a == LEFT: col = max(col - 1, 0) elif a == DOWN: row = min(row + 1, nrow - 1) elif a == RIGHT: col = min(col + 1, ncol - 1) elif a == UP: row = max(row - 1, 0) return (row, col) def update_probability_matrix(row, col, action): newrow, newcol = inc(row, col, action) newstate = to_s(newrow, newcol) newletter = desc[newrow, newcol] terminated = bytes(newletter) in b"GH" reward = float(newletter == b"G") return newstate, reward, terminated for row in range(nrow): for col in range(ncol): s = to_s(row, col) for a in range(4): li = self.P[s][a] letter = desc[row, col] if letter in b"GH": li.append((1.0, s, 0, True)) else: if is_slippery: for b in [(a - 1) % 4, a, (a + 1) % 4]: li.append( (1.0 / 3.0, *update_probability_matrix(row, col, b)) ) else: li.append((1.0, *update_probability_matrix(row, col, a))) self.observation_space = spaces.Discrete(nS) self.action_space = spaces.Discrete(nA) self.render_mode = render_mode # pygame utils self.window_size = (min(64 * ncol, 512), min(64 * nrow, 512)) self.cell_size = ( self.window_size[0] // self.ncol, self.window_size[1] // 
self.nrow, ) self.window_surface = None self.clock = None self.hole_img = None self.cracked_hole_img = None self.ice_img = None self.elf_images = None self.goal_img = None self.start_img = None def step(self, a): transitions = self.P[self.s][a] i = categorical_sample([t[0] for t in transitions], self.np_random) p, s, r, t = transitions[i] self.s = s self.lastaction = a if self.render_mode == "human": self.render() return (int(s), r, t, False, {"prob": p}) def reset( self, *, seed: Optional[int] = None, options: Optional[dict] = None, ): super().reset(seed=seed) self.s = categorical_sample(self.initial_state_distrib, self.np_random) self.lastaction = None if self.render_mode == "human": self.render() return int(self.s), {"prob": 1} def render(self): if self.render_mode is None: logger.warn( "You are calling render method without specifying any render mode. " "You can specify the render_mode at initialization, " f'e.g. gym("{self.spec.id}", render_mode="rgb_array")' ) elif self.render_mode == "ansi": return self._render_text() else: # self.render_mode in {"human", "rgb_array"}: return self._render_gui(self.render_mode) def _render_gui(self, mode): try: import pygame except ImportError: raise DependencyNotInstalled( "pygame is not installed, run `pip install gym[toy_text]`" ) if self.window_surface is None: pygame.init() if mode == "human": pygame.display.init() pygame.display.set_caption("Frozen Lake") self.window_surface = pygame.display.set_mode(self.window_size) elif mode == "rgb_array": self.window_surface = pygame.Surface(self.window_size) assert ( self.window_surface is not None ), "Something went wrong with pygame. This should never happen." 
if self.clock is None: self.clock = pygame.time.Clock() if self.hole_img is None: file_name = path.join(path.dirname(__file__), "img/hole.png") self.hole_img = pygame.transform.scale( pygame.image.load(file_name), self.cell_size ) if self.cracked_hole_img is None: file_name = path.join(path.dirname(__file__), "img/cracked_hole.png") self.cracked_hole_img = pygame.transform.scale( pygame.image.load(file_name), self.cell_size ) if self.ice_img is None: file_name = path.join(path.dirname(__file__), "img/ice.png") self.ice_img = pygame.transform.scale( pygame.image.load(file_name), self.cell_size ) if self.goal_img is None: file_name = path.join(path.dirname(__file__), "img/goal.png") self.goal_img = pygame.transform.scale( pygame.image.load(file_name), self.cell_size ) if self.start_img is None: file_name = path.join(path.dirname(__file__), "img/stool.png") self.start_img = pygame.transform.scale( pygame.image.load(file_name), self.cell_size ) if self.elf_images is None: elfs = [ path.join(path.dirname(__file__), "img/elf_left.png"), path.join(path.dirname(__file__), "img/elf_down.png"), path.join(path.dirname(__file__), "img/elf_right.png"), path.join(path.dirname(__file__), "img/elf_up.png"), ] self.elf_images = [ pygame.transform.scale(pygame.image.load(f_name), self.cell_size) for f_name in elfs ] desc = self.desc.tolist() assert isinstance(desc, list), f"desc should be a list or an array, got {desc}" for y in range(self.nrow): for x in range(self.ncol): pos = (x * self.cell_size[0], y * self.cell_size[1]) rect = (*pos, *self.cell_size) self.window_surface.blit(self.ice_img, pos) if desc[y][x] == b"H": self.window_surface.blit(self.hole_img, pos) elif desc[y][x] == b"G": self.window_surface.blit(self.goal_img, pos) elif desc[y][x] == b"S": self.window_surface.blit(self.start_img, pos) pygame.draw.rect(self.window_surface, (180, 200, 230), rect, 1) # paint the elf bot_row, bot_col = self.s // self.ncol, self.s % self.ncol cell_rect = (bot_col * self.cell_size[0], 
bot_row * self.cell_size[1]) last_action = self.lastaction if self.lastaction is not None else 1 elf_img = self.elf_images[last_action] if desc[bot_row][bot_col] == b"H": self.window_surface.blit(self.cracked_hole_img, cell_rect) else: self.window_surface.blit(elf_img, cell_rect) if mode == "human": pygame.event.pump() pygame.display.update() self.clock.tick(self.metadata["render_fps"]) elif mode == "rgb_array": return np.transpose( np.array(pygame.surfarray.pixels3d(self.window_surface)), axes=(1, 0, 2) ) @staticmethod def _center_small_rect(big_rect, small_dims): offset_w = (big_rect[2] - small_dims[0]) / 2 offset_h = (big_rect[3] - small_dims[1]) / 2 return ( big_rect[0] + offset_w, big_rect[1] + offset_h, ) def _render_text(self): desc = self.desc.tolist() outfile = StringIO() row, col = self.s // self.ncol, self.s % self.ncol desc = [[c.decode("utf-8") for c in line] for line in desc] desc[row][col] = utils.colorize(desc[row][col], "red", highlight=True) if self.lastaction is not None: outfile.write(f" ({['Left', 'Down', 'Right', 'Up'][self.lastaction]})\n") else: outfile.write("\n") outfile.write("\n".join("".join(line) for line in desc) + "\n") with closing(outfile): return outfile.getvalue() def close(self): if self.window_surface is not None: import pygame pygame.display.quit() pygame.quit() # Elf and stool from https://franuka.itch.io/rpg-snow-tileset # All other assets by Mel Tillery http://www.cyaneus.com/ ================================================ FILE: gym/envs/toy_text/taxi.py ================================================ from contextlib import closing from io import StringIO from os import path from typing import Optional import numpy as np from gym import Env, logger, spaces, utils from gym.envs.toy_text.utils import categorical_sample from gym.error import DependencyNotInstalled MAP = [ "+---------+", "|R: | : :G|", "| : | : : |", "| : : : : |", "| | : | : |", "|Y| : |B: |", "+---------+", ] WINDOW_SIZE = (550, 350) class TaxiEnv(Env): 
""" The Taxi Problem from "Hierarchical Reinforcement Learning with the MAXQ Value Function Decomposition" by Tom Dietterich ### Description There are four designated locations in the grid world indicated by R(ed), G(reen), Y(ellow), and B(lue). When the episode starts, the taxi starts off at a random square and the passenger is at a random location. The taxi drives to the passenger's location, picks up the passenger, drives to the passenger's destination (another one of the four specified locations), and then drops off the passenger. Once the passenger is dropped off, the episode ends. Map: +---------+ |R: | : :G| | : | : : | | : : : : | | | : | : | |Y| : |B: | +---------+ ### Actions There are 6 discrete deterministic actions: - 0: move south - 1: move north - 2: move east - 3: move west - 4: pickup passenger - 5: drop off passenger ### Observations There are 500 discrete states since there are 25 taxi positions, 5 possible locations of the passenger (including the case when the passenger is in the taxi), and 4 destination locations. Note that there are 400 states that can actually be reached during an episode. The missing states correspond to situations in which the passenger is at the same location as their destination, as this typically signals the end of an episode. Four additional states can be observed right after a successful episodes, when both the passenger and the taxi are at the destination. This gives a total of 404 reachable discrete states. Each state space is represented by the tuple: (taxi_row, taxi_col, passenger_location, destination) An observation is an integer that encodes the corresponding state. The state tuple can then be decoded with the "decode" method. 
    Passenger locations:
    - 0: R(ed)
    - 1: G(reen)
    - 2: Y(ellow)
    - 3: B(lue)
    - 4: in taxi

    Destinations:
    - 0: R(ed)
    - 1: G(reen)
    - 2: Y(ellow)
    - 3: B(lue)

    ### Info

    ``step`` and ``reset()`` will return an info dictionary that contains "p" and "action_mask" containing
        the probability that the state is taken and a mask of what actions will result in a change of state to speed up training.

    As the initial state is sampled stochastically, the "p" key returned by ``reset()`` is intended to be
    the probability of that initial state; note that it is currently reported as 1.0.
    As the steps themselves are deterministic, the "p" returned by ``step`` is the transition probability,
    which is always 1.0.

    For some cases, taking an action will have no effect on the state of the agent.
    In v0.25.0, ``info["action_mask"]`` contains a np.ndarray for each of the action specifying
    if the action will change the state.

    To sample a modifying action, use ``action = env.action_space.sample(info["action_mask"])``
    Or with a Q-value based algorithm ``action = np.argmax(q_values[obs, np.where(info["action_mask"] == 1)[0]])``.

    ### Rewards
    - -1 per step unless other reward is triggered.
    - +20 delivering passenger.
    - -10  executing "pickup" and "drop-off" actions illegally.

    ### Arguments

    ```
    gym.make('Taxi-v3')
    ```

    ### Version History
    * v3: Map Correction + Cleaner Domain Description, v0.25.0 action masking added to the reset and step information
    * v2: Disallow Taxi start location = goal location, Update Taxi observations in the rollout, Update Taxi reward threshold.
* v1: Remove (3,2) from locs, add passidx<4 check * v0: Initial versions release """ metadata = { "render_modes": ["human", "ansi", "rgb_array"], "render_fps": 4, } def __init__(self, render_mode: Optional[str] = None): self.desc = np.asarray(MAP, dtype="c") self.locs = locs = [(0, 0), (0, 4), (4, 0), (4, 3)] self.locs_colors = [(255, 0, 0), (0, 255, 0), (255, 255, 0), (0, 0, 255)] num_states = 500 num_rows = 5 num_columns = 5 max_row = num_rows - 1 max_col = num_columns - 1 self.initial_state_distrib = np.zeros(num_states) num_actions = 6 self.P = { state: {action: [] for action in range(num_actions)} for state in range(num_states) } for row in range(num_rows): for col in range(num_columns): for pass_idx in range(len(locs) + 1): # +1 for being inside taxi for dest_idx in range(len(locs)): state = self.encode(row, col, pass_idx, dest_idx) if pass_idx < 4 and pass_idx != dest_idx: self.initial_state_distrib[state] += 1 for action in range(num_actions): # defaults new_row, new_col, new_pass_idx = row, col, pass_idx reward = ( -1 ) # default reward when there is no pickup/dropoff terminated = False taxi_loc = (row, col) if action == 0: new_row = min(row + 1, max_row) elif action == 1: new_row = max(row - 1, 0) if action == 2 and self.desc[1 + row, 2 * col + 2] == b":": new_col = min(col + 1, max_col) elif action == 3 and self.desc[1 + row, 2 * col] == b":": new_col = max(col - 1, 0) elif action == 4: # pickup if pass_idx < 4 and taxi_loc == locs[pass_idx]: new_pass_idx = 4 else: # passenger not at location reward = -10 elif action == 5: # dropoff if (taxi_loc == locs[dest_idx]) and pass_idx == 4: new_pass_idx = dest_idx terminated = True reward = 20 elif (taxi_loc in locs) and pass_idx == 4: new_pass_idx = locs.index(taxi_loc) else: # dropoff at wrong location reward = -10 new_state = self.encode( new_row, new_col, new_pass_idx, dest_idx ) self.P[state][action].append( (1.0, new_state, reward, terminated) ) self.initial_state_distrib /= 
self.initial_state_distrib.sum() self.action_space = spaces.Discrete(num_actions) self.observation_space = spaces.Discrete(num_states) self.render_mode = render_mode # pygame utils self.window = None self.clock = None self.cell_size = ( WINDOW_SIZE[0] / self.desc.shape[1], WINDOW_SIZE[1] / self.desc.shape[0], ) self.taxi_imgs = None self.taxi_orientation = 0 self.passenger_img = None self.destination_img = None self.median_horiz = None self.median_vert = None self.background_img = None def encode(self, taxi_row, taxi_col, pass_loc, dest_idx): # (5) 5, 5, 4 i = taxi_row i *= 5 i += taxi_col i *= 5 i += pass_loc i *= 4 i += dest_idx return i def decode(self, i): out = [] out.append(i % 4) i = i // 4 out.append(i % 5) i = i // 5 out.append(i % 5) i = i // 5 out.append(i) assert 0 <= i < 5 return reversed(out) def action_mask(self, state: int): """Computes an action mask for the action space using the state information.""" mask = np.zeros(6, dtype=np.int8) taxi_row, taxi_col, pass_loc, dest_idx = self.decode(state) if taxi_row < 4: mask[0] = 1 if taxi_row > 0: mask[1] = 1 if taxi_col < 4 and self.desc[taxi_row + 1, 2 * taxi_col + 2] == b":": mask[2] = 1 if taxi_col > 0 and self.desc[taxi_row + 1, 2 * taxi_col] == b":": mask[3] = 1 if pass_loc < 4 and (taxi_row, taxi_col) == self.locs[pass_loc]: mask[4] = 1 if pass_loc == 4 and ( (taxi_row, taxi_col) == self.locs[dest_idx] or (taxi_row, taxi_col) in self.locs ): mask[5] = 1 return mask def step(self, a): transitions = self.P[self.s][a] i = categorical_sample([t[0] for t in transitions], self.np_random) p, s, r, t = transitions[i] self.s = s self.lastaction = a if self.render_mode == "human": self.render() return (int(s), r, t, False, {"prob": p, "action_mask": self.action_mask(s)}) def reset( self, *, seed: Optional[int] = None, options: Optional[dict] = None, ): super().reset(seed=seed) self.s = categorical_sample(self.initial_state_distrib, self.np_random) self.lastaction = None self.taxi_orientation = 0 if 
self.render_mode == "human": self.render() return int(self.s), {"prob": 1.0, "action_mask": self.action_mask(self.s)} def render(self): if self.render_mode is None: logger.warn( "You are calling render method without specifying any render mode. " "You can specify the render_mode at initialization, " f'e.g. gym("{self.spec.id}", render_mode="rgb_array")' ) if self.render_mode == "ansi": return self._render_text() else: # self.render_mode in {"human", "rgb_array"}: return self._render_gui(self.render_mode) def _render_gui(self, mode): try: import pygame # dependency to pygame only if rendering with human except ImportError: raise DependencyNotInstalled( "pygame is not installed, run `pip install gym[toy_text]`" ) if self.window is None: pygame.init() pygame.display.set_caption("Taxi") if mode == "human": self.window = pygame.display.set_mode(WINDOW_SIZE) elif mode == "rgb_array": self.window = pygame.Surface(WINDOW_SIZE) assert ( self.window is not None ), "Something went wrong with pygame. This should never happen." 
if self.clock is None: self.clock = pygame.time.Clock() if self.taxi_imgs is None: file_names = [ path.join(path.dirname(__file__), "img/cab_front.png"), path.join(path.dirname(__file__), "img/cab_rear.png"), path.join(path.dirname(__file__), "img/cab_right.png"), path.join(path.dirname(__file__), "img/cab_left.png"), ] self.taxi_imgs = [ pygame.transform.scale(pygame.image.load(file_name), self.cell_size) for file_name in file_names ] if self.passenger_img is None: file_name = path.join(path.dirname(__file__), "img/passenger.png") self.passenger_img = pygame.transform.scale( pygame.image.load(file_name), self.cell_size ) if self.destination_img is None: file_name = path.join(path.dirname(__file__), "img/hotel.png") self.destination_img = pygame.transform.scale( pygame.image.load(file_name), self.cell_size ) self.destination_img.set_alpha(170) if self.median_horiz is None: file_names = [ path.join(path.dirname(__file__), "img/gridworld_median_left.png"), path.join(path.dirname(__file__), "img/gridworld_median_horiz.png"), path.join(path.dirname(__file__), "img/gridworld_median_right.png"), ] self.median_horiz = [ pygame.transform.scale(pygame.image.load(file_name), self.cell_size) for file_name in file_names ] if self.median_vert is None: file_names = [ path.join(path.dirname(__file__), "img/gridworld_median_top.png"), path.join(path.dirname(__file__), "img/gridworld_median_vert.png"), path.join(path.dirname(__file__), "img/gridworld_median_bottom.png"), ] self.median_vert = [ pygame.transform.scale(pygame.image.load(file_name), self.cell_size) for file_name in file_names ] if self.background_img is None: file_name = path.join(path.dirname(__file__), "img/taxi_background.png") self.background_img = pygame.transform.scale( pygame.image.load(file_name), self.cell_size ) desc = self.desc for y in range(0, desc.shape[0]): for x in range(0, desc.shape[1]): cell = (x * self.cell_size[0], y * self.cell_size[1]) self.window.blit(self.background_img, cell) if desc[y][x] == 
b"|" and (y == 0 or desc[y - 1][x] != b"|"): self.window.blit(self.median_vert[0], cell) elif desc[y][x] == b"|" and ( y == desc.shape[0] - 1 or desc[y + 1][x] != b"|" ): self.window.blit(self.median_vert[2], cell) elif desc[y][x] == b"|": self.window.blit(self.median_vert[1], cell) elif desc[y][x] == b"-" and (x == 0 or desc[y][x - 1] != b"-"): self.window.blit(self.median_horiz[0], cell) elif desc[y][x] == b"-" and ( x == desc.shape[1] - 1 or desc[y][x + 1] != b"-" ): self.window.blit(self.median_horiz[2], cell) elif desc[y][x] == b"-": self.window.blit(self.median_horiz[1], cell) for cell, color in zip(self.locs, self.locs_colors): color_cell = pygame.Surface(self.cell_size) color_cell.set_alpha(128) color_cell.fill(color) loc = self.get_surf_loc(cell) self.window.blit(color_cell, (loc[0], loc[1] + 10)) taxi_row, taxi_col, pass_idx, dest_idx = self.decode(self.s) if pass_idx < 4: self.window.blit(self.passenger_img, self.get_surf_loc(self.locs[pass_idx])) if self.lastaction in [0, 1, 2, 3]: self.taxi_orientation = self.lastaction dest_loc = self.get_surf_loc(self.locs[dest_idx]) taxi_location = self.get_surf_loc((taxi_row, taxi_col)) if dest_loc[1] <= taxi_location[1]: self.window.blit( self.destination_img, (dest_loc[0], dest_loc[1] - self.cell_size[1] // 2), ) self.window.blit(self.taxi_imgs[self.taxi_orientation], taxi_location) else: # change blit order for overlapping appearance self.window.blit(self.taxi_imgs[self.taxi_orientation], taxi_location) self.window.blit( self.destination_img, (dest_loc[0], dest_loc[1] - self.cell_size[1] // 2), ) if mode == "human": pygame.display.update() self.clock.tick(self.metadata["render_fps"]) elif mode == "rgb_array": return np.transpose( np.array(pygame.surfarray.pixels3d(self.window)), axes=(1, 0, 2) ) def get_surf_loc(self, map_loc): return (map_loc[1] * 2 + 1) * self.cell_size[0], ( map_loc[0] + 1 ) * self.cell_size[1] def _render_text(self): desc = self.desc.copy().tolist() outfile = StringIO() out = 
[[c.decode("utf-8") for c in line] for line in desc] taxi_row, taxi_col, pass_idx, dest_idx = self.decode(self.s) def ul(x): return "_" if x == " " else x if pass_idx < 4: out[1 + taxi_row][2 * taxi_col + 1] = utils.colorize( out[1 + taxi_row][2 * taxi_col + 1], "yellow", highlight=True ) pi, pj = self.locs[pass_idx] out[1 + pi][2 * pj + 1] = utils.colorize( out[1 + pi][2 * pj + 1], "blue", bold=True ) else: # passenger in taxi out[1 + taxi_row][2 * taxi_col + 1] = utils.colorize( ul(out[1 + taxi_row][2 * taxi_col + 1]), "green", highlight=True ) di, dj = self.locs[dest_idx] out[1 + di][2 * dj + 1] = utils.colorize(out[1 + di][2 * dj + 1], "magenta") outfile.write("\n".join(["".join(row) for row in out]) + "\n") if self.lastaction is not None: outfile.write( f" ({['South', 'North', 'East', 'West', 'Pickup', 'Dropoff'][self.lastaction]})\n" ) else: outfile.write("\n") with closing(outfile): return outfile.getvalue() def close(self): if self.window is not None: import pygame pygame.display.quit() pygame.quit() # Taxi rider from https://franuka.itch.io/rpg-asset-pack # All other assets by Mel Tillery http://www.cyaneus.com/ ================================================ FILE: gym/envs/toy_text/utils.py ================================================ import numpy as np def categorical_sample(prob_n, np_random: np.random.Generator): """Sample from categorical distribution where each row specifies class probabilities.""" prob_n = np.asarray(prob_n) csprob_n = np.cumsum(prob_n) return np.argmax(csprob_n > np_random.random()) ================================================ FILE: gym/error.py ================================================ """Set of Error classes for gym.""" import warnings class Error(Exception): """Error superclass.""" # Local errors class Unregistered(Error): """Raised when the user requests an item from the registry that does not actually exist.""" class UnregisteredEnv(Unregistered): """Raised when the user requests an env from the registry that 
does not actually exist.""" class NamespaceNotFound(UnregisteredEnv): """Raised when the user requests an env from the registry where the namespace doesn't exist.""" class NameNotFound(UnregisteredEnv): """Raised when the user requests an env from the registry where the name doesn't exist.""" class VersionNotFound(UnregisteredEnv): """Raised when the user requests an env from the registry where the version doesn't exist.""" class UnregisteredBenchmark(Unregistered): """Raised when the user requests an env from the registry that does not actually exist.""" class DeprecatedEnv(Error): """Raised when the user requests an env from the registry with an older version number than the latest env with the same name.""" class RegistrationError(Error): """Raised when the user attempts to register an invalid env. For example, an unversioned env when a versioned env exists.""" class UnseedableEnv(Error): """Raised when the user tries to seed an env that does not support seeding.""" class DependencyNotInstalled(Error): """Raised when the user has not installed a dependency.""" class UnsupportedMode(Error): """Raised when the user requests a rendering mode not supported by the environment.""" class ResetNeeded(Error): """When the order enforcing is violated, i.e. 
step or render is called before reset.""" class ResetNotAllowed(Error): """When the monitor is active, raised when the user tries to step an environment that's not yet terminated or truncated.""" class InvalidAction(Error): """Raised when the user performs an action not contained within the action space.""" # API errors class APIError(Error): """Deprecated, to be removed at gym 1.0.""" def __init__( self, message=None, http_body=None, http_status=None, json_body=None, headers=None, ): """Initialise API error.""" super().__init__(message) warnings.warn("APIError is deprecated and will be removed at gym 1.0") if http_body and hasattr(http_body, "decode"): try: http_body = http_body.decode("utf-8") except Exception: http_body = "" self._message = message self.http_body = http_body self.http_status = http_status self.json_body = json_body self.headers = headers or {} self.request_id = self.headers.get("request-id", None) def __unicode__(self): """Returns a string, if request_id is not None then make message other use the _message.""" if self.request_id is not None: msg = self._message or "" return f"Request {self.request_id}: {msg}" else: return self._message def __str__(self): """Returns the __unicode__.""" return self.__unicode__() class APIConnectionError(APIError): """Deprecated, to be removed at gym 1.0.""" class InvalidRequestError(APIError): """Deprecated, to be removed at gym 1.0.""" def __init__( self, message, param, http_body=None, http_status=None, json_body=None, headers=None, ): """Initialises the invalid request error.""" super().__init__(message, http_body, http_status, json_body, headers) self.param = param class AuthenticationError(APIError): """Deprecated, to be removed at gym 1.0.""" class RateLimitError(APIError): """Deprecated, to be removed at gym 1.0.""" # Video errors class VideoRecorderError(Error): """Unused error.""" class InvalidFrame(Error): """Error message when an invalid frame is captured.""" # Wrapper errors class 
DoubleWrapperError(Error): """Error message for when using double wrappers.""" class WrapAfterConfigureError(Error): """Error message for using wrap after configure.""" class RetriesExceededError(Error): """Error message for retries exceeding set number.""" # Vectorized environments errors class AlreadyPendingCallError(Exception): """Raised when `reset`, or `step` is called asynchronously (e.g. with `reset_async`, or `step_async` respectively), and `reset_async`, or `step_async` (respectively) is called again (without a complete call to `reset_wait`, or `step_wait` respectively).""" def __init__(self, message: str, name: str): """Initialises the exception with name attributes.""" super().__init__(message) self.name = name class NoAsyncCallError(Exception): """Raised when an asynchronous `reset`, or `step` is not running, but `reset_wait`, or `step_wait` (respectively) is called.""" def __init__(self, message: str, name: str): """Initialises the exception with name attributes.""" super().__init__(message) self.name = name class ClosedEnvironmentError(Exception): """Trying to call `reset`, or `step`, while the environment is closed.""" class CustomSpaceError(Exception): """The space is a custom gym.Space instance, and is not supported by `AsyncVectorEnv` with `shared_memory=True`.""" ================================================ FILE: gym/logger.py ================================================ """Set of functions for logging messages.""" import sys import warnings from typing import Optional, Type from gym.utils import colorize DEBUG = 10 INFO = 20 WARN = 30 ERROR = 40 DISABLED = 50 min_level = 30 # Ensure DeprecationWarning to be displayed (#2685, #3059) warnings.filterwarnings("once", "", DeprecationWarning, module=r"^gym\.") def set_level(level: int): """Set logging threshold on current logger.""" global min_level min_level = level def debug(msg: str, *args: object): """Logs a debug message to the user.""" if min_level <= DEBUG: print(f"DEBUG: {msg % args}", 
file=sys.stderr) def info(msg: str, *args: object): """Logs an info message to the user.""" if min_level <= INFO: print(f"INFO: {msg % args}", file=sys.stderr) def warn( msg: str, *args: object, category: Optional[Type[Warning]] = None, stacklevel: int = 1, ): """Raises a warning to the user if the min_level <= WARN. Args: msg: The message to warn the user *args: Additional information to warn the user category: The category of warning stacklevel: The stack level to raise to """ if min_level <= WARN: warnings.warn( colorize(f"WARN: {msg % args}", "yellow"), category=category, stacklevel=stacklevel + 1, ) def deprecation(msg: str, *args: object): """Logs a deprecation warning to users.""" warn(msg, *args, category=DeprecationWarning, stacklevel=2) def error(msg: str, *args: object): """Logs an error message if min_level <= ERROR in red on the sys.stderr.""" if min_level <= ERROR: print(colorize(f"ERROR: {msg % args}", "red"), file=sys.stderr) # DEPRECATED: setLevel = set_level ================================================ FILE: gym/py.typed ================================================ ================================================ FILE: gym/spaces/__init__.py ================================================ """This module implements various spaces. Spaces describe mathematical sets and are used in Gym to specify valid actions and observations. Every Gym environment must have the attributes ``action_space`` and ``observation_space``. 
If, for instance, three possible actions (0,1,2) can be performed in your environment and observations are vectors in the two-dimensional unit cube, the environment code may contain the following two lines:: self.action_space = spaces.Discrete(3) self.observation_space = spaces.Box(0, 1, shape=(2,)) """ from gym.spaces.box import Box from gym.spaces.dict import Dict from gym.spaces.discrete import Discrete from gym.spaces.graph import Graph, GraphInstance from gym.spaces.multi_binary import MultiBinary from gym.spaces.multi_discrete import MultiDiscrete from gym.spaces.sequence import Sequence from gym.spaces.space import Space from gym.spaces.text import Text from gym.spaces.tuple import Tuple from gym.spaces.utils import flatdim, flatten, flatten_space, unflatten __all__ = [ "Space", "Box", "Discrete", "Text", "Graph", "GraphInstance", "MultiDiscrete", "MultiBinary", "Tuple", "Sequence", "Dict", "flatdim", "flatten_space", "flatten", "unflatten", ] ================================================ FILE: gym/spaces/box.py ================================================ """Implementation of a space that represents closed boxes in euclidean space.""" from typing import Dict, List, Optional, Sequence, SupportsFloat, Tuple, Type, Union import numpy as np import gym.error from gym import logger from gym.spaces.space import Space def _short_repr(arr: np.ndarray) -> str: """Create a shortened string representation of a numpy array. If arr is a multiple of the all-ones vector, return a string representation of the multiplier. Otherwise, return a string representation of the entire array. 
Args: arr: The array to represent Returns: A short representation of the array """ if arr.size != 0 and np.min(arr) == np.max(arr): return str(np.min(arr)) return str(arr) def is_float_integer(var) -> bool: """Checks if a variable is an integer or float.""" return np.issubdtype(type(var), np.integer) or np.issubdtype(type(var), np.floating) class Box(Space[np.ndarray]): r"""A (possibly unbounded) box in :math:`\mathbb{R}^n`. Specifically, a Box represents the Cartesian product of n closed intervals. Each interval has the form of one of :math:`[a, b]`, :math:`(-\infty, b]`, :math:`[a, \infty)`, or :math:`(-\infty, \infty)`. There are two common use cases: * Identical bound for each dimension:: >>> Box(low=-1.0, high=2.0, shape=(3, 4), dtype=np.float32) Box(3, 4) * Independent bound for each dimension:: >>> Box(low=np.array([-1.0, -2.0]), high=np.array([2.0, 4.0]), dtype=np.float32) Box(2,) """ def __init__( self, low: Union[SupportsFloat, np.ndarray], high: Union[SupportsFloat, np.ndarray], shape: Optional[Sequence[int]] = None, dtype: Type = np.float32, seed: Optional[Union[int, np.random.Generator]] = None, ): r"""Constructor of :class:`Box`. The argument ``low`` specifies the lower bound of each dimension and ``high`` specifies the upper bounds. I.e., the space that is constructed will be the product of the intervals :math:`[\text{low}[i], \text{high}[i]]`. If ``low`` (or ``high``) is a scalar, the lower bound (or upper bound, respectively) will be assumed to be this value across all dimensions. Args: low (Union[SupportsFloat, np.ndarray]): Lower bounds of the intervals. high (Union[SupportsFloat, np.ndarray]): Upper bounds of the intervals. shape (Optional[Sequence[int]]): The shape is inferred from the shape of `low` or `high` `np.ndarray`s with `low` and `high` scalars defaulting to a shape of (1,) dtype: The dtype of the elements of the space. If this is an integer type, the :class:`Box` is essentially a discrete space. 
seed: Optionally, you can use this argument to seed the RNG that is used to sample from the space. Raises: ValueError: If no shape information is provided (shape is None, low is None and high is None) then a value error is raised. """ assert ( dtype is not None ), "Box dtype must be explicitly provided, cannot be None." self.dtype = np.dtype(dtype) # determine shape if it isn't provided directly if shape is not None: assert all( np.issubdtype(type(dim), np.integer) for dim in shape ), f"Expect all shape elements to be an integer, actual type: {tuple(type(dim) for dim in shape)}" shape = tuple(int(dim) for dim in shape) # This changes any np types to int elif isinstance(low, np.ndarray): shape = low.shape elif isinstance(high, np.ndarray): shape = high.shape elif is_float_integer(low) and is_float_integer(high): shape = (1,) else: raise ValueError( f"Box shape is inferred from low and high, expect their types to be np.ndarray, an integer or a float, actual type low: {type(low)}, high: {type(high)}" ) # Capture the boundedness information before replacing np.inf with get_inf _low = np.full(shape, low, dtype=float) if is_float_integer(low) else low self.bounded_below = -np.inf < _low _high = np.full(shape, high, dtype=float) if is_float_integer(high) else high self.bounded_above = np.inf > _high low = _broadcast(low, dtype, shape, inf_sign="-") # type: ignore high = _broadcast(high, dtype, shape, inf_sign="+") # type: ignore assert isinstance(low, np.ndarray) assert ( low.shape == shape ), f"low.shape doesn't match provided shape, low.shape: {low.shape}, shape: {shape}" assert isinstance(high, np.ndarray) assert ( high.shape == shape ), f"high.shape doesn't match provided shape, high.shape: {high.shape}, shape: {shape}" self._shape: Tuple[int, ...] 
= shape low_precision = get_precision(low.dtype) high_precision = get_precision(high.dtype) dtype_precision = get_precision(self.dtype) if min(low_precision, high_precision) > dtype_precision: # type: ignore logger.warn(f"Box bound precision lowered by casting to {self.dtype}") self.low = low.astype(self.dtype) self.high = high.astype(self.dtype) self.low_repr = _short_repr(self.low) self.high_repr = _short_repr(self.high) super().__init__(self.shape, self.dtype, seed) @property def shape(self) -> Tuple[int, ...]: """Has stricter type than gym.Space - never None.""" return self._shape @property def is_np_flattenable(self): """Checks whether this space can be flattened to a :class:`spaces.Box`.""" return True def is_bounded(self, manner: str = "both") -> bool: """Checks whether the box is bounded in some sense. Args: manner (str): One of ``"both"``, ``"below"``, ``"above"``. Returns: If the space is bounded Raises: ValueError: If `manner` is neither ``"both"`` nor ``"below"`` or ``"above"`` """ below = bool(np.all(self.bounded_below)) above = bool(np.all(self.bounded_above)) if manner == "both": return below and above elif manner == "below": return below elif manner == "above": return above else: raise ValueError( f"manner is not in {{'below', 'above', 'both'}}, actual value: {manner}" ) def sample(self, mask: None = None) -> np.ndarray: r"""Generates a single random sample inside the Box. In creating a sample of the box, each coordinate is sampled (independently) from a distribution that is chosen according to the form of the interval: * :math:`[a, b]` : uniform distribution * :math:`[a, \infty)` : shifted exponential distribution * :math:`(-\infty, b]` : shifted negative exponential distribution * :math:`(-\infty, \infty)` : normal distribution Args: mask: A mask for sampling values from the Box space, currently unsupported. 
Returns: A sampled value from the Box """ if mask is not None: raise gym.error.Error( f"Box.sample cannot be provided a mask, actual value: {mask}" ) high = self.high if self.dtype.kind == "f" else self.high.astype("int64") + 1 sample = np.empty(self.shape) # Masking arrays which classify the coordinates according to interval # type unbounded = ~self.bounded_below & ~self.bounded_above upp_bounded = ~self.bounded_below & self.bounded_above low_bounded = self.bounded_below & ~self.bounded_above bounded = self.bounded_below & self.bounded_above # Vectorized sampling by interval type sample[unbounded] = self.np_random.normal(size=unbounded[unbounded].shape) sample[low_bounded] = ( self.np_random.exponential(size=low_bounded[low_bounded].shape) + self.low[low_bounded] ) sample[upp_bounded] = ( -self.np_random.exponential(size=upp_bounded[upp_bounded].shape) + self.high[upp_bounded] ) sample[bounded] = self.np_random.uniform( low=self.low[bounded], high=high[bounded], size=bounded[bounded].shape ) if self.dtype.kind == "i": sample = np.floor(sample) return sample.astype(self.dtype) def contains(self, x) -> bool: """Return boolean specifying if x is a valid member of this space.""" if not isinstance(x, np.ndarray): logger.warn("Casting input x to numpy array.") try: x = np.asarray(x, dtype=self.dtype) except (ValueError, TypeError): return False return bool( np.can_cast(x.dtype, self.dtype) and x.shape == self.shape and np.all(x >= self.low) and np.all(x <= self.high) ) def to_jsonable(self, sample_n): """Convert a batch of samples from this space to a JSONable data type.""" return np.array(sample_n).tolist() def from_jsonable(self, sample_n: Sequence[Union[float, int]]) -> List[np.ndarray]: """Convert a JSONable data type to a batch of samples from this space.""" return [np.asarray(sample) for sample in sample_n] def __repr__(self) -> str: """A string representation of this space. The representation will include bounds, shape and dtype. 
If a bound is uniform, only the corresponding scalar will be given to avoid redundant and ugly strings. Returns: A representation of the space """ return f"Box({self.low_repr}, {self.high_repr}, {self.shape}, {self.dtype})" def __eq__(self, other) -> bool: """Check whether `other` is equivalent to this instance. Doesn't check dtype equivalence.""" return ( isinstance(other, Box) and (self.shape == other.shape) # and (self.dtype == other.dtype) and np.allclose(self.low, other.low) and np.allclose(self.high, other.high) ) def __setstate__(self, state: Dict): """Sets the state of the box for unpickling a box with legacy support.""" super().__setstate__(state) # legacy support through re-adding "low_repr" and "high_repr" if missing from pickled state if not hasattr(self, "low_repr"): self.low_repr = _short_repr(self.low) if not hasattr(self, "high_repr"): self.high_repr = _short_repr(self.high) def get_inf(dtype, sign: str) -> SupportsFloat: """Returns an infinite that doesn't break things. 
Args: dtype: An `np.dtype` sign (str): must be either `"+"` or `"-"` Returns: Gets an infinite value with the sign and dtype Raises: TypeError: Unknown sign, use either '+' or '-' ValueError: Unknown dtype for infinite bounds """ if np.dtype(dtype).kind == "f": if sign == "+": return np.inf elif sign == "-": return -np.inf else: raise TypeError(f"Unknown sign {sign}, use either '+' or '-'") elif np.dtype(dtype).kind == "i": if sign == "+": return np.iinfo(dtype).max - 2 elif sign == "-": return np.iinfo(dtype).min + 2 else: raise TypeError(f"Unknown sign {sign}, use either '+' or '-'") else: raise ValueError(f"Unknown dtype {dtype} for infinite bounds") def get_precision(dtype) -> SupportsFloat: """Get precision of a data type.""" if np.issubdtype(dtype, np.floating): return np.finfo(dtype).precision else: return np.inf def _broadcast( value: Union[SupportsFloat, np.ndarray], dtype, shape: Tuple[int, ...], inf_sign: str, ) -> np.ndarray: """Handle infinite bounds and broadcast at the same time if needed.""" if is_float_integer(value): value = get_inf(dtype, inf_sign) if np.isinf(value) else value # type: ignore value = np.full(shape, value, dtype=dtype) else: assert isinstance(value, np.ndarray) if np.any(np.isinf(value)): # create new array with dtype, but maintain old one to preserve np.inf temp = value.astype(dtype) temp[np.isinf(value)] = get_inf(dtype, inf_sign) value = temp return value ================================================ FILE: gym/spaces/dict.py ================================================ """Implementation of a space that represents the cartesian product of other spaces as a dictionary.""" from collections import OrderedDict from collections.abc import Mapping, Sequence from typing import Any from typing import Dict as TypingDict from typing import List, Optional from typing import Sequence as TypingSequence from typing import Tuple, Union import numpy as np from gym.spaces.space import Space class Dict(Space[TypingDict[str, Space]], 
Mapping): """A dictionary of :class:`Space` instances. Elements of this space are (ordered) dictionaries of elements from the constituent spaces. Example usage: >>> from gym.spaces import Dict, Discrete >>> observation_space = Dict({"position": Discrete(2), "velocity": Discrete(3)}) >>> observation_space.sample() OrderedDict([('position', 1), ('velocity', 2)]) Example usage [nested]:: >>> from gym.spaces import Box, Dict, Discrete, MultiBinary, MultiDiscrete >>> Dict( ... { ... "ext_controller": MultiDiscrete([5, 2, 2]), ... "inner_state": Dict( ... { ... "charge": Discrete(100), ... "system_checks": MultiBinary(10), ... "job_status": Dict( ... { ... "task": Discrete(5), ... "progress": Box(low=0, high=100, shape=()), ... } ... ), ... } ... ), ... } ... ) It can be convenient to use :class:`Dict` spaces if you want to make complex observations or actions more human-readable. Usually, it will not be possible to use elements of this space directly in learning code. However, you can easily convert `Dict` observations to flat arrays by using a :class:`gym.wrappers.FlattenObservation` wrapper. Similar wrappers can be implemented to deal with :class:`Dict` actions. """ def __init__( self, spaces: Optional[ Union[ TypingDict[str, Space], TypingSequence[Tuple[str, Space]], ] ] = None, seed: Optional[Union[dict, int, np.random.Generator]] = None, **spaces_kwargs: Space, ): """Constructor of :class:`Dict` space. 
This space can be instantiated in one of two ways: Either you pass a dictionary of spaces to :meth:`__init__` via the ``spaces`` argument, or you pass the spaces as separate keyword arguments (where you will need to avoid the keys ``spaces`` and ``seed``) Example:: >>> from gym.spaces import Box, Discrete >>> Dict({"position": Box(-1, 1, shape=(2,)), "color": Discrete(3)}) Dict(color:Discrete(3), position:Box(-1.0, 1.0, (2,), float32)) >>> Dict(position=Box(-1, 1, shape=(2,)), color=Discrete(3)) Dict(color:Discrete(3), position:Box(-1.0, 1.0, (2,), float32)) Args: spaces: A dictionary of spaces. This specifies the structure of the :class:`Dict` space seed: Optionally, you can use this argument to seed the RNGs of the spaces that make up the :class:`Dict` space. **spaces_kwargs: If ``spaces`` is ``None``, you need to pass the constituent spaces as keyword arguments, as described above. """ # Convert the spaces into an OrderedDict if isinstance(spaces, Mapping) and not isinstance(spaces, OrderedDict): try: spaces = OrderedDict(sorted(spaces.items())) except TypeError: # Incomparable types (e.g. `int` vs. `str`, or user-defined types) found. # The keys remain in the insertion order. spaces = OrderedDict(spaces.items()) elif isinstance(spaces, Sequence): spaces = OrderedDict(spaces) elif spaces is None: spaces = OrderedDict() else: assert isinstance( spaces, OrderedDict ), f"Unexpected Dict space input, expecting dict, OrderedDict or Sequence, actual type: {type(spaces)}" # Add kwargs to spaces to allow both dictionary and keywords to be used for key, space in spaces_kwargs.items(): if key not in spaces: spaces[key] = space else: raise ValueError( f"Dict space keyword '{key}' already exists in the spaces dictionary." 
) self.spaces = spaces for key, space in self.spaces.items(): assert isinstance( space, Space ), f"Dict space element is not an instance of Space: key='{key}', space={space}" super().__init__( None, None, seed # type: ignore ) # None for shape and dtype, since it'll require special handling @property def is_np_flattenable(self): """Checks whether this space can be flattened to a :class:`spaces.Box`.""" return all(space.is_np_flattenable for space in self.spaces.values()) def seed(self, seed: Optional[Union[dict, int]] = None) -> list: """Seed the PRNG of this space and all subspaces. Depending on the type of seed, the subspaces will be seeded differently * None - All the subspaces will use a random initial seed * Int - The integer is used to seed the `Dict` space that is used to generate seed values for each of the subspaces. Warning, this does not guarantee unique seeds for all of the subspaces. * Dict - Using all the keys in the seed dictionary, the values are used to seed the subspaces. This allows the seeding of multiple composite subspaces (`Dict["space": Dict[...], ...]` with `{"space": {...}, ...}`). Args: seed: An optional list of ints or int to seed the (sub-)spaces. 
""" seeds = [] if isinstance(seed, dict): assert ( seed.keys() == self.spaces.keys() ), f"The seed keys: {seed.keys()} are not identical to space keys: {self.spaces.keys()}" for key in seed.keys(): seeds += self.spaces[key].seed(seed[key]) elif isinstance(seed, int): seeds = super().seed(seed) # Using `np.int32` will mean that the same key occurring is extremely low, even for large subspaces subseeds = self.np_random.integers( np.iinfo(np.int32).max, size=len(self.spaces) ) for subspace, subseed in zip(self.spaces.values(), subseeds): seeds += subspace.seed(int(subseed)) elif seed is None: for space in self.spaces.values(): seeds += space.seed(None) else: raise TypeError( f"Expected seed type: dict, int or None, actual type: {type(seed)}" ) return seeds def sample(self, mask: Optional[TypingDict[str, Any]] = None) -> dict: """Generates a single random sample from this space. The sample is an ordered dictionary of independent samples from the constituent spaces. Args: mask: An optional mask for each of the subspaces, expects the same keys as the space Returns: A dictionary with the same key and sampled values from :attr:`self.spaces` """ if mask is not None: assert isinstance( mask, dict ), f"Expects mask to be a dict, actual type: {type(mask)}" assert ( mask.keys() == self.spaces.keys() ), f"Expect mask keys to be same as space keys, mask keys: {mask.keys()}, space keys: {self.spaces.keys()}" return OrderedDict( [(k, space.sample(mask[k])) for k, space in self.spaces.items()] ) return OrderedDict([(k, space.sample()) for k, space in self.spaces.items()]) def contains(self, x) -> bool: """Return boolean specifying if x is a valid member of this space.""" if isinstance(x, dict) and x.keys() == self.spaces.keys(): return all(x[key] in self.spaces[key] for key in self.spaces.keys()) return False def __getitem__(self, key: str) -> Space: """Get the space that is associated to `key`.""" return self.spaces[key] def __setitem__(self, key: str, value: Space): """Set the 
space that is associated to `key`.""" assert isinstance( value, Space ), f"Trying to set {key} to Dict space with value that is not a gym space, actual type: {type(value)}" self.spaces[key] = value def __iter__(self): """Iterator through the keys of the subspaces.""" yield from self.spaces def __len__(self) -> int: """Gives the number of simpler spaces that make up the `Dict` space.""" return len(self.spaces) def __repr__(self) -> str: """Gives a string representation of this space.""" return ( "Dict(" + ", ".join([f"{k!r}: {s}" for k, s in self.spaces.items()]) + ")" ) def __eq__(self, other) -> bool: """Check whether `other` is equivalent to this instance.""" return ( isinstance(other, Dict) # Comparison of `OrderedDict`s is order-sensitive and self.spaces == other.spaces # OrderedDict.__eq__ ) def to_jsonable(self, sample_n: list) -> dict: """Convert a batch of samples from this space to a JSONable data type.""" # serialize as dict-repr of vectors return { key: space.to_jsonable([sample[key] for sample in sample_n]) for key, space in self.spaces.items() } def from_jsonable(self, sample_n: TypingDict[str, list]) -> List[dict]: """Convert a JSONable data type to a batch of samples from this space.""" dict_of_list: TypingDict[str, list] = { key: space.from_jsonable(sample_n[key]) for key, space in self.spaces.items() } n_elements = len(next(iter(dict_of_list.values()))) result = [ OrderedDict({key: value[n] for key, value in dict_of_list.items()}) for n in range(n_elements) ] return result ================================================ FILE: gym/spaces/discrete.py ================================================ """Implementation of a space consisting of finitely many elements.""" from typing import Optional, Union import numpy as np from gym.spaces.space import Space class Discrete(Space[int]): r"""A space consisting of finitely many elements. 
class Discrete(Space[int]):
    r"""A space consisting of finitely many elements.

    This class represents a finite subset of integers, more specifically a set of the form
    :math:`\{ a, a+1, \dots, a+n-1 \}`.

    Example::

        >>> Discrete(2)            # {0, 1}
        >>> Discrete(3, start=-1)  # {-1, 0, 1}
    """

    def __init__(
        self,
        n: int,
        seed: Optional[Union[int, np.random.Generator]] = None,
        start: int = 0,
    ):
        r"""Constructor of :class:`Discrete` space.

        This will construct the space :math:`\{\text{start}, ..., \text{start} + n - 1\}`.

        Args:
            n (int): The number of elements of this space.
            seed: Optionally, you can use this argument to seed the RNG that is used to sample from the space.
            start (int): The smallest element of this space.
        """
        assert isinstance(n, (int, np.integer))
        assert n > 0, "n (counts) have to be positive"
        assert isinstance(start, (int, np.integer))
        self.n = int(n)
        self.start = int(start)
        # Elements are scalar int64s, hence the empty shape.
        super().__init__((), np.int64, seed)

    @property
    def is_np_flattenable(self):
        """Checks whether this space can be flattened to a :class:`spaces.Box`."""
        return True

    def sample(self, mask: Optional[np.ndarray] = None) -> int:
        """Generates a single random sample from this space.

        A sample will be chosen uniformly at random with the mask if provided.

        Args:
            mask: An optional mask for if an action can be selected.
                Expected `np.ndarray` of shape `(n,)` and dtype `np.int8` where `1` represents valid actions and `0` invalid / infeasible actions.
                If there are no possible actions (i.e. `np.all(mask == 0)`) then `space.start` will be returned.

        Returns:
            A sampled integer from the space
        """
        if mask is None:
            return int(self.start + self.np_random.integers(self.n))

        assert isinstance(
            mask, np.ndarray
        ), f"The expected type of the mask is np.ndarray, actual type: {type(mask)}"
        assert (
            mask.dtype == np.int8
        ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}"
        assert mask.shape == (
            self.n,
        ), f"The expected shape of the mask is {(self.n,)}, actual shape: {mask.shape}"
        legal_actions = mask == 1
        assert np.all(
            np.logical_or(mask == 0, legal_actions)
        ), f"All values of a mask should be 0 or 1, actual values: {mask}"

        if not np.any(legal_actions):
            # Every action is masked out — fall back to the smallest element.
            return self.start
        return int(self.start + self.np_random.choice(np.where(legal_actions)[0]))

    def contains(self, x) -> bool:
        """Return boolean specifying if x is a valid member of this space."""
        if isinstance(x, int):
            candidate = x
        elif isinstance(x, (np.generic, np.ndarray)) and (
            np.issubdtype(x.dtype, np.integer) and x.shape == ()
        ):
            # Accept zero-dimensional integer numpy scalars/arrays.
            candidate = int(x)  # type: ignore
        else:
            return False

        return self.start <= candidate < self.start + self.n

    def __repr__(self) -> str:
        """Gives a string representation of this space."""
        suffix = f", start={self.start}" if self.start != 0 else ""
        return f"Discrete({self.n}{suffix})"

    def __eq__(self, other) -> bool:
        """Check whether ``other`` is equivalent to this instance."""
        if not isinstance(other, Discrete):
            return False
        return self.n == other.n and self.start == other.start

    def __setstate__(self, state):
        """Used when loading a pickled space.

        This method has to be implemented explicitly to allow for loading of legacy states.

        Args:
            state: The new state
        """
        # Don't mutate the original state
        state = dict(state)

        # Allow for loading of legacy states.
        # See https://github.com/openai/gym/pull/2470
        state.setdefault("start", 0)

        super().__setstate__(state)
class GraphInstance(NamedTuple):
    """A Graph space instance.

    * nodes (np.ndarray): an (n x ...) sized array representing the features for n nodes, (...) must adhere to the shape of the node space.
    * edges (Optional[np.ndarray]): an (m x ...) sized array representing the features for m edges, (...) must adhere to the shape of the edge space.
    * edge_links (Optional[np.ndarray]): an (m x 2) sized array of ints representing the indices of the two nodes that each edge connects.
    """

    nodes: np.ndarray
    edges: Optional[np.ndarray]
    edge_links: Optional[np.ndarray]


class Graph(Space):
    r"""A space representing graph information as a series of `nodes` connected with `edges` according to an adjacency matrix represented as a series of `edge_links`.

    Example usage::

        self.observation_space = spaces.Graph(node_space=space.Box(low=-100, high=100, shape=(3,)), edge_space=spaces.Discrete(3))
    """

    def __init__(
        self,
        node_space: Union[Box, Discrete],
        edge_space: Union[None, Box, Discrete],
        seed: Optional[Union[int, np.random.Generator]] = None,
    ):
        r"""Constructor of :class:`Graph`.

        The argument ``node_space`` specifies the base space that each node feature will use.
        This argument must be either a Box or Discrete instance.

        The argument ``edge_space`` specifies the base space that each edge feature will use.
        This argument must be either a None, Box or Discrete instance.

        Args:
            node_space (Union[Box, Discrete]): space of the node features.
            edge_space (Union[None, Box, Discrete]): space of the edge features.
            seed: Optionally, you can use this argument to seed the RNG that is used to sample from the space.
        """
        assert isinstance(
            node_space, (Box, Discrete)
        ), f"Values of the node_space should be instances of Box or Discrete, got {type(node_space)}"
        if edge_space is not None:
            # Bug fix: the message previously interpolated `type(node_space)`,
            # which mis-reported the offending type when `edge_space` was invalid.
            assert isinstance(
                edge_space, (Box, Discrete)
            ), f"Values of the edge_space should be instances of None, Box or Discrete, got {type(edge_space)}"

        self.node_space = node_space
        self.edge_space = edge_space

        # None for shape and dtype, since graphs have variable size.
        super().__init__(None, None, seed)

    @property
    def is_np_flattenable(self):
        """Checks whether this space can be flattened to a :class:`spaces.Box`."""
        return False

    def _generate_sample_space(
        self, base_space: Union[None, Box, Discrete], num: int
    ) -> Optional[Union[Box, MultiDiscrete]]:
        # Build a batched version of `base_space` covering `num` elements,
        # or None when there is nothing to sample (no elements or no space).
        if num == 0 or base_space is None:
            return None

        if isinstance(base_space, Box):
            return Box(
                low=np.array(max(1, num) * [base_space.low]),
                high=np.array(max(1, num) * [base_space.high]),
                shape=(num,) + base_space.shape,
                dtype=base_space.dtype,
                seed=self.np_random,
            )
        elif isinstance(base_space, Discrete):
            return MultiDiscrete(nvec=[base_space.n] * num, seed=self.np_random)
        else:
            raise TypeError(
                f"Expects base space to be Box and Discrete, actual space: {type(base_space)}."
            )

    def sample(
        self,
        mask: Optional[
            Tuple[
                Optional[Union[np.ndarray, tuple]],
                Optional[Union[np.ndarray, tuple]],
            ]
        ] = None,
        num_nodes: int = 10,
        num_edges: Optional[int] = None,
    ) -> GraphInstance:
        """Generates a single sample graph with num_nodes between 1 and 10 sampled from the Graph.

        Args:
            mask: An optional tuple of optional node and edge mask that is only possible with Discrete spaces
                (Box spaces don't support sample masks).
                If no `num_edges` is provided then the `edge_mask` is multiplied by the number of edges
            num_nodes: The number of nodes that will be sampled, the default is 10 nodes
            num_edges: An optional number of edges, otherwise, a random number between 0 and `num_nodes`^2

        Returns:
            A NamedTuple representing a graph with attributes .nodes, .edges, and .edge_links.
        """
        assert (
            num_nodes > 0
        ), f"The number of nodes is expected to be greater than 0, actual value: {num_nodes}"

        if mask is not None:
            node_space_mask, edge_space_mask = mask
        else:
            node_space_mask, edge_space_mask = None, None

        # we only have edges when we have at least 2 nodes
        if num_edges is None:
            if num_nodes > 1:
                # maximal number of edges is `n*(n-1)` allowing self connections and two-way is allowed
                num_edges = self.np_random.integers(num_nodes * (num_nodes - 1))
            else:
                num_edges = 0

            if edge_space_mask is not None:
                # Replicate the single edge mask once per sampled edge.
                edge_space_mask = tuple(edge_space_mask for _ in range(num_edges))
        else:
            if self.edge_space is None:
                warn(
                    f"The number of edges is set ({num_edges}) but the edge space is None."
                )
            # Bug fix: the message previously said "greater than 0" while the
            # check actually allows zero edges.
            assert (
                num_edges >= 0
            ), f"Expects the number of edges to be greater than or equal to 0, actual value: {num_edges}"
        assert num_edges is not None

        sampled_node_space = self._generate_sample_space(self.node_space, num_nodes)
        sampled_edge_space = self._generate_sample_space(self.edge_space, num_edges)

        assert sampled_node_space is not None
        sampled_nodes = sampled_node_space.sample(node_space_mask)
        sampled_edges = (
            sampled_edge_space.sample(edge_space_mask)
            if sampled_edge_space is not None
            else None
        )

        sampled_edge_links = None
        if sampled_edges is not None and num_edges > 0:
            # Each edge connects two uniformly chosen node indices.
            sampled_edge_links = self.np_random.integers(
                low=0, high=num_nodes, size=(num_edges, 2)
            )

        return GraphInstance(sampled_nodes, sampled_edges, sampled_edge_links)

    def contains(self, x: GraphInstance) -> bool:
        """Return boolean specifying if x is a valid member of this space."""
        if isinstance(x, GraphInstance):
            # Checks the nodes
            if isinstance(x.nodes, np.ndarray):
                if all(node in self.node_space for node in x.nodes):
                    # Check the edges and edge links which are optional
                    if isinstance(x.edges, np.ndarray) and isinstance(
                        x.edge_links, np.ndarray
                    ):
                        assert x.edges is not None
                        assert x.edge_links is not None
                        if self.edge_space is not None:
                            if all(edge in self.edge_space for edge in x.edges):
                                if np.issubdtype(x.edge_links.dtype, np.integer):
                                    if x.edge_links.shape == (len(x.edges), 2):
                                        # Every link must reference an existing node.
                                        if np.all(
                                            np.logical_and(
                                                x.edge_links >= 0,
                                                x.edge_links < len(x.nodes),
                                            )
                                        ):
                                            return True
                    else:
                        return x.edges is None and x.edge_links is None
        return False

    def __repr__(self) -> str:
        """A string representation of this space.

        The representation will include node_space and edge_space

        Returns:
            A representation of the space
        """
        return f"Graph({self.node_space}, {self.edge_space})"

    def __eq__(self, other) -> bool:
        """Check whether `other` is equivalent to this instance."""
        return (
            isinstance(other, Graph)
            and (self.node_space == other.node_space)
            and (self.edge_space == other.edge_space)
        )

    def to_jsonable(self, sample_n: NamedTuple) -> list:
        """Convert a batch of samples from this space to a JSONable data type."""
        # serialize as list of dicts
        ret_n = []
        for sample in sample_n:
            ret = {}
            ret["nodes"] = sample.nodes.tolist()
            if sample.edges is not None:
                ret["edges"] = sample.edges.tolist()
                ret["edge_links"] = sample.edge_links.tolist()
            ret_n.append(ret)
        return ret_n

    def from_jsonable(self, sample_n: Sequence[dict]) -> list:
        """Convert a JSONable data type to a batch of samples from this space."""
        ret = []
        for sample in sample_n:
            if "edges" in sample:
                ret_n = GraphInstance(
                    np.asarray(sample["nodes"]),
                    np.asarray(sample["edges"]),
                    np.asarray(sample["edge_links"]),
                )
            else:
                ret_n = GraphInstance(
                    np.asarray(sample["nodes"]),
                    None,
                    None,
                )
            ret.append(ret_n)
        return ret
class MultiBinary(Space[np.ndarray]):
    """An n-shape binary space.

    Elements of this space are binary arrays of a shape that is fixed during construction.

    Example Usage::

        >>> observation_space = MultiBinary(5)
        >>> observation_space.sample()
            array([0, 1, 0, 1, 0], dtype=int8)
        >>> observation_space = MultiBinary([3, 2])
        >>> observation_space.sample()
            array([[0, 0],
                [0, 1],
                [1, 1]], dtype=int8)
    """

    def __init__(
        self,
        n: Union[np.ndarray, Sequence[int], int],
        seed: Optional[Union[int, np.random.Generator]] = None,
    ):
        """Constructor of :class:`MultiBinary` space.

        Args:
            n: This will fix the shape of elements of the space. It can either be an integer (if the space is flat)
                or some sort of sequence (tuple, list or np.ndarray) if there are multiple axes.
            seed: Optionally, you can use this argument to seed the RNG that is used to sample from the space.
        """
        if isinstance(n, (Sequence, np.ndarray)):
            self.n = input_n = tuple(int(i) for i in n)
            assert (np.asarray(input_n) > 0).all()  # n (counts) have to be positive
        else:
            self.n = n = int(n)
            input_n = (n,)
            assert (np.asarray(input_n) > 0).all()  # n (counts) have to be positive

        super().__init__(input_n, np.int8, seed)

    @property
    def shape(self) -> Tuple[int, ...]:
        """Has stricter type than gym.Space - never None."""
        return self._shape  # type: ignore

    @property
    def is_np_flattenable(self):
        """Checks whether this space can be flattened to a :class:`spaces.Box`."""
        return True

    def sample(self, mask: Optional[np.ndarray] = None) -> np.ndarray:
        """Generates a single random sample from this space.

        A sample is drawn by independent, fair coin tosses (one toss per binary variable of the space).

        Args:
            mask: An optional np.ndarray to mask samples with expected shape of ``space.shape``.
                Where ``mask == 0`` the sample will be `0`, where ``mask == 1`` the sample will be `1`,
                and where ``mask == 2`` a random sample will be generated.
                The expected mask shape is the space shape and mask dtype is `np.int8`.

        Returns:
            Sampled values from space
        """
        # NOTE: the previous docstring claimed `mask == 1` produced random samples;
        # the implementation below fixes `0`/`1` entries verbatim and only
        # randomizes positions where `mask == 2`.
        if mask is not None:
            assert isinstance(
                mask, np.ndarray
            ), f"The expected type of the mask is np.ndarray, actual type: {type(mask)}"
            assert (
                mask.dtype == np.int8
            ), f"The expected dtype of the mask is np.int8, actual dtype: {mask.dtype}"
            assert (
                mask.shape == self.shape
            ), f"The expected shape of the mask is {self.shape}, actual shape: {mask.shape}"
            assert np.all(
                (mask == 0) | (mask == 1) | (mask == 2)
            ), f"All values of a mask should be 0, 1 or 2, actual values: {mask}"

            # mask == 2 -> fresh coin toss; mask == 0 or 1 -> that literal value.
            return np.where(
                mask == 2,
                self.np_random.integers(low=0, high=2, size=self.n, dtype=self.dtype),
                mask.astype(self.dtype),
            )

        return self.np_random.integers(low=0, high=2, size=self.n, dtype=self.dtype)

    def contains(self, x) -> bool:
        """Return boolean specifying if x is a valid member of this space."""
        if isinstance(x, Sequence):
            x = np.array(x)  # Promote list to array for contains check

        return bool(
            isinstance(x, np.ndarray)
            and self.shape == x.shape
            and np.all((x == 0) | (x == 1))
        )

    def to_jsonable(self, sample_n) -> list:
        """Convert a batch of samples from this space to a JSONable data type."""
        return np.array(sample_n).tolist()

    def from_jsonable(self, sample_n) -> list:
        """Convert a JSONable data type to a batch of samples from this space."""
        return [np.asarray(sample, self.dtype) for sample in sample_n]

    def __repr__(self) -> str:
        """Gives a string representation of this space."""
        return f"MultiBinary({self.n})"

    def __eq__(self, other) -> bool:
        """Check whether `other` is equivalent to this instance."""
        return isinstance(other, MultiBinary) and self.n == other.n
class MultiDiscrete(Space[np.ndarray]):
    """This represents the cartesian product of arbitrary :class:`Discrete` spaces.

    It is useful to represent game controllers or keyboards where each key can be represented as a discrete action space.

    Note:
        Some environment wrappers assume a value of 0 always represents the NOOP action.

    e.g. Nintendo Game Controller - Can be conceptualized as 3 discrete action spaces:

    1. Arrow Keys: Discrete 5  - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4]  - params: min: 0, max: 4
    2. Button A:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1
    3. Button B:   Discrete 2  - NOOP[0], Pressed[1] - params: min: 0, max: 1

    It can be initialized as ``MultiDiscrete([ 5, 2, 2 ])`` such that a sample might be ``array([3, 1, 0])``.

    Although this feature is rarely used, :class:`MultiDiscrete` spaces may also have several axes
    if ``nvec`` has several axes:

    Example::

        >> d = MultiDiscrete(np.array([[1, 2], [3, 4]]))
        >> d.sample()
        array([[0, 0],
               [2, 3]])
    """

    def __init__(
        self,
        nvec: Union[np.ndarray, list],
        dtype=np.int64,
        seed: Optional[Union[int, np.random.Generator]] = None,
    ):
        """Constructor of :class:`MultiDiscrete` space.

        The argument ``nvec`` will determine the number of values each categorical variable can take.

        Args:
            nvec: vector of counts of each categorical variable. This will usually be a list of integers. However,
                you may also pass a more complicated numpy array if you'd like the space to have several axes.
            dtype: This should be some kind of integer type.
            seed: Optionally, you can use this argument to seed the RNG that is used to sample from the space.
        """
        # Copy so later mutation of the caller's array cannot alter the space.
        self.nvec = np.array(nvec, dtype=dtype, copy=True)
        assert (self.nvec > 0).all(), "nvec (counts) have to be positive"

        super().__init__(self.nvec.shape, dtype, seed)

    @property
    def shape(self) -> Tuple[int, ...]:
        """Has stricter type than :class:`gym.Space` - never None."""
        return self._shape  # type: ignore

    @property
    def is_np_flattenable(self):
        """Checks whether this space can be flattened to a :class:`spaces.Box`."""
        return True

    def sample(self, mask: Optional[tuple] = None) -> np.ndarray:
        """Generates a single random sample this space.

        Args:
            mask: An optional mask for multi-discrete, expects tuples with a `np.ndarray` mask in the position of each
                action with shape `(n,)` where `n` is the number of actions and `dtype=np.int8`.
                Only mask values == 1 are possible to sample unless all mask values for an action are 0 then the default action 0 is sampled.

        Returns:
            An `np.ndarray` of shape `space.shape`
        """
        if mask is not None:

            def _apply_mask(
                sub_mask: Union[np.ndarray, tuple],
                sub_nvec: Union[np.ndarray, np.integer],
            ) -> Union[int, List[int]]:
                # Recurse through the (possibly nested) nvec structure: tuples of
                # masks mirror array axes of nvec; the base case is a single
                # integer action count with a flat np.int8 mask.
                if isinstance(sub_nvec, np.ndarray):
                    assert isinstance(
                        sub_mask, tuple
                    ), f"Expects the mask to be a tuple for sub_nvec ({sub_nvec}), actual type: {type(sub_mask)}"
                    assert len(sub_mask) == len(
                        sub_nvec
                    ), f"Expects the mask length to be equal to the number of actions, mask length: {len(sub_mask)}, nvec length: {len(sub_nvec)}"
                    return [
                        _apply_mask(new_mask, new_nvec)
                        for new_mask, new_nvec in zip(sub_mask, sub_nvec)
                    ]
                else:
                    assert np.issubdtype(
                        type(sub_nvec), np.integer
                    ), f"Expects the sub_nvec to be an action, actually: {sub_nvec}, {type(sub_nvec)}"
                    assert isinstance(
                        sub_mask, np.ndarray
                    ), f"Expects the sub mask to be np.ndarray, actual type: {type(sub_mask)}"
                    assert (
                        len(sub_mask) == sub_nvec
                    ), f"Expects the mask length to be equal to the number of actions, mask length: {len(sub_mask)}, action: {sub_nvec}"
                    assert (
                        sub_mask.dtype == np.int8
                    ), f"Expects the mask dtype to be np.int8, actual dtype: {sub_mask.dtype}"

                    valid_action_mask = sub_mask == 1
                    assert np.all(
                        np.logical_or(sub_mask == 0, valid_action_mask)
                    ), f"Expects all masks values to 0 or 1, actual values: {sub_mask}"

                    if np.any(valid_action_mask):
                        return self.np_random.choice(np.where(valid_action_mask)[0])
                    else:
                        # Fully-masked action: fall back to the NOOP action 0.
                        return 0

            return np.array(_apply_mask(mask, self.nvec), dtype=self.dtype)

        # Unmasked: scale uniform [0, 1) draws up to each count and truncate.
        return (self.np_random.random(self.nvec.shape) * self.nvec).astype(self.dtype)

    def contains(self, x) -> bool:
        """Return boolean specifying if x is a valid member of this space."""
        if isinstance(x, Sequence):
            x = np.array(x)  # Promote list to array for contains check

        # if nvec is uint32 and space dtype is uint32, then 0 <= x < self.nvec guarantees that x
        # is within correct bounds for space dtype (even though x does not have to be unsigned)
        return bool(
            isinstance(x, np.ndarray)
            and x.shape == self.shape
            and x.dtype != object
            and np.all(0 <= x)
            and np.all(x < self.nvec)
        )

    def to_jsonable(self, sample_n: Iterable[np.ndarray]):
        """Convert a batch of samples from this space to a JSONable data type."""
        return [sample.tolist() for sample in sample_n]

    def from_jsonable(self, sample_n):
        """Convert a JSONable data type to a batch of samples from this space."""
        return np.array(sample_n)

    def __repr__(self):
        """Gives a string representation of this space."""
        return f"MultiDiscrete({self.nvec})"

    def __getitem__(self, index):
        """Extract a subspace from this ``MultiDiscrete`` space."""
        nvec = self.nvec[index]
        if nvec.ndim == 0:
            subspace = Discrete(nvec)
        else:
            subspace = MultiDiscrete(nvec, self.dtype)  # type: ignore

        # you don't need to deepcopy as np random generator call replaces the state not the data
        subspace.np_random.bit_generator.state = self.np_random.bit_generator.state

        return subspace

    def __len__(self):
        """Gives the ``len`` of samples from this space."""
        if self.nvec.ndim >= 2:
            logger.warn(
                "Getting the length of a multi-dimensional MultiDiscrete space."
            )
        return len(self.nvec)

    def __eq__(self, other):
        """Check whether ``other`` is equivalent to this instance."""
        return isinstance(other, MultiDiscrete) and np.all(self.nvec == other.nvec)
class Sequence(Space[Tuple]):
    r"""This space represent sets of finite-length sequences.

    This space represents the set of tuples of the form :math:`(a_0, \dots, a_n)` where the :math:`a_i` belong
    to some space that is specified during initialization and the integer :math:`n` is not fixed

    Example::

        >>> space = Sequence(Box(0, 1))
        >>> space.sample()
        (array([0.0259352], dtype=float32),)
        >>> space.sample()
        (array([0.80977976], dtype=float32), array([0.80066574], dtype=float32), array([0.77165383], dtype=float32))
    """

    def __init__(
        self,
        space: Space,
        seed: Optional[Union[int, np.random.Generator]] = None,
    ):
        """Constructor of the :class:`Sequence` space.

        Args:
            space: Elements in the sequences this space represent must belong to this space.
            seed: Optionally, you can use this argument to seed the RNG that is used to sample from the space.
        """
        assert isinstance(
            space, gym.Space
        ), f"Expects the feature space to be instance of a gym Space, actual type: {type(space)}"
        self.feature_space = space
        # Shape and dtype are None because variable-length tuples need special handling.
        super().__init__(None, None, seed)  # type: ignore

    def seed(self, seed: Optional[int] = None) -> list:
        """Seed the PRNG of this space and the feature space."""
        # Collect the seeds of both this space and the feature space.
        return super().seed(seed) + self.feature_space.seed(seed)

    @property
    def is_np_flattenable(self):
        """Checks whether this space can be flattened to a :class:`spaces.Box`."""
        return False

    def sample(
        self,
        mask: Optional[Tuple[Optional[Union[np.ndarray, int]], Optional[Any]]] = None,
    ) -> Tuple[Any]:
        """Generates a single random sample from this space.

        Args:
            mask: An optional ``(length_mask, sample_mask)`` tuple. ``length_mask`` controls the
                sampled sequence length and is one of

                - ``None``: the length is drawn from a geometric distribution
                - ``np.ndarray`` of integers: the length is drawn uniformly from this array
                - ``int``: the length is fixed to this value

                ``sample_mask`` is forwarded to the feature space's :meth:`sample` for every element.

        Returns:
            A tuple of random length with random samples of elements from the :attr:`feature_space`.
        """
        length_mask, feature_mask = (None, None) if mask is None else mask

        if length_mask is None:
            # The choice of 0.25 is arbitrary
            length = self.np_random.geometric(0.25)
        elif np.issubdtype(type(length_mask), np.integer):
            assert (
                0 <= length_mask
            ), f"Expects the length mask to be greater than or equal to zero, actual value: {length_mask}"
            length = length_mask
        elif isinstance(length_mask, np.ndarray):
            assert (
                len(length_mask.shape) == 1
            ), f"Expects the shape of the length mask to be 1-dimensional, actual shape: {length_mask.shape}"
            assert np.all(
                0 <= length_mask
            ), f"Expects all values in the length_mask to be greater than or equal to zero, actual values: {length_mask}"
            length = self.np_random.choice(length_mask)
        else:
            raise TypeError(
                f"Expects the type of length_mask to an integer or a np.ndarray, actual type: {type(length_mask)}"
            )

        return tuple(
            self.feature_space.sample(mask=feature_mask) for _ in range(length)
        )

    def contains(self, x) -> bool:
        """Return boolean specifying if x is a valid member of this space."""
        if not isinstance(x, CollectionSequence):
            return False
        return all(self.feature_space.contains(element) for element in x)

    def __repr__(self) -> str:
        """Gives a string representation of this space."""
        return f"Sequence({self.feature_space})"

    def to_jsonable(self, sample_n: list) -> list:
        """Convert a batch of samples from this space to a JSONable data type."""
        # serialize as dict-repr of vectors
        return [self.feature_space.to_jsonable(list(sample)) for sample in sample_n]

    def from_jsonable(self, sample_n: List[List[Any]]) -> list:
        """Convert a JSONable data type to a batch of samples from this space."""
        return [tuple(self.feature_space.from_jsonable(sample)) for sample in sample_n]

    def __eq__(self, other) -> bool:
        """Check whether ``other`` is equivalent to this instance."""
        return isinstance(other, Sequence) and self.feature_space == other.feature_space
================================================ """Implementation of the `Space` metaclass.""" from typing import ( Any, Generic, Iterable, List, Mapping, Optional, Sequence, Tuple, Type, TypeVar, Union, ) import numpy as np from gym.utils import seeding T_cov = TypeVar("T_cov", covariant=True) class Space(Generic[T_cov]): """Superclass that is used to define observation and action spaces. Spaces are crucially used in Gym to define the format of valid actions and observations. They serve various purposes: * They clearly define how to interact with environments, i.e. they specify what actions need to look like and what observations will look like * They allow us to work with highly structured data (e.g. in the form of elements of :class:`Dict` spaces) and painlessly transform them into flat arrays that can be used in learning code * They provide a method to sample random elements. This is especially useful for exploration and debugging. Different spaces can be combined hierarchically via container spaces (:class:`Tuple` and :class:`Dict`) to build a more expressive space Warning: Custom observation & action spaces can inherit from the ``Space`` class. However, most use-cases should be covered by the existing space classes (e.g. :class:`Box`, :class:`Discrete`, etc...), and container classes (:class`Tuple` & :class:`Dict`). Note that parametrized probability distributions (through the :meth:`Space.sample()` method), and batching functions (in :class:`gym.vector.VectorEnv`), are only well-defined for instances of spaces provided in gym by default. Moreover, some implementations of Reinforcement Learning algorithms might not handle custom spaces properly. Use custom spaces with care. """ def __init__( self, shape: Optional[Sequence[int]] = None, dtype: Optional[Union[Type, str, np.dtype]] = None, seed: Optional[Union[int, np.random.Generator]] = None, ): """Constructor of :class:`Space`. 
Args: shape (Optional[Sequence[int]]): If elements of the space are numpy arrays, this should specify their shape. dtype (Optional[Type | str]): If elements of the space are numpy arrays, this should specify their dtype. seed: Optionally, you can use this argument to seed the RNG that is used to sample from the space """ self._shape = None if shape is None else tuple(shape) self.dtype = None if dtype is None else np.dtype(dtype) self._np_random = None if seed is not None: if isinstance(seed, np.random.Generator): self._np_random = seed else: self.seed(seed) @property def np_random(self) -> np.random.Generator: """Lazily seed the PRNG since this is expensive and only needed if sampling from this space.""" if self._np_random is None: self.seed() return self._np_random # type: ignore ## self.seed() call guarantees right type. @property def shape(self) -> Optional[Tuple[int, ...]]: """Return the shape of the space as an immutable property.""" return self._shape @property def is_np_flattenable(self): """Checks whether this space can be flattened to a :class:`spaces.Box`.""" raise NotImplementedError def sample(self, mask: Optional[Any] = None) -> T_cov: """Randomly sample an element of this space. Can be uniform or non-uniform sampling based on boundedness of space. Args: mask: A mask used for sampling, expected ``dtype=np.int8`` and see sample implementation for expected shape. 
Returns: A sampled actions from the space """ raise NotImplementedError def seed(self, seed: Optional[int] = None) -> list: """Seed the PRNG of this space and possibly the PRNGs of subspaces.""" self._np_random, seed = seeding.np_random(seed) return [seed] def contains(self, x) -> bool: """Return boolean specifying if x is a valid member of this space.""" raise NotImplementedError def __contains__(self, x) -> bool: """Return boolean specifying if x is a valid member of this space.""" return self.contains(x) def __setstate__(self, state: Union[Iterable, Mapping]): """Used when loading a pickled space. This method was implemented explicitly to allow for loading of legacy states. Args: state: The updated state value """ # Don't mutate the original state state = dict(state) # Allow for loading of legacy states. # See: # https://github.com/openai/gym/pull/2397 -- shape # https://github.com/openai/gym/pull/1913 -- np_random # if "shape" in state: state["_shape"] = state["shape"] del state["shape"] if "np_random" in state: state["_np_random"] = state["np_random"] del state["np_random"] # Update our state self.__dict__.update(state) def to_jsonable(self, sample_n: Sequence[T_cov]) -> list: """Convert a batch of samples from this space to a JSONable data type.""" # By default, assume identity is JSONable return list(sample_n) def from_jsonable(self, sample_n: list) -> List[T_cov]: """Convert a JSONable data type to a batch of samples from this space.""" # By default, assume identity is JSONable return sample_n ================================================ FILE: gym/spaces/text.py ================================================ """Implementation of a space that represents textual strings.""" from typing import Any, Dict, FrozenSet, Optional, Set, Tuple, Union import numpy as np from gym.spaces.space import Space alphanumeric: FrozenSet[str] = frozenset( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" ) class Text(Space[str]): r"""A space representing a 
    string comprised of characters from a given charset.

    Example::

        >>> # {"", "B5", "hello", ...}
        >>> Text(5)
        >>> # {"0", "42", "0123456789", ...}
        >>> import string
        >>> Text(min_length = 1,
        ...     max_length = 10,
        ...     charset = string.digits)
    """

    def __init__(
        self,
        max_length: int,
        *,
        min_length: int = 1,
        charset: Union[Set[str], str] = alphanumeric,
        seed: Optional[Union[int, np.random.Generator]] = None,
    ):
        r"""Constructor of :class:`Text` space.

        Both bounds for text length are inclusive.

        Args:
            min_length (int): Minimum text length (in characters). Defaults to 1 to prevent empty strings.
            max_length (int): Maximum text length (in characters).
            charset (Union[set], str): Character set, defaults to the lower and upper english alphabet plus latin digits.
            seed: The seed for sampling from the space.
        """
        assert np.issubdtype(
            type(min_length), np.integer
        ), f"Expects the min_length to be an integer, actual type: {type(min_length)}"
        assert np.issubdtype(
            type(max_length), np.integer
        ), f"Expects the max_length to be an integer, actual type: {type(max_length)}"
        assert (
            0 <= min_length
        ), f"Minimum text length must be non-negative, actual value: {min_length}"
        assert (
            min_length <= max_length
        ), f"The min_length must be less than or equal to the max_length, min_length: {min_length}, max_length: {max_length}"

        self.min_length: int = int(min_length)
        self.max_length: int = int(max_length)

        # Several views over the same charset: a set for membership tests,
        # a tuple for indexed access, a dict for char -> index lookup and a
        # sorted string for display.
        self._char_set: FrozenSet[str] = frozenset(charset)
        self._char_list: Tuple[str, ...] = tuple(charset)
        self._char_index: Dict[str, np.int32] = {
            val: np.int32(i) for i, val in enumerate(tuple(charset))
        }
        self._char_str: str = "".join(sorted(tuple(charset)))

        # As the shape is dynamic (between min_length and max_length) then None
        super().__init__(dtype=str, seed=seed)

    def sample(
        self, mask: Optional[Tuple[Optional[int], Optional[np.ndarray]]] = None
    ) -> str:
        """Generates a single random sample from this space with by default a random length between `min_length` and `max_length` and sampled from the `charset`.

        Args:
            mask: An optional tuples of length and mask for the text.
                The length is expected to be between the `min_length` and `max_length` otherwise a random integer between `min_length` and `max_length` is selected.
                For the mask, we expect a numpy array of length of the charset passed with `dtype == np.int8`.
                If the charlist mask is all zero then an empty string is returned no matter the `min_length`

        Returns:
            A sampled string from the space
        """
        if mask is not None:
            assert isinstance(
                mask, tuple
            ), f"Expects the mask type to be a tuple, actual type: {type(mask)}"
            assert (
                len(mask) == 2
            ), f"Expects the mask length to be two, actual length: {len(mask)}"
            length, charlist_mask = mask

            if length is not None:
                assert np.issubdtype(
                    type(length), np.integer
                ), f"Expects the Text sample length to be an integer, actual type: {type(length)}"
                assert (
                    self.min_length <= length <= self.max_length
                ), f"Expects the Text sample length be between {self.min_length} and {self.max_length}, actual length: {length}"

            if charlist_mask is not None:
                assert isinstance(
                    charlist_mask, np.ndarray
                ), f"Expects the Text sample mask to be an np.ndarray, actual type: {type(charlist_mask)}"
                assert (
                    charlist_mask.dtype == np.int8
                ), f"Expects the Text sample mask to be an np.ndarray, actual dtype: {charlist_mask.dtype}"
                assert charlist_mask.shape == (
                    len(self.character_set),
                ), f"expects the Text sample mask to be {(len(self.character_set),)}, actual shape: {charlist_mask.shape}"
                assert np.all(
                    np.logical_or(charlist_mask == 0, charlist_mask == 1)
                ), f"Expects all masks values to 0 or 1, actual values: {charlist_mask}"
        else:
            length, charlist_mask = None, None

        if length is None:
            # Both bounds are inclusive, hence the + 1 on the exclusive upper bound.
            length = self.np_random.integers(self.min_length, self.max_length + 1)

        if charlist_mask is None:
            # `choice` returns an array of single characters here.
            string = self.np_random.choice(self.character_list, size=length)
        else:
            valid_mask = charlist_mask == 1
            valid_indexes = np.where(valid_mask)[0]
            if len(valid_indexes) == 0:
                if self.min_length == 0:
                    string = ""
                else:
                    # Otherwise the string will not be contained in the space
                    raise ValueError(
                        f"Trying to sample with a minimum length > 0 ({self.min_length}) but the character mask is all zero meaning that no character could be sampled."
                    )
            else:
                string = "".join(
                    self.character_list[index]
                    for index in self.np_random.choice(valid_indexes, size=length)
                )

        # `string` is either an array of characters or an already-joined str;
        # joining is a no-op for the latter.
        return "".join(string)

    def contains(self, x: Any) -> bool:
        """Return boolean specifying if x is a valid member of this space."""
        if isinstance(x, str):
            if self.min_length <= len(x) <= self.max_length:
                return all(c in self.character_set for c in x)
        return False

    def __repr__(self) -> str:
        """Gives a string representation of this space."""
        return (
            f"Text({self.min_length}, {self.max_length}, characters={self.characters})"
        )

    def __eq__(self, other) -> bool:
        """Check whether ``other`` is equivalent to this instance."""
        return (
            isinstance(other, Text)
            and self.min_length == other.min_length
            and self.max_length == other.max_length
            and self.character_set == other.character_set
        )

    @property
    def character_set(self) -> FrozenSet[str]:
        """Returns the character set for the space."""
        return self._char_set

    @property
    def character_list(self) -> Tuple[str, ...]:
        """Returns a tuple of characters in the space."""
        return self._char_list

    def character_index(self, char: str) -> np.int32:
        """Returns a unique index for each character in the space's character set."""
        return self._char_index[char]

    @property
    def characters(self) -> str:
        """Returns a string with all Text characters."""
        return self._char_str

    @property
    def is_np_flattenable(self) -> bool:
        """The flattened version is an integer array for each character, padded to the max character length."""
        return True


================================================
FILE: gym/spaces/tuple.py
================================================
"""Implementation of a space that represents the cartesian product of other spaces."""
from collections.abc import Sequence as CollectionSequence
from typing import Iterable, Optional
from typing import Sequence as TypingSequence
from typing import Tuple as TypingTuple
from typing import Union

import numpy as np

from gym.spaces.space import Space


class Tuple(Space[tuple], CollectionSequence):
    """A tuple (more precisely: the cartesian product) of :class:`Space` instances.

    Elements of this space are tuples of elements of the constituent spaces.

    Example usage::

        >>> from gym.spaces import Box, Discrete
        >>> observation_space = Tuple((Discrete(2), Box(-1, 1, shape=(2,))))
        >>> observation_space.sample()
        (0, array([0.03633198, 0.42370757], dtype=float32))
    """

    def __init__(
        self,
        spaces: Iterable[Space],
        seed: Optional[Union[int, TypingSequence[int], np.random.Generator]] = None,
    ):
        r"""Constructor of :class:`Tuple` space.

        The generated instance will represent the cartesian product :math:`\text{spaces}[0] \times ... \times \text{spaces}[-1]`.

        Args:
            spaces (Iterable[Space]): The spaces that are involved in the cartesian product.
            seed: Optionally, you can use this argument to seed the RNGs of the ``spaces`` to ensure reproducible sampling.
""" self.spaces = tuple(spaces) for space in self.spaces: assert isinstance( space, Space ), "Elements of the tuple must be instances of gym.Space" super().__init__(None, None, seed) # type: ignore @property def is_np_flattenable(self): """Checks whether this space can be flattened to a :class:`spaces.Box`.""" return all(space.is_np_flattenable for space in self.spaces) def seed( self, seed: Optional[Union[int, TypingSequence[int]]] = None ) -> TypingSequence[int]: """Seed the PRNG of this space and all subspaces. Depending on the type of seed, the subspaces will be seeded differently * None - All the subspaces will use a random initial seed * Int - The integer is used to seed the `Tuple` space that is used to generate seed values for each of the subspaces. Warning, this does not guarantee unique seeds for all of the subspaces. * List - Values used to seed the subspaces. This allows the seeding of multiple composite subspaces (`List(42, 54, ...)`). Args: seed: An optional list of ints or int to seed the (sub-)spaces. """ seeds = [] if isinstance(seed, CollectionSequence): assert len(seed) == len( self.spaces ), f"Expects that the subspaces of seeds equals the number of subspaces. Actual length of seeds: {len(seeds)}, length of subspaces: {len(self.spaces)}" for subseed, space in zip(seed, self.spaces): seeds += space.seed(subseed) elif isinstance(seed, int): seeds = super().seed(seed) subseeds = self.np_random.integers( np.iinfo(np.int32).max, size=len(self.spaces) ) for subspace, subseed in zip(self.spaces, subseeds): seeds += subspace.seed(int(subseed)) elif seed is None: for space in self.spaces: seeds += space.seed(seed) else: raise TypeError( f"Expected seed type: list, tuple, int or None, actual type: {type(seed)}" ) return seeds def sample( self, mask: Optional[TypingTuple[Optional[np.ndarray], ...]] = None ) -> tuple: """Generates a single random sample inside this space. This method draws independent samples from the subspaces. 
Args: mask: An optional tuple of optional masks for each of the subspace's samples, expects the same number of masks as spaces Returns: Tuple of the subspace's samples """ if mask is not None: assert isinstance( mask, tuple ), f"Expected type of mask is tuple, actual type: {type(mask)}" assert len(mask) == len( self.spaces ), f"Expected length of mask is {len(self.spaces)}, actual length: {len(mask)}" return tuple( space.sample(mask=sub_mask) for space, sub_mask in zip(self.spaces, mask) ) return tuple(space.sample() for space in self.spaces) def contains(self, x) -> bool: """Return boolean specifying if x is a valid member of this space.""" if isinstance(x, (list, np.ndarray)): x = tuple(x) # Promote list and ndarray to tuple for contains check return ( isinstance(x, tuple) and len(x) == len(self.spaces) and all(space.contains(part) for (space, part) in zip(self.spaces, x)) ) def __repr__(self) -> str: """Gives a string representation of this space.""" return "Tuple(" + ", ".join([str(s) for s in self.spaces]) + ")" def to_jsonable(self, sample_n: CollectionSequence) -> list: """Convert a batch of samples from this space to a JSONable data type.""" # serialize as list-repr of tuple of vectors return [ space.to_jsonable([sample[i] for sample in sample_n]) for i, space in enumerate(self.spaces) ] def from_jsonable(self, sample_n) -> list: """Convert a JSONable data type to a batch of samples from this space.""" return [ sample for sample in zip( *[ space.from_jsonable(sample_n[i]) for i, space in enumerate(self.spaces) ] ) ] def __getitem__(self, index: int) -> Space: """Get the subspace at specific `index`.""" return self.spaces[index] def __len__(self) -> int: """Get the number of subspaces that are involved in the cartesian product.""" return len(self.spaces) def __eq__(self, other) -> bool: """Check whether ``other`` is equivalent to this instance.""" return isinstance(other, Tuple) and self.spaces == other.spaces ================================================ 
FILE: gym/spaces/utils.py ================================================ """Implementation of utility functions that can be applied to spaces. These functions mostly take care of flattening and unflattening elements of spaces to facilitate their usage in learning code. """ import operator as op from collections import OrderedDict from functools import reduce, singledispatch from typing import Dict as TypingDict from typing import TypeVar, Union, cast import numpy as np from gym.spaces import ( Box, Dict, Discrete, Graph, GraphInstance, MultiBinary, MultiDiscrete, Sequence, Space, Text, Tuple, ) @singledispatch def flatdim(space: Space) -> int: """Return the number of dimensions a flattened equivalent of this space would have. Example usage:: >>> from gym.spaces import Discrete >>> space = Dict({"position": Discrete(2), "velocity": Discrete(3)}) >>> flatdim(space) 5 Args: space: The space to return the number of dimensions of the flattened spaces Returns: The number of dimensions for the flattened spaces Raises: NotImplementedError: if the space is not defined in ``gym.spaces``. 
ValueError: if the space cannot be flattened into a :class:`Box` """ if not space.is_np_flattenable: raise ValueError( f"{space} cannot be flattened to a numpy array, probably because it contains a `Graph` or `Sequence` subspace" ) raise NotImplementedError(f"Unknown space: `{space}`") @flatdim.register(Box) @flatdim.register(MultiBinary) def _flatdim_box_multibinary(space: Union[Box, MultiBinary]) -> int: return reduce(op.mul, space.shape, 1) @flatdim.register(Discrete) def _flatdim_discrete(space: Discrete) -> int: return int(space.n) @flatdim.register(MultiDiscrete) def _flatdim_multidiscrete(space: MultiDiscrete) -> int: return int(np.sum(space.nvec)) @flatdim.register(Tuple) def _flatdim_tuple(space: Tuple) -> int: if space.is_np_flattenable: return sum(flatdim(s) for s in space.spaces) raise ValueError( f"{space} cannot be flattened to a numpy array, probably because it contains a `Graph` or `Sequence` subspace" ) @flatdim.register(Dict) def _flatdim_dict(space: Dict) -> int: if space.is_np_flattenable: return sum(flatdim(s) for s in space.spaces.values()) raise ValueError( f"{space} cannot be flattened to a numpy array, probably because it contains a `Graph` or `Sequence` subspace" ) @flatdim.register(Graph) def _flatdim_graph(space: Graph): raise ValueError( "Cannot get flattened size as the Graph Space in Gym has a dynamic size." ) @flatdim.register(Text) def _flatdim_text(space: Text) -> int: return space.max_length T = TypeVar("T") FlatType = Union[np.ndarray, TypingDict, tuple, GraphInstance] @singledispatch def flatten(space: Space[T], x: T) -> FlatType: """Flatten a data point from a space. This is useful when e.g. points from spaces must be passed to a neural network, which only understands flat arrays of floats. 
Args: space: The space that ``x`` is flattened by x: The value to flatten Returns: - For ``Box`` and ``MultiBinary``, this is a flattened array - For ``Discrete`` and ``MultiDiscrete``, this is a flattened one-hot array of the sample - For ``Tuple`` and ``Dict``, this is a concatenated array the subspaces (does not support graph subspaces) - For graph spaces, returns `GraphInstance` where: - `nodes` are n x k arrays - `edges` are either: - m x k arrays - None - `edge_links` are either: - m x 2 arrays - None Raises: NotImplementedError: If the space is not defined in ``gym.spaces``. """ raise NotImplementedError(f"Unknown space: `{space}`") @flatten.register(Box) @flatten.register(MultiBinary) def _flatten_box_multibinary(space, x) -> np.ndarray: return np.asarray(x, dtype=space.dtype).flatten() @flatten.register(Discrete) def _flatten_discrete(space, x) -> np.ndarray: onehot = np.zeros(space.n, dtype=space.dtype) onehot[x - space.start] = 1 return onehot @flatten.register(MultiDiscrete) def _flatten_multidiscrete(space, x) -> np.ndarray: offsets = np.zeros((space.nvec.size + 1,), dtype=space.dtype) offsets[1:] = np.cumsum(space.nvec.flatten()) onehot = np.zeros((offsets[-1],), dtype=space.dtype) onehot[offsets[:-1] + x.flatten()] = 1 return onehot @flatten.register(Tuple) def _flatten_tuple(space, x) -> Union[tuple, np.ndarray]: if space.is_np_flattenable: return np.concatenate( [flatten(s, x_part) for x_part, s in zip(x, space.spaces)] ) return tuple(flatten(s, x_part) for x_part, s in zip(x, space.spaces)) @flatten.register(Dict) def _flatten_dict(space, x) -> Union[dict, np.ndarray]: if space.is_np_flattenable: return np.concatenate([flatten(s, x[key]) for key, s in space.spaces.items()]) return OrderedDict((key, flatten(s, x[key])) for key, s in space.spaces.items()) @flatten.register(Graph) def _flatten_graph(space, x) -> GraphInstance: """We're not using `.unflatten() for :class:`Box` and :class:`Discrete` because a graph is not a homogeneous space, see 
`.flatten` docstring.""" def _graph_unflatten(unflatten_space, unflatten_x): ret = None if unflatten_space is not None and unflatten_x is not None: if isinstance(unflatten_space, Box): ret = unflatten_x.reshape(unflatten_x.shape[0], -1) elif isinstance(unflatten_space, Discrete): ret = np.zeros( (unflatten_x.shape[0], unflatten_space.n - unflatten_space.start), dtype=unflatten_space.dtype, ) ret[ np.arange(unflatten_x.shape[0]), unflatten_x - unflatten_space.start ] = 1 return ret nodes = _graph_unflatten(space.node_space, x.nodes) edges = _graph_unflatten(space.edge_space, x.edges) return GraphInstance(nodes, edges, x.edge_links) @flatten.register(Text) def _flatten_text(space: Text, x: str) -> np.ndarray: arr = np.full( shape=(space.max_length,), fill_value=len(space.character_set), dtype=np.int32 ) for i, val in enumerate(x): arr[i] = space.character_index(val) return arr @flatten.register(Sequence) def _flatten_sequence(space, x) -> tuple: return tuple(flatten(space.feature_space, item) for item in x) @singledispatch def unflatten(space: Space[T], x: FlatType) -> T: """Unflatten a data point from a space. This reverses the transformation applied by :func:`flatten`. You must ensure that the ``space`` argument is the same as for the :func:`flatten` call. Args: space: The space used to unflatten ``x`` x: The array to unflatten Returns: A point with a structure that matches the space. Raises: NotImplementedError: if the space is not defined in ``gym.spaces``. 
""" raise NotImplementedError(f"Unknown space: `{space}`") @unflatten.register(Box) @unflatten.register(MultiBinary) def _unflatten_box_multibinary( space: Union[Box, MultiBinary], x: np.ndarray ) -> np.ndarray: return np.asarray(x, dtype=space.dtype).reshape(space.shape) @unflatten.register(Discrete) def _unflatten_discrete(space: Discrete, x: np.ndarray) -> int: return int(space.start + np.nonzero(x)[0][0]) @unflatten.register(MultiDiscrete) def _unflatten_multidiscrete(space: MultiDiscrete, x: np.ndarray) -> np.ndarray: offsets = np.zeros((space.nvec.size + 1,), dtype=space.dtype) offsets[1:] = np.cumsum(space.nvec.flatten()) (indices,) = cast(type(offsets[:-1]), np.nonzero(x)) return np.asarray(indices - offsets[:-1], dtype=space.dtype).reshape(space.shape) @unflatten.register(Tuple) def _unflatten_tuple(space: Tuple, x: Union[np.ndarray, tuple]) -> tuple: if space.is_np_flattenable: assert isinstance( x, np.ndarray ), f"{space} is numpy-flattenable. Thus, you should only unflatten numpy arrays for this space. Got a {type(x)}" dims = np.asarray([flatdim(s) for s in space.spaces], dtype=np.int_) list_flattened = np.split(x, np.cumsum(dims[:-1])) return tuple( unflatten(s, flattened) for flattened, s in zip(list_flattened, space.spaces) ) assert isinstance( x, tuple ), f"{space} is not numpy-flattenable. Thus, you should only unflatten tuples for this space. Got a {type(x)}" return tuple(unflatten(s, flattened) for flattened, s in zip(x, space.spaces)) @unflatten.register(Dict) def _unflatten_dict(space: Dict, x: Union[np.ndarray, TypingDict]) -> dict: if space.is_np_flattenable: dims = np.asarray([flatdim(s) for s in space.spaces.values()], dtype=np.int_) list_flattened = np.split(x, np.cumsum(dims[:-1])) return OrderedDict( [ (key, unflatten(s, flattened)) for flattened, (key, s) in zip(list_flattened, space.spaces.items()) ] ) assert isinstance( x, dict ), f"{space} is not numpy-flattenable. Thus, you should only unflatten dictionary for this space. 
Got a {type(x)}" return OrderedDict((key, unflatten(s, x[key])) for key, s in space.spaces.items()) @unflatten.register(Graph) def _unflatten_graph(space: Graph, x: GraphInstance) -> GraphInstance: """We're not using `.unflatten() for :class:`Box` and :class:`Discrete` because a graph is not a homogeneous space. The size of the outcome is actually not fixed, but determined based on the number of nodes and edges in the graph. """ def _graph_unflatten(space, x): ret = None if space is not None and x is not None: if isinstance(space, Box): ret = x.reshape(-1, *space.shape) elif isinstance(space, Discrete): ret = np.asarray(np.nonzero(x))[-1, :] return ret nodes = _graph_unflatten(space.node_space, x.nodes) edges = _graph_unflatten(space.edge_space, x.edges) return GraphInstance(nodes, edges, x.edge_links) @unflatten.register(Text) def _unflatten_text(space: Text, x: np.ndarray) -> str: return "".join( [space.character_list[val] for val in x if val < len(space.character_set)] ) @unflatten.register(Sequence) def _unflatten_sequence(space: Sequence, x: tuple) -> tuple: return tuple(unflatten(space.feature_space, item) for item in x) @singledispatch def flatten_space(space: Space) -> Union[Dict, Sequence, Tuple, Graph]: """Flatten a space into a space that is as flat as possible. This function will attempt to flatten `space` into a single :class:`Box` space. However, this might not be possible when `space` is an instance of :class:`Graph`, :class:`Sequence` or a compound space that contains a :class:`Graph` or :class:`Sequence`space. This is equivalent to :func:`flatten`, but operates on the space itself. The result for non-graph spaces is always a `Box` with flat boundaries. While the result for graph spaces is always a `Graph` with `node_space` being a `Box` with flat boundaries and `edge_space` being a `Box` with flat boundaries or `None`. The box has exactly :func:`flatdim` dimensions. 
    Flattening a sample of the original space has the same effect as taking a sample of the flattened space.

    Example::

        >>> box = Box(0.0, 1.0, shape=(3, 4, 5))
        >>> box
        Box(3, 4, 5)
        >>> flatten_space(box)
        Box(60,)
        >>> flatten(box, box.sample()) in flatten_space(box)
        True

    Example that flattens a discrete space::

        >>> discrete = Discrete(5)
        >>> flatten_space(discrete)
        Box(5,)
        >>> flatten(discrete, discrete.sample()) in flatten_space(discrete)
        True

    Example that recursively flattens a dict::

        >>> space = Dict({"position": Discrete(2), "velocity": Box(0, 1, shape=(2, 2))})
        >>> flatten_space(space)
        Box(6,)
        >>> flatten(space, space.sample()) in flatten_space(space)
        True

    Example that flattens a graph::

        >>> space = Graph(node_space=Box(low=-100, high=100, shape=(3, 4)), edge_space=Discrete(5))
        >>> flatten_space(space)
        Graph(Box(-100.0, 100.0, (12,), float32), Box(0, 1, (5,), int64))
        >>> flatten(space, space.sample()) in flatten_space(space)
        True

    Args:
        space: The space to flatten

    Returns:
        A flattened Box

    Raises:
        NotImplementedError: if the space is not defined in ``gym.spaces``.
""" raise NotImplementedError(f"Unknown space: `{space}`") @flatten_space.register(Box) def _flatten_space_box(space: Box) -> Box: return Box(space.low.flatten(), space.high.flatten(), dtype=space.dtype) @flatten_space.register(Discrete) @flatten_space.register(MultiBinary) @flatten_space.register(MultiDiscrete) def _flatten_space_binary(space: Union[Discrete, MultiBinary, MultiDiscrete]) -> Box: return Box(low=0, high=1, shape=(flatdim(space),), dtype=space.dtype) @flatten_space.register(Tuple) def _flatten_space_tuple(space: Tuple) -> Union[Box, Tuple]: if space.is_np_flattenable: space_list = [flatten_space(s) for s in space.spaces] return Box( low=np.concatenate([s.low for s in space_list]), high=np.concatenate([s.high for s in space_list]), dtype=np.result_type(*[s.dtype for s in space_list]), ) return Tuple(spaces=[flatten_space(s) for s in space.spaces]) @flatten_space.register(Dict) def _flatten_space_dict(space: Dict) -> Union[Box, Dict]: if space.is_np_flattenable: space_list = [flatten_space(s) for s in space.spaces.values()] return Box( low=np.concatenate([s.low for s in space_list]), high=np.concatenate([s.high for s in space_list]), dtype=np.result_type(*[s.dtype for s in space_list]), ) return Dict( spaces=OrderedDict( (key, flatten_space(space)) for key, space in space.spaces.items() ) ) @flatten_space.register(Graph) def _flatten_space_graph(space: Graph) -> Graph: return Graph( node_space=flatten_space(space.node_space), edge_space=flatten_space(space.edge_space) if space.edge_space is not None else None, ) @flatten_space.register(Text) def _flatten_space_text(space: Text) -> Box: return Box( low=0, high=len(space.character_set), shape=(space.max_length,), dtype=np.int32 ) @flatten_space.register(Sequence) def _flatten_space_sequence(space: Sequence) -> Sequence: return Sequence(flatten_space(space.feature_space)) ================================================ FILE: gym/utils/__init__.py ================================================ """A set 
of common utilities used within the environments. These are not intended as API functions, and will not remain stable over time. """ # These submodules should not have any import-time dependencies. # We want this since we use `utils` during our import-time sanity checks # that verify that our dependencies are actually present. from gym.utils.colorize import colorize from gym.utils.ezpickle import EzPickle ================================================ FILE: gym/utils/colorize.py ================================================ """A set of common utilities used within the environments. These are not intended as API functions, and will not remain stable over time. """ color2num = dict( gray=30, red=31, green=32, yellow=33, blue=34, magenta=35, cyan=36, white=37, crimson=38, ) def colorize( string: str, color: str, bold: bool = False, highlight: bool = False ) -> str: """Returns string surrounded by appropriate terminal colour codes to print colourised text. Args: string: The message to colourise color: Literal values are gray, red, green, yellow, blue, magenta, cyan, white, crimson bold: If to bold the string highlight: If to highlight the string Returns: Colourised string """ attr = [] num = color2num[color] if highlight: num += 10 attr.append(str(num)) if bold: attr.append("1") attrs = ";".join(attr) return f"\x1b[{attrs}m{string}\x1b[0m" ================================================ FILE: gym/utils/env_checker.py ================================================ """A set of functions for checking an environment details. 
This file is originally from the Stable Baselines3 repository hosted on GitHub (https://github.com/DLR-RM/stable-baselines3/) Original Author: Antonin Raffin It also uses some warnings/assertions from the PettingZoo repository hosted on GitHub (https://github.com/PettingZoo-Team/PettingZoo) Original Author: J K Terry This was rewritten and split into "env_checker.py" and "passive_env_checker.py" for invasive and passive environment checking Original Author: Mark Towers These projects are covered by the MIT License. """ import inspect from copy import deepcopy import numpy as np import gym from gym import logger, spaces from gym.utils.passive_env_checker import ( check_action_space, check_observation_space, env_render_passive_checker, env_reset_passive_checker, env_step_passive_checker, ) def data_equivalence(data_1, data_2) -> bool: """Assert equality between data 1 and 2, i.e observations, actions, info. Args: data_1: data structure 1 data_2: data structure 2 Returns: If observation 1 and 2 are equivalent """ if type(data_1) == type(data_2): if isinstance(data_1, dict): return data_1.keys() == data_2.keys() and all( data_equivalence(data_1[k], data_2[k]) for k in data_1.keys() ) elif isinstance(data_1, (tuple, list)): return len(data_1) == len(data_2) and all( data_equivalence(o_1, o_2) for o_1, o_2 in zip(data_1, data_2) ) elif isinstance(data_1, np.ndarray): return data_1.shape == data_2.shape and np.allclose( data_1, data_2, atol=0.00001 ) else: return data_1 == data_2 else: return False def check_reset_seed(env: gym.Env): """Check that the environment can be reset with a seed. Args: env: The environment to check Raises: AssertionError: The environment cannot be reset with a random seed, even though `seed` or `kwargs` appear in the signature. 
""" signature = inspect.signature(env.reset) if "seed" in signature.parameters or ( "kwargs" in signature.parameters and signature.parameters["kwargs"].kind is inspect.Parameter.VAR_KEYWORD ): try: obs_1, info = env.reset(seed=123) assert ( obs_1 in env.observation_space ), "The observation returned by `env.reset(seed=123)` is not within the observation space." assert ( env.unwrapped._np_random # pyright: ignore [reportPrivateUsage] is not None ), "Expects the random number generator to have been generated given a seed was passed to reset. Mostly likely the environment reset function does not call `super().reset(seed=seed)`." seed_123_rng = deepcopy( env.unwrapped._np_random # pyright: ignore [reportPrivateUsage] ) obs_2, info = env.reset(seed=123) assert ( obs_2 in env.observation_space ), "The observation returned by `env.reset(seed=123)` is not within the observation space." if env.spec is not None and env.spec.nondeterministic is False: assert data_equivalence( obs_1, obs_2 ), "Using `env.reset(seed=123)` is non-deterministic as the observations are not equivalent." assert ( env.unwrapped._np_random.bit_generator.state # pyright: ignore [reportPrivateUsage] == seed_123_rng.bit_generator.state ), "Mostly likely the environment reset function does not call `super().reset(seed=seed)` as the random generates are not same when the same seeds are passed to `env.reset`." obs_3, info = env.reset(seed=456) assert ( obs_3 in env.observation_space ), "The observation returned by `env.reset(seed=456)` is not within the observation space." assert ( env.unwrapped._np_random.bit_generator.state # pyright: ignore [reportPrivateUsage] != seed_123_rng.bit_generator.state ), "Mostly likely the environment reset function does not call `super().reset(seed=seed)` as the random number generators are not different when different seeds are passed to `env.reset`." 
except TypeError as e: raise AssertionError( "The environment cannot be reset with a random seed, even though `seed` or `kwargs` appear in the signature. " f"This should never happen, please report this issue. The error was: {e}" ) seed_param = signature.parameters.get("seed") # Check the default value is None if seed_param is not None and seed_param.default is not None: logger.warn( "The default seed argument in reset should be `None`, otherwise the environment will by default always be deterministic. " f"Actual default: {seed_param.default}" ) else: raise gym.error.Error( "The `reset` method does not provide a `seed` or `**kwargs` keyword argument." ) def check_reset_options(env: gym.Env): """Check that the environment can be reset with options. Args: env: The environment to check Raises: AssertionError: The environment cannot be reset with options, even though `options` or `kwargs` appear in the signature. """ signature = inspect.signature(env.reset) if "options" in signature.parameters or ( "kwargs" in signature.parameters and signature.parameters["kwargs"].kind is inspect.Parameter.VAR_KEYWORD ): try: env.reset(options={}) except TypeError as e: raise AssertionError( "The environment cannot be reset with options, even though `options` or `**kwargs` appear in the signature. " f"This should never happen, please report this issue. The error was: {e}" ) else: raise gym.error.Error( "The `reset` method does not provide an `options` or `**kwargs` keyword argument." ) def check_reset_return_info_deprecation(env: gym.Env): """Makes sure support for deprecated `return_info` argument is dropped. Args: env: The environment to check Raises: UserWarning """ signature = inspect.signature(env.reset) if "return_info" in signature.parameters: logger.warn( "`return_info` is deprecated as an optional argument to `reset`. `reset`" "should now always return `obs, info` where `obs` is an observation, and `info` is a dictionary" "containing additional information." 
) def check_seed_deprecation(env: gym.Env): """Makes sure support for deprecated function `seed` is dropped. Args: env: The environment to check Raises: UserWarning """ seed_fn = getattr(env, "seed", None) if callable(seed_fn): logger.warn( "Official support for the `seed` function is dropped. " "Standard practice is to reset gym environments using `env.reset(seed=)`" ) def check_reset_return_type(env: gym.Env): """Checks that :meth:`reset` correctly returns a tuple of the form `(obs , info)`. Args: env: The environment to check Raises: AssertionError depending on spec violation """ result = env.reset() assert isinstance( result, tuple ), f"The result returned by `env.reset()` was not a tuple of the form `(obs, info)`, where `obs` is a observation and `info` is a dictionary containing additional information. Actual type: `{type(result)}`" assert ( len(result) == 2 ), f"Calling the reset method did not return a 2-tuple, actual length: {len(result)}" obs, info = result assert ( obs in env.observation_space ), "The first element returned by `env.reset()` is not within the observation space." assert isinstance( info, dict ), f"The second element returned by `env.reset()` was not a dictionary, actual type: {type(info)}" def check_space_limit(space, space_type: str): """Check the space limit for only the Box space as a test that only runs as part of `check_env`.""" if isinstance(space, spaces.Box): if np.any(np.equal(space.low, -np.inf)): logger.warn( f"A Box {space_type} space minimum value is -infinity. This is probably too low." ) if np.any(np.equal(space.high, np.inf)): logger.warn( f"A Box {space_type} space maximum value is -infinity. This is probably too high." 
) # Check that the Box space is normalized if space_type == "action": if len(space.shape) == 1: # for vector boxes if ( np.any( np.logical_and( space.low != np.zeros_like(space.low), np.abs(space.low) != np.abs(space.high), ) ) or np.any(space.low < -1) or np.any(space.high > 1) ): # todo - Add to gymlibrary.ml? logger.warn( "For Box action spaces, we recommend using a symmetric and normalized space (range=[-1, 1] or [0, 1]). " "See https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html for more information." ) elif isinstance(space, spaces.Tuple): for subspace in space.spaces: check_space_limit(subspace, space_type) elif isinstance(space, spaces.Dict): for subspace in space.values(): check_space_limit(subspace, space_type) def check_env(env: gym.Env, warn: bool = None, skip_render_check: bool = False): """Check that an environment follows Gym API. This is an invasive function that calls the environment's reset and step. This is particularly useful when using a custom environment. Please take a look at https://www.gymlibrary.dev/content/environment_creation/ for more information about the API. Args: env: The Gym environment that will be checked warn: Ignored skip_render_check: Whether to skip the checks for the render method. True by default (useful for the CI) """ if warn is not None: logger.warn("`check_env(warn=...)` parameter is now ignored.") assert isinstance( env, gym.Env ), "The environment must inherit from the gym.Env class. See https://www.gymlibrary.dev/content/environment_creation/ for more info." if env.unwrapped is not env: logger.warn( f"The environment ({env}) is different from the unwrapped version ({env.unwrapped}). This could effect the environment checker as the environment most likely has a wrapper applied to it. We recommend using the raw environment for `check_env` using `env.unwrapped`." 
        )

    # ============= Check the spaces (observation and action) ================
    assert hasattr(
        env, "action_space"
    ), "The environment must specify an action space. See https://www.gymlibrary.dev/content/environment_creation/ for more info."
    check_action_space(env.action_space)
    check_space_limit(env.action_space, "action")
    assert hasattr(
        env, "observation_space"
    ), "The environment must specify an observation space. See https://www.gymlibrary.dev/content/environment_creation/ for more info."
    check_observation_space(env.observation_space)
    check_space_limit(env.observation_space, "observation")

    # ==== Check the reset method ====
    check_seed_deprecation(env)
    check_reset_return_info_deprecation(env)
    check_reset_return_type(env)
    check_reset_seed(env)
    check_reset_options(env)

    # ============ Check the returned values ===============
    env_reset_passive_checker(env)
    env_step_passive_checker(env, env.action_space.sample())

    # ==== Check the render method and the declared render modes ====
    if not skip_render_check:
        if env.render_mode is not None:
            env_render_passive_checker(env)

        # todo: recreate the environment with a different render_mode for check that each work


================================================
FILE: gym/utils/ezpickle.py
================================================
"""Class for pickling and unpickling objects via their constructor arguments."""


class EzPickle:
    """Objects that are pickled and unpickled via their constructor arguments.

    Example::

        >>> class Dog(Animal, EzPickle):
        ...     def __init__(self, furcolor, tailkind="bushy"):
        ...         Animal.__init__()
        ...         EzPickle.__init__(furcolor, tailkind)

    When this object is unpickled, a new ``Dog`` will be constructed by passing the provided furcolor and tailkind into the constructor.
    However, philosophers are still not sure whether it is still the same dog.

    This is generally needed only for environments which wrap C/C++ code, such as MuJoCo and Atari.
    """

    def __init__(self, *args, **kwargs):
        """Uses the ``args`` and ``kwargs`` from the object's constructor for pickling."""
        # Only the constructor arguments are recorded; any other instance
        # state is deliberately NOT pickled and is rebuilt in __setstate__ by
        # re-running the constructor.
        self._ezpickle_args = args
        self._ezpickle_kwargs = kwargs

    def __getstate__(self):
        """Returns the object pickle state with args and kwargs."""
        return {
            "_ezpickle_args": self._ezpickle_args,
            "_ezpickle_kwargs": self._ezpickle_kwargs,
        }

    def __setstate__(self, d):
        """Sets the object pickle state using d."""
        # Re-run the constructor with the saved arguments and adopt the fresh
        # instance's attribute dictionary wholesale.
        out = type(self)(*d["_ezpickle_args"], **d["_ezpickle_kwargs"])
        self.__dict__.update(out.__dict__)


================================================
FILE: gym/utils/passive_env_checker.py
================================================
"""A set of functions for passively checking environment implementations."""
import inspect
from functools import partial
from typing import Callable

import numpy as np

from gym import Space, error, logger, spaces


def _check_box_observation_space(observation_space: spaces.Box):
    """Checks that a :class:`Box` observation space is defined in a sensible way.

    Args:
        observation_space: A box observation space
    """
    # Check if the box is an image
    if len(observation_space.shape) == 3:
        if observation_space.dtype != np.uint8:
            logger.warn(
                f"It seems a Box observation space is an image but the `dtype` is not `np.uint8`, actual type: {observation_space.dtype}. "
                "If the Box observation space is not an image, we recommend flattening the observation to have only a 1D vector."
            )
        if np.any(observation_space.low != 0) or np.any(observation_space.high != 255):
            logger.warn(
                "It seems a Box observation space is an image but the upper and lower bounds are not in [0, 255]. "
                "Generally, CNN policies assume observations are within that range, so you may encounter an issue if the observation values are not."
            )

    if len(observation_space.shape) not in [1, 3]:
        logger.warn(
            "A Box observation space has an unconventional shape (neither an image, nor a 1D vector). 
" "We recommend flattening the observation to have only a 1D vector or use a custom policy to properly process the data. " f"Actual observation shape: {observation_space.shape}" ) assert ( observation_space.low.shape == observation_space.shape ), f"The Box observation space shape and low shape have different shapes, low shape: {observation_space.low.shape}, box shape: {observation_space.shape}" assert ( observation_space.high.shape == observation_space.shape ), f"The Box observation space shape and high shape have have different shapes, high shape: {observation_space.high.shape}, box shape: {observation_space.shape}" if np.any(observation_space.low == observation_space.high): logger.warn("A Box observation space maximum and minimum values are equal.") elif np.any(observation_space.high < observation_space.low): logger.warn("A Box observation space low value is greater than a high value.") def _check_box_action_space(action_space: spaces.Box): """Checks that a :class:`Box` action space is defined in a sensible way. 
Args: action_space: A box action space """ assert ( action_space.low.shape == action_space.shape ), f"The Box action space shape and low shape have have different shapes, low shape: {action_space.low.shape}, box shape: {action_space.shape}" assert ( action_space.high.shape == action_space.shape ), f"The Box action space shape and high shape have different shapes, high shape: {action_space.high.shape}, box shape: {action_space.shape}" if np.any(action_space.low == action_space.high): logger.warn("A Box action space maximum and minimum values are equal.") elif np.any(action_space.high < action_space.low): logger.warn("A Box action space low value is greater than a high value.") def check_space( space: Space, space_type: str, check_box_space_fn: Callable[[spaces.Box], None] ): """A passive check of the environment action space that should not affect the environment.""" if not isinstance(space, spaces.Space): raise AssertionError( f"{space_type} space does not inherit from `gym.spaces.Space`, actual type: {type(space)}" ) elif isinstance(space, spaces.Box): check_box_space_fn(space) elif isinstance(space, spaces.Discrete): assert ( 0 < space.n ), f"Discrete {space_type} space's number of elements must be positive, actual number of elements: {space.n}" assert ( space.shape == () ), f"Discrete {space_type} space's shape should be empty, actual shape: {space.shape}" elif isinstance(space, spaces.MultiDiscrete): assert ( space.shape == space.nvec.shape ), f"Multi-discrete {space_type} space's shape must be equal to the nvec shape, space shape: {space.shape}, nvec shape: {space.nvec.shape}" assert np.all( 0 < space.nvec ), f"Multi-discrete {space_type} space's all nvec elements must be greater than 0, actual nvec: {space.nvec}" elif isinstance(space, spaces.MultiBinary): assert np.all( 0 < np.asarray(space.shape) ), f"Multi-binary {space_type} space's all shape elements must be greater than 0, actual shape: {space.shape}" elif isinstance(space, spaces.Tuple): assert 0 < 
len( space.spaces ), f"An empty Tuple {space_type} space is not allowed." for subspace in space.spaces: check_space(subspace, space_type, check_box_space_fn) elif isinstance(space, spaces.Dict): assert 0 < len( space.spaces.keys() ), f"An empty Dict {space_type} space is not allowed." for subspace in space.values(): check_space(subspace, space_type, check_box_space_fn) check_observation_space = partial( check_space, space_type="observation", check_box_space_fn=_check_box_observation_space, ) check_action_space = partial( check_space, space_type="action", check_box_space_fn=_check_box_action_space ) def check_obs(obs, observation_space: spaces.Space, method_name: str): """Check that the observation returned by the environment correspond to the declared one. Args: obs: The observation to check observation_space: The observation space of the observation method_name: The method name that generated the observation """ pre = f"The obs returned by the `{method_name}()` method" if isinstance(observation_space, spaces.Discrete): if not isinstance(obs, (np.int64, int)): logger.warn(f"{pre} should be an int or np.int64, actual type: {type(obs)}") elif isinstance(observation_space, spaces.Box): if observation_space.shape != (): if not isinstance(obs, np.ndarray): logger.warn( f"{pre} was expecting a numpy array, actual type: {type(obs)}" ) elif obs.dtype != observation_space.dtype: logger.warn( f"{pre} was expecting numpy array dtype to be {observation_space.dtype}, actual type: {obs.dtype}" ) elif isinstance(observation_space, (spaces.MultiBinary, spaces.MultiDiscrete)): if not isinstance(obs, np.ndarray): logger.warn(f"{pre} was expecting a numpy array, actual type: {type(obs)}") elif isinstance(observation_space, spaces.Tuple): if not isinstance(obs, tuple): logger.warn(f"{pre} was expecting a tuple, actual type: {type(obs)}") assert len(obs) == len( observation_space.spaces ), f"{pre} length is not same as the observation space length, obs length: {len(obs)}, space length: 
{len(observation_space.spaces)}" for sub_obs, sub_space in zip(obs, observation_space.spaces): check_obs(sub_obs, sub_space, method_name) elif isinstance(observation_space, spaces.Dict): assert isinstance(obs, dict), f"{pre} must be a dict, actual type: {type(obs)}" assert ( obs.keys() == observation_space.spaces.keys() ), f"{pre} observation keys is not same as the observation space keys, obs keys: {list(obs.keys())}, space keys: {list(observation_space.spaces.keys())}" for space_key in observation_space.spaces.keys(): check_obs(obs[space_key], observation_space[space_key], method_name) try: if obs not in observation_space: logger.warn(f"{pre} is not within the observation space.") except Exception as e: logger.warn(f"{pre} is not within the observation space with exception: {e}") def env_reset_passive_checker(env, **kwargs): """A passive check of the `Env.reset` function investigating the returning reset information and returning the data unchanged.""" signature = inspect.signature(env.reset) if "seed" not in signature.parameters and "kwargs" not in signature.parameters: logger.warn( "Future gym versions will require that `Env.reset` can be passed a `seed` instead of using `Env.seed` for resetting the environment random number generator." ) else: seed_param = signature.parameters.get("seed") # Check the default value is None if seed_param is not None and seed_param.default is not None: logger.warn( "The default seed argument in `Env.reset` should be `None`, otherwise the environment will by default always be deterministic. " f"Actual default: {seed_param}" ) if "options" not in signature.parameters and "kwargs" not in signature.parameters: logger.warn( "Future gym versions will require that `Env.reset` can be passed `options` to allow the environment initialisation to be passed additional information." 
) # Checks the result of env.reset with kwargs result = env.reset(**kwargs) if not isinstance(result, tuple): logger.warn( f"The result returned by `env.reset()` was not a tuple of the form `(obs, info)`, where `obs` is a observation and `info` is a dictionary containing additional information. Actual type: `{type(result)}`" ) elif len(result) != 2: logger.warn( "The result returned by `env.reset()` should be `(obs, info)` by default, , where `obs` is a observation and `info` is a dictionary containing additional information." ) else: obs, info = result check_obs(obs, env.observation_space, "reset") assert isinstance( info, dict ), f"The second element returned by `env.reset()` was not a dictionary, actual type: {type(info)}" return result def env_step_passive_checker(env, action): """A passive check for the environment step, investigating the returning data then returning the data unchanged.""" # We don't check the action as for some environments then out-of-bounds values can be given result = env.step(action) assert isinstance( result, tuple ), f"Expects step result to be a tuple, actual type: {type(result)}" if len(result) == 4: logger.deprecation( "Core environment is written in old step API which returns one bool instead of two. " "It is recommended to rewrite the environment with new step API. 
" ) obs, reward, done, info = result if not isinstance(done, (bool, np.bool8)): logger.warn( f"Expects `done` signal to be a boolean, actual type: {type(done)}" ) elif len(result) == 5: obs, reward, terminated, truncated, info = result # np.bool is actual python bool not np boolean type, therefore bool_ or bool8 if not isinstance(terminated, (bool, np.bool8)): logger.warn( f"Expects `terminated` signal to be a boolean, actual type: {type(terminated)}" ) if not isinstance(truncated, (bool, np.bool8)): logger.warn( f"Expects `truncated` signal to be a boolean, actual type: {type(truncated)}" ) else: raise error.Error( f"Expected `Env.step` to return a four or five element tuple, actual number of elements returned: {len(result)}." ) check_obs(obs, env.observation_space, "step") if not ( np.issubdtype(type(reward), np.integer) or np.issubdtype(type(reward), np.floating) ): logger.warn( f"The reward returned by `step()` must be a float, int, np.integer or np.floating, actual type: {type(reward)}" ) else: if np.isnan(reward): logger.warn("The reward is a NaN value.") if np.isinf(reward): logger.warn("The reward is an inf value.") assert isinstance( info, dict ), f"The `info` returned by `step()` must be a python dictionary, actual type: {type(info)}" return result def env_render_passive_checker(env, *args, **kwargs): """A passive check of the `Env.render` that the declared render modes/fps in the metadata of the environment is declared.""" render_modes = env.metadata.get("render_modes") if render_modes is None: logger.warn( "No render modes was declared in the environment (env.metadata['render_modes'] is None or not defined), you may have trouble when calling `.render()`." ) else: if not isinstance(render_modes, (list, tuple)): logger.warn( f"Expects the render_modes to be a sequence (i.e. 
list, tuple), actual type: {type(render_modes)}"
            )
        elif not all(isinstance(mode, str) for mode in render_modes):
            logger.warn(
                f"Expects all render modes to be strings, actual types: {[type(mode) for mode in render_modes]}"
            )

        render_fps = env.metadata.get("render_fps")
        # We only require `render_fps` if rendering is actually implemented
        if len(render_modes) > 0:
            if render_fps is None:
                logger.warn(
                    "No render fps was declared in the environment (env.metadata['render_fps'] is None or not defined), rendering may occur at inconsistent fps."
                )
            else:
                if not (
                    np.issubdtype(type(render_fps), np.integer)
                    or np.issubdtype(type(render_fps), np.floating)
                ):
                    logger.warn(
                        f"Expects the `env.metadata['render_fps']` to be an integer or a float, actual type: {type(render_fps)}"
                    )
                else:
                    assert (
                        render_fps > 0
                    ), f"Expects the `env.metadata['render_fps']` to be greater than zero, actual value: {render_fps}"

        # env.render is now an attribute with default None
        if len(render_modes) == 0:
            assert (
                env.render_mode is None
            ), f"With no render_modes, expects the Env.render_mode to be None, actual value: {env.render_mode}"
        else:
            assert env.render_mode is None or env.render_mode in render_modes, (
                "The environment was initialized successfully however with an unsupported render mode. "
                f"Render mode: {env.render_mode}, modes: {render_modes}"
            )

    result = env.render(*args, **kwargs)
    # TODO: Check that the result is correct
    return result


================================================
FILE: gym/utils/play.py
================================================
"""Utilities of visualising an environment."""
from collections import deque
from typing import Callable, Dict, List, Optional, Tuple, Union

import numpy as np

import gym.error
from gym import Env, logger
from gym.core import ActType, ObsType
from gym.error import DependencyNotInstalled
from gym.logger import deprecation

try:
    import pygame
    from pygame import Surface
    from pygame.event import Event
    from pygame.locals import VIDEORESIZE
except ImportError:
    # pygame is a hard requirement of this module; fail at import time with
    # install advice.
    raise gym.error.DependencyNotInstalled(
        "Pygame is not installed, run `pip install gym[classic_control]`"
    )

try:
    import matplotlib

    matplotlib.use("TkAgg")
    import matplotlib.pyplot as plt
except ImportError:
    # matplotlib is optional here; without it, plotting is disabled
    # (`plt` is None) and only `PlayPlot` will later raise.
    logger.warn("Matplotlib is not installed, run `pip install gym[other]`")
    matplotlib, plt = None, None


class MissingKeysToAction(Exception):
    """Raised when the environment does not have a default ``keys_to_action`` mapping."""


class PlayableGame:
    """Wraps an environment allowing keyboard inputs to interact with the environment."""

    def __init__(
        self,
        env: Env,
        keys_to_action: Optional[Dict[Tuple[int, ...], int]] = None,
        zoom: Optional[float] = None,
    ):
        """Wraps an environment with a dictionary of keyboard buttons to action and if to zoom in on the environment.

        Args:
            env: The environment to play
            keys_to_action: The dictionary of keyboard tuples and action value
            zoom: If to zoom in on the environment render
        """
        if env.render_mode not in {"rgb_array", "rgb_array_list"}:
            logger.error(
                "PlayableGame wrapper works only with rgb_array and rgb_array_list render modes, "
                f"but your environment render_mode = {env.render_mode}."
            )

        self.env = env
        self.relevant_keys = self._get_relevant_keys(keys_to_action)
        self.video_size = self._get_video_size(zoom)
        self.screen = pygame.display.set_mode(self.video_size)
        self.pressed_keys = []
        self.running = True

    def _get_relevant_keys(
        self, keys_to_action: Optional[Dict[Tuple[int, ...], int]] = None
    ) -> set:
        # Resolve the key mapping: explicit argument first, then the
        # environment's own `get_keys_to_action` (checking the wrapper before
        # the unwrapped environment).
        if keys_to_action is None:
            if hasattr(self.env, "get_keys_to_action"):
                keys_to_action = self.env.get_keys_to_action()
            elif hasattr(self.env.unwrapped, "get_keys_to_action"):
                keys_to_action = self.env.unwrapped.get_keys_to_action()
            else:
                raise MissingKeysToAction(
                    f"{self.env.spec.id} does not have explicit key to action mapping, "
                    "please specify one manually"
                )
        assert isinstance(keys_to_action, dict)
        # Flatten all key tuples into the set of keys the game reacts to.
        relevant_keys = set(sum((list(k) for k in keys_to_action.keys()), []))
        return relevant_keys

    def _get_video_size(self, zoom: Optional[float] = None) -> Tuple[int, int]:
        rendered = self.env.render()
        if isinstance(rendered, List):
            # rgb_array_list mode returns a list of frames; use the latest.
            rendered = rendered[-1]
        assert rendered is not None and isinstance(rendered, np.ndarray)
        # (width, height) for pygame, i.e. transposed from the array's shape.
        video_size = (rendered.shape[1], rendered.shape[0])

        if zoom is not None:
            video_size = (int(video_size[0] * zoom), int(video_size[1] * zoom))

        return video_size

    def process_event(self, event: Event):
        """Processes a PyGame event.

        In particular, this function is used to keep track of which buttons are currently pressed
        and to exit the :func:`play` function when the PyGame window is closed.

        Args:
            event: The event to process
        """
        if event.type == pygame.KEYDOWN:
            if event.key in self.relevant_keys:
                self.pressed_keys.append(event.key)
            elif event.key == pygame.K_ESCAPE:
                self.running = False
        elif event.type == pygame.KEYUP:
            if event.key in self.relevant_keys:
                self.pressed_keys.remove(event.key)
        elif event.type == pygame.QUIT:
            self.running = False
        elif event.type == VIDEORESIZE:
            # Window resized: recreate the display surface at the new size.
            self.video_size = event.size
            self.screen = pygame.display.set_mode(self.video_size)


def display_arr(
    screen: Surface, arr: np.ndarray, video_size: Tuple[int, int], transpose: bool
):
    """Displays a numpy array on screen.

    Args:
        screen: The screen to show the array on
        arr: The array to show
        video_size: The video size of the screen
        transpose: If to transpose the array on the screen
    """
    # Rescale the frame to the full [0, 255] range before blitting.
    arr_min, arr_max = np.min(arr), np.max(arr)
    arr = 255.0 * (arr - arr_min) / (arr_max - arr_min)
    pyg_img = pygame.surfarray.make_surface(arr.swapaxes(0, 1) if transpose else arr)
    pyg_img = pygame.transform.scale(pyg_img, video_size)
    screen.blit(pyg_img, (0, 0))


def play(
    env: Env,
    transpose: Optional[bool] = True,
    fps: Optional[int] = None,
    zoom: Optional[float] = None,
    callback: Optional[Callable] = None,
    keys_to_action: Optional[Dict[Union[Tuple[Union[str, int]], str], ActType]] = None,
    seed: Optional[int] = None,
    noop: ActType = 0,
):
    """Allows one to play the game using keyboard.

    Example::

        >>> import gym
        >>> from gym.utils.play import play
        >>> play(gym.make("CarRacing-v1", render_mode="rgb_array"), keys_to_action={
        ...     "w": np.array([0, 0.7, 0]),
        ...     "a": np.array([-1, 0, 0]),
        ...     "s": np.array([0, 0, 1]),
        ...     "d": np.array([1, 0, 0]),
        ...     "wa": np.array([-1, 0.7, 0]),
        ...     "dw": np.array([1, 0.7, 0]),
        ...     "ds": np.array([1, 0, 1]),
        ...     "as": np.array([-1, 0, 1]),
        ... }, noop=np.array([0,0,0]))

    Above code works also if the environment is wrapped, so it's particularly useful in
    verifying that the frame-level preprocessing does not render the game unplayable.
If you wish to plot real time statistics as you play, you can use :class:`gym.utils.play.PlayPlot`. Here's a sample code for plotting the reward for last 150 steps. >>> def callback(obs_t, obs_tp1, action, rew, terminated, truncated, info): ... return [rew,] >>> plotter = PlayPlot(callback, 150, ["reward"]) >>> play(gym.make("ALE/AirRaid-v5"), callback=plotter.callback) Args: env: Environment to use for playing. transpose: If this is ``True``, the output of observation is transposed. Defaults to ``True``. fps: Maximum number of steps of the environment executed every second. If ``None`` (the default), ``env.metadata["render_fps""]`` (or 30, if the environment does not specify "render_fps") is used. zoom: Zoom the observation in, ``zoom`` amount, should be positive float callback: If a callback is provided, it will be executed after every step. It takes the following input: obs_t: observation before performing action obs_tp1: observation after performing action action: action that was executed rew: reward that was received terminated: whether the environment is terminated or not truncated: whether the environment is truncated or not info: debug info keys_to_action: Mapping from keys pressed to action performed. Different formats are supported: Key combinations can either be expressed as a tuple of unicode code points of the keys, as a tuple of characters, or as a string where each character of the string represents one key. For example if pressing 'w' and space at the same time is supposed to trigger action number 2 then ``key_to_action`` dict could look like this: >>> { ... # ... ... (ord('w'), ord(' ')): 2 ... # ... ... } or like this: >>> { ... # ... ... ("w", " "): 2 ... # ... ... } or like this: >>> { ... # ... ... "w ": 2 ... # ... ... } If ``None``, default ``key_to_action`` mapping for that environment is used, if provided. seed: Random seed used when resetting the environment. If None, no seed is used. 
noop: The action used when no key input has been entered, or the entered key combination is unknown. """ env.reset(seed=seed) if keys_to_action is None: if hasattr(env, "get_keys_to_action"): keys_to_action = env.get_keys_to_action() elif hasattr(env.unwrapped, "get_keys_to_action"): keys_to_action = env.unwrapped.get_keys_to_action() else: raise MissingKeysToAction( f"{env.spec.id} does not have explicit key to action mapping, " "please specify one manually" ) assert keys_to_action is not None key_code_to_action = {} for key_combination, action in keys_to_action.items(): key_code = tuple( sorted(ord(key) if isinstance(key, str) else key for key in key_combination) ) key_code_to_action[key_code] = action game = PlayableGame(env, key_code_to_action, zoom) if fps is None: fps = env.metadata.get("render_fps", 30) done, obs = True, None clock = pygame.time.Clock() while game.running: if done: done = False obs = env.reset(seed=seed) else: action = key_code_to_action.get(tuple(sorted(game.pressed_keys)), noop) prev_obs = obs obs, rew, terminated, truncated, info = env.step(action) done = terminated or truncated if callback is not None: callback(prev_obs, obs, action, rew, terminated, truncated, info) if obs is not None: rendered = env.render() if isinstance(rendered, List): rendered = rendered[-1] assert rendered is not None and isinstance(rendered, np.ndarray) display_arr( game.screen, rendered, transpose=transpose, video_size=game.video_size ) # process pygame events for event in pygame.event.get(): game.process_event(event) pygame.display.flip() clock.tick(fps) pygame.quit() class PlayPlot: """Provides a callback to create live plots of arbitrary metrics when using :func:`play`. 
    This class is instantiated with a function that accepts information about a single environment transition:
        - obs_t: observation before performing action
        - obs_tp1: observation after performing action
        - action: action that was executed
        - rew: reward that was received
        - terminated: whether the environment is terminated or not
        - truncated: whether the environment is truncated or not
        - info: debug info

    It should return a list of metrics that are computed from this data.
    For instance, the function may look like this::

        >>> def compute_metrics(obs_t, obs_tp, action, reward, terminated, truncated, info):
        ...     return [reward, info["cumulative_reward"], np.linalg.norm(action)]

    :class:`PlayPlot` provides the method :meth:`callback` which will pass its arguments along to that function
    and uses the returned values to update live plots of the metrics.

    Typically, this :meth:`callback` will be used in conjunction with :func:`play` to see how the metrics evolve as you play::

        >>> plotter = PlayPlot(compute_metrics, horizon_timesteps=200,
        ...                    plot_names=["Immediate Rew.", "Cumulative Rew.", "Action Magnitude"])
        >>> play(your_env, callback=plotter.callback)
    """

    def __init__(
        self, callback: Callable, horizon_timesteps: int, plot_names: List[str]
    ):
        """Constructor of :class:`PlayPlot`.

        The function ``callback`` that is passed to this constructor should return
        a list of metrics that is of length ``len(plot_names)``.

        Args:
            callback: Function that computes metrics from environment transitions
            horizon_timesteps: The time horizon used for the live plots
            plot_names: List of plot titles

        Raises:
            DependencyNotInstalled: If matplotlib is not installed
        """
        deprecation(
            "`PlayPlot` is marked as deprecated and will be removed in the near future."
        )
        self.data_callback = callback
        self.horizon_timesteps = horizon_timesteps
        self.plot_names = plot_names

        if plt is None:
            raise DependencyNotInstalled(
                "matplotlib is not installed, run `pip install gym[other]`"
            )

        num_plots = len(self.plot_names)
        self.fig, self.ax = plt.subplots(num_plots)
        if num_plots == 1:
            # `plt.subplots(1)` returns a bare Axes rather than a list;
            # normalise so the zip below always works.
            self.ax = [self.ax]
        for axis, name in zip(self.ax, plot_names):
            axis.set_title(name)
        self.t = 0
        self.cur_plot: List[Optional[plt.Axes]] = [None for _ in range(num_plots)]
        # One ring buffer of the last `horizon_timesteps` values per metric.
        self.data = [deque(maxlen=horizon_timesteps) for _ in range(num_plots)]

    def callback(
        self,
        obs_t: ObsType,
        obs_tp1: ObsType,
        action: ActType,
        rew: float,
        terminated: bool,
        truncated: bool,
        info: dict,
    ):
        """The callback that calls the provided data callback and adds the data to the plots.

        Args:
            obs_t: The observation at time step t
            obs_tp1: The observation at time step t+1
            action: The action
            rew: The reward
            terminated: If the environment is terminated
            truncated: If the environment is truncated
            info: The information from the environment
        """
        points = self.data_callback(
            obs_t, obs_tp1, action, rew, terminated, truncated, info
        )
        for point, data_series in zip(points, self.data):
            data_series.append(point)
        self.t += 1

        xmin, xmax = max(0, self.t - self.horizon_timesteps), self.t

        for i, plot in enumerate(self.cur_plot):
            if plot is not None:
                # Drop the previous scatter before drawing the updated window.
                plot.remove()
            self.cur_plot[i] = self.ax[i].scatter(
                range(xmin, xmax), list(self.data[i]), c="blue"
            )
            self.ax[i].set_xlim(xmin, xmax)

        if plt is None:
            raise DependencyNotInstalled(
                "matplotlib is not installed, run `pip install gym[other]`"
            )
        plt.pause(0.000001)


================================================
FILE: gym/utils/save_video.py
================================================
"""Utility functions to save rendering videos."""
import os
from typing import Callable, Optional

import gym
from gym import logger

try:
    from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
except ImportError:
    raise gym.error.DependencyNotInstalled(
        "MoviePy is not 
def capped_cubic_video_schedule(episode_id: int) -> bool:
    """The default episode trigger.

    Records at the perfect cubes 0, 1, 8, 27, ..., 729, then at every
    multiple of 1000 afterwards.

    Args:
        episode_id: The episode number

    Returns:
        If to apply a video schedule number
    """
    if episode_id >= 1000:
        return episode_id % 1000 == 0
    cube_root = int(round(episode_id ** (1.0 / 3)))
    return cube_root**3 == episode_id
def np_random(seed: Optional[int] = None) -> Tuple[np.random.Generator, Any]:
    """Generates a random number generator from the seed and returns the Generator and seed.

    Args:
        seed: The seed used to create the generator

    Returns:
        The generator and resulting seed

    Raises:
        Error: Seed must be a non-negative integer or omitted
    """
    if seed is not None:
        # Reject anything that is not a plain non-negative int.
        if not isinstance(seed, int) or seed < 0:
            raise error.Error(
                f"Seed must be a non-negative integer or omitted, not {seed}"
            )

    seed_seq = np.random.SeedSequence(seed)
    rng = RandomNumberGenerator(np.random.PCG64(seed_seq))
    # `entropy` is the effective seed: the given one, or a fresh OS-sourced value.
    return rng, seed_seq.entropy
def convert_to_terminated_truncated_step_api(
    step_returns: Union[DoneStepType, TerminatedTruncatedStepType], is_vector_env=False
) -> TerminatedTruncatedStepType:
    """Function to transform step returns to new step API irrespective of input API.

    Args:
        step_returns (tuple): Items returned by step().
            Can be (obs, rew, done, info) or (obs, rew, terminated, truncated, info)
        is_vector_env (bool): Whether the step_returns are from a vector environment

    Returns:
        The step returns as a 5-tuple ``(obs, rew, terminated, truncated, info)``.

    Raises:
        TypeError: If ``is_vector_env`` is True and ``infos`` is neither a list nor a dict.
    """
    if len(step_returns) == 5:
        # Already in the terminated/truncated API; pass through unchanged.
        return step_returns
    else:
        assert len(step_returns) == 4
        observations, rewards, dones, infos = step_returns

        # Cases to handle - info single env / info vector env (list) / info vector env (dict)
        if is_vector_env is False:
            # Single env: `TimeLimit.truncated` in info disambiguates truncation
            # from genuine termination; the key is consumed here.
            truncated = infos.pop("TimeLimit.truncated", False)
            return (
                observations,
                rewards,
                dones and not truncated,
                dones and truncated,
                infos,
            )
        elif isinstance(infos, list):
            truncated = np.array(
                [info.pop("TimeLimit.truncated", False) for info in infos]
            )
            return (
                observations,
                rewards,
                np.logical_and(dones, np.logical_not(truncated)),
                np.logical_and(dones, truncated),
                infos,
            )
        elif isinstance(infos, dict):
            num_envs = len(dones)
            truncated = infos.pop("TimeLimit.truncated", np.zeros(num_envs, dtype=bool))
            return (
                observations,
                rewards,
                np.logical_and(dones, np.logical_not(truncated)),
                np.logical_and(dones, truncated),
                infos,
            )
        else:
            # BUG FIX: the previous message claimed `is_vector_envs=False`; this
            # branch is only reachable when `is_vector_env` is True.
            raise TypeError(
                f"Unexpected value of infos, as is_vector_env=True, expects `info` to be a list or dict, actual type: {type(infos)}"
            )
def convert_to_done_step_api(
    step_returns: Union[TerminatedTruncatedStepType, DoneStepType],
    is_vector_env: bool = False,
) -> DoneStepType:
    """Function to transform step returns to old step API irrespective of input API.

    Args:
        step_returns (tuple): Items returned by step().
            Can be (obs, rew, done, info) or (obs, rew, terminated, truncated, info)
        is_vector_env (bool): Whether the step_returns are from a vector environment

    Returns:
        The step returns as a 4-tuple ``(obs, rew, done, info)``; when the episode
        ended, ``info["TimeLimit.truncated"]`` records truncation-without-termination.

    Raises:
        TypeError: If ``is_vector_env`` is True and ``infos`` is neither a list nor a dict.
    """
    if len(step_returns) == 4:
        # Already in the done-based API; pass through unchanged.
        return step_returns
    else:
        assert len(step_returns) == 5
        observations, rewards, terminated, truncated, infos = step_returns

        # Cases to handle - info single env / info vector env (list) / info vector env (dict)
        if is_vector_env is False:
            if truncated or terminated:
                infos["TimeLimit.truncated"] = truncated and not terminated
            return (
                observations,
                rewards,
                terminated or truncated,
                infos,
            )
        elif isinstance(infos, list):
            for info, env_truncated, env_terminated in zip(
                infos, truncated, terminated
            ):
                if env_truncated or env_terminated:
                    info["TimeLimit.truncated"] = env_truncated and not env_terminated
            return (
                observations,
                rewards,
                np.logical_or(terminated, truncated),
                infos,
            )
        elif isinstance(infos, dict):
            if np.logical_or(np.any(truncated), np.any(terminated)):
                infos["TimeLimit.truncated"] = np.logical_and(
                    truncated, np.logical_not(terminated)
                )
            return (
                observations,
                rewards,
                np.logical_or(terminated, truncated),
                infos,
            )
        else:
            # BUG FIX: the previous message claimed `is_vector_envs=False`; this
            # branch is only reachable when `is_vector_env` is True.
            raise TypeError(
                f"Unexpected value of infos, as is_vector_env=True, expects `info` to be a list or dict, actual type: {type(infos)}"
            )
def step_api_compatibility(
    step_returns: Union[TerminatedTruncatedStepType, DoneStepType],
    output_truncation_bool: bool = True,
    is_vector_env: bool = False,
) -> Union[TerminatedTruncatedStepType, DoneStepType]:
    """Function to transform step returns to the API specified by `output_truncation_bool` bool.

    Done (old) step API refers to step() method returning (observation, reward, done, info)
    Terminated Truncated (new) step API refers to step() method returning
    (observation, reward, terminated, truncated, info).
    (Refer to docs for details on the API change)

    Args:
        step_returns (tuple): Items returned by step().
            Can be (obs, rew, done, info) or (obs, rew, terminated, truncated, info)
        output_truncation_bool (bool): Whether the output should return two booleans (new API) or one (old) (True by default)
        is_vector_env (bool): Whether the step_returns are from a vector environment

    Returns:
        step_returns (tuple): Depending on `output_truncation_bool` bool, it can return
        (obs, rew, done, info) or (obs, rew, terminated, truncated, info)

    Examples:
        Useful to bridge environments/wrappers written against conflicting APIs, e.g.
        an old-API env behind a new-API wrapper where an old-API result is wanted:

        >>> obs, rew, done, info = step_api_compatibility(env.step(action), output_truncation_bool=False)
        >>> obs, rew, terminated, truncated, info = step_api_compatibility(env.step(action), output_truncation_bool=True)
        >>> observations, rewards, dones, infos = step_api_compatibility(vec_env.step(action), is_vector_env=True)
    """
    # Dispatch to the matching converter; each converter is a no-op when the
    # input is already in the requested format.
    if not output_truncation_bool:
        return convert_to_done_step_api(step_returns, is_vector_env)
    return convert_to_terminated_truncated_step_api(step_returns, is_vector_env)
def make(
    id: str,
    num_envs: int = 1,
    asynchronous: bool = True,
    wrappers: Optional[Union[callable, List[callable]]] = None,
    disable_env_checker: Optional[bool] = None,
    **kwargs,
) -> VectorEnv:
    """Create a vectorized environment from multiple copies of an environment, from its id.

    Example::

        >>> import gym
        >>> env = gym.vector.make('CartPole-v1', num_envs=3)
        >>> env.reset()
        array([[-0.04456399,  0.04653909,  0.01326909, -0.02099827],
               [ 0.03073904,  0.00145001, -0.03088818, -0.03131252],
               [ 0.03468829,  0.01500225,  0.01230312,  0.01825218]],
              dtype=float32)

    Args:
        id: The environment ID. This must be a valid ID from the registry.
        num_envs: Number of copies of the environment.
        asynchronous: If `True`, wraps the environments in an :class:`AsyncVectorEnv`
            (which uses `multiprocessing`_ to run the environments in parallel).
            If ``False``, wraps the environments in a :class:`SyncVectorEnv`.
        wrappers: If not ``None``, then apply the wrappers to each internal environment during creation.
        disable_env_checker: If to run the env checker for the first environment only.
            None will default to the environment spec `disable_env_checker` parameter
            (that is by default False), otherwise will run according to this argument
            (True = not run, False = run)
        **kwargs: Keywords arguments applied during `gym.make`

    Returns:
        The vectorized environment.

    Raises:
        NotImplementedError: If ``wrappers`` is neither a callable nor an iterable of callables.
    """

    def create_env(env_num: int):
        """Creates an environment that can enable or disable the environment checker."""
        # Only the first copy (env_num == 0) may run the env checker; every other
        # copy always disables it to avoid redundant checks.
        _disable_env_checker = True if env_num > 0 else disable_env_checker

        def _make_env():
            env = gym.envs.registration.make(
                id,
                disable_env_checker=_disable_env_checker,
                **kwargs,
            )
            if wrappers is not None:
                if callable(wrappers):
                    env = wrappers(env)
                elif isinstance(wrappers, Iterable) and all(
                    callable(w) for w in wrappers
                ):
                    for wrapper in wrappers:
                        env = wrapper(env)
                else:
                    raise NotImplementedError
            return env

        return _make_env

    # BUG FIX: previously `create_env(disable_env_checker or env_num > 0)` passed a
    # boolean where `create_env` expects the environment index; it only worked via
    # bool->int coercion. Passing the index directly is behavior-equivalent and clear.
    env_fns = [create_env(env_num) for env_num in range(num_envs)]

    return AsyncVectorEnv(env_fns) if asynchronous else SyncVectorEnv(env_fns)
    def __init__(
        self,
        env_fns: Sequence[callable],
        observation_space: Optional[gym.Space] = None,
        action_space: Optional[gym.Space] = None,
        shared_memory: bool = True,
        copy: bool = True,
        context: Optional[str] = None,
        daemon: bool = True,
        worker: Optional[callable] = None,
    ):
        """Vectorized environment that runs multiple environments in parallel.

        Args:
            env_fns: Functions that create the environments.
            observation_space: Observation space of a single environment. If ``None``,
                then the observation space of the first environment is taken.
            action_space: Action space of a single environment. If ``None``,
                then the action space of the first environment is taken.
            shared_memory: If ``True``, then the observations from the worker processes are communicated back through
                shared variables. This can improve the efficiency if the observations are large (e.g. images).
            copy: If ``True``, then the :meth:`~AsyncVectorEnv.reset` and :meth:`~AsyncVectorEnv.step` methods
                return a copy of the observations.
            context: Context for `multiprocessing`_. If ``None``, then the default context is used.
            daemon: If ``True``, then subprocesses have ``daemon`` flag turned on; that is, they will quit if
                the head process quits. However, ``daemon=True`` prevents subprocesses to spawn children,
                so for some environments you may want to have it set to ``False``.
            worker: If set, then use that worker in a subprocess instead of a default one.
                Can be useful to override some inner vector env logic, for instance, how resets on termination
                or truncation are handled.

        Warnings:
            worker is an advanced mode option. It provides a high degree of flexibility and a high chance
            to shoot yourself in the foot; thus, if you are writing your own worker, it is recommended to start
            from the code for ``_worker`` (or ``_worker_shared_memory``) method, and add changes.

        Raises:
            RuntimeError: If the observation space of some sub-environment does not match observation_space
                (or, by default, the observation space of the first sub-environment).
            ValueError: If observation_space is a custom space (i.e. not a default space in Gym,
                such as gym.spaces.Box, gym.spaces.Discrete, or gym.spaces.Dict) and shared_memory is True.
        """
        ctx = mp.get_context(context)
        self.env_fns = env_fns
        self.shared_memory = shared_memory
        self.copy = copy

        # Instantiate one env only to read its metadata and (optionally) spaces,
        # then dispose of it before forking the workers.
        dummy_env = env_fns[0]()
        self.metadata = dummy_env.metadata

        if (observation_space is None) or (action_space is None):
            observation_space = observation_space or dummy_env.observation_space
            action_space = action_space or dummy_env.action_space
        dummy_env.close()
        del dummy_env
        super().__init__(
            num_envs=len(env_fns),
            observation_space=observation_space,
            action_space=action_space,
        )

        if self.shared_memory:
            try:
                # Allocate one shared buffer for all workers; the parent keeps a
                # numpy view over it so observations need no pickling.
                _obs_buffer = create_shared_memory(
                    self.single_observation_space, n=self.num_envs, ctx=ctx
                )
                self.observations = read_from_shared_memory(
                    self.single_observation_space, _obs_buffer, n=self.num_envs
                )
            except CustomSpaceError:
                raise ValueError(
                    "Using `shared_memory=True` in `AsyncVectorEnv` "
                    "is incompatible with non-standard Gym observation spaces "
                    "(i.e. custom spaces inheriting from `gym.Space`), and is "
                    "only compatible with default Gym spaces (e.g. `Box`, "
                    "`Tuple`, `Dict`) for batching. Set `shared_memory=False` "
                    "if you use custom observation spaces."
                )
        else:
            # Without shared memory, observations travel through the pipes and are
            # concatenated into this pre-allocated batch array.
            _obs_buffer = None
            self.observations = create_empty_array(
                self.single_observation_space, n=self.num_envs, fn=np.zeros
            )

        self.parent_pipes, self.processes = [], []
        self.error_queue = ctx.Queue()
        # Pick the default worker matching the shared-memory mode, unless overridden.
        target = _worker_shared_memory if self.shared_memory else _worker
        target = worker or target
        with clear_mpi_env_vars():
            for idx, env_fn in enumerate(self.env_fns):
                parent_pipe, child_pipe = ctx.Pipe()
                process = ctx.Process(
                    target=target,
                    name=f"Worker<{type(self).__name__}>-{idx}",
                    args=(
                        idx,
                        # CloudpickleWrapper lets the env factory cross process
                        # boundaries even if it is not plain-picklable.
                        CloudpickleWrapper(env_fn),
                        child_pipe,
                        parent_pipe,
                        _obs_buffer,
                        self.error_queue,
                    ),
                )

                self.parent_pipes.append(parent_pipe)
                self.processes.append(process)

                process.daemon = daemon
                process.start()
                # Parent keeps only its end of each pipe.
                child_pipe.close()

        self._state = AsyncState.DEFAULT
        self._check_spaces()
""" self._assert_is_running() if seed is None: seed = [None for _ in range(self.num_envs)] if isinstance(seed, int): seed = [seed + i for i in range(self.num_envs)] assert len(seed) == self.num_envs if self._state != AsyncState.DEFAULT: raise AlreadyPendingCallError( f"Calling `reset_async` while waiting for a pending call to `{self._state.value}` to complete", self._state.value, ) for pipe, single_seed in zip(self.parent_pipes, seed): single_kwargs = {} if single_seed is not None: single_kwargs["seed"] = single_seed if options is not None: single_kwargs["options"] = options pipe.send(("reset", single_kwargs)) self._state = AsyncState.WAITING_RESET def reset_wait( self, timeout: Optional[Union[int, float]] = None, seed: Optional[int] = None, options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, List[dict]]]: """Waits for the calls triggered by :meth:`reset_async` to finish and returns the results. Args: timeout: Number of seconds before the call to `reset_wait` times out. If `None`, the call to `reset_wait` never times out. seed: ignored options: ignored Returns: A tuple of batched observations and list of dictionaries Raises: ClosedEnvironmentError: If the environment was closed (if :meth:`close` was previously called). NoAsyncCallError: If :meth:`reset_wait` was called without any prior call to :meth:`reset_async`. TimeoutError: If :meth:`reset_wait` timed out. """ self._assert_is_running() if self._state != AsyncState.WAITING_RESET: raise NoAsyncCallError( "Calling `reset_wait` without any prior " "call to `reset_async`.", AsyncState.WAITING_RESET.value, ) if not self._poll(timeout): self._state = AsyncState.DEFAULT raise mp.TimeoutError( f"The call to `reset_wait` has timed out after {timeout} second(s)." 
) results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes]) self._raise_if_errors(successes) self._state = AsyncState.DEFAULT infos = {} results, info_data = zip(*results) for i, info in enumerate(info_data): infos = self._add_info(infos, info, i) if not self.shared_memory: self.observations = concatenate( self.single_observation_space, results, self.observations ) return (deepcopy(self.observations) if self.copy else self.observations), infos def step_async(self, actions: np.ndarray): """Send the calls to :obj:`step` to each sub-environment. Args: actions: Batch of actions. element of :attr:`~VectorEnv.action_space` Raises: ClosedEnvironmentError: If the environment was closed (if :meth:`close` was previously called). AlreadyPendingCallError: If the environment is already waiting for a pending call to another method (e.g. :meth:`reset_async`). This can be caused by two consecutive calls to :meth:`step_async`, with no call to :meth:`step_wait` in between. """ self._assert_is_running() if self._state != AsyncState.DEFAULT: raise AlreadyPendingCallError( f"Calling `step_async` while waiting for a pending call to `{self._state.value}` to complete.", self._state.value, ) actions = iterate(self.action_space, actions) for pipe, action in zip(self.parent_pipes, actions): pipe.send(("step", action)) self._state = AsyncState.WAITING_STEP def step_wait( self, timeout: Optional[Union[int, float]] = None ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, List[dict]]: """Wait for the calls to :obj:`step` in each sub-environment to finish. Args: timeout: Number of seconds before the call to :meth:`step_wait` times out. If ``None``, the call to :meth:`step_wait` never times out. Returns: The batched environment step information, (obs, reward, terminated, truncated, info) Raises: ClosedEnvironmentError: If the environment was closed (if :meth:`close` was previously called). 
NoAsyncCallError: If :meth:`step_wait` was called without any prior call to :meth:`step_async`. TimeoutError: If :meth:`step_wait` timed out. """ self._assert_is_running() if self._state != AsyncState.WAITING_STEP: raise NoAsyncCallError( "Calling `step_wait` without any prior call " "to `step_async`.", AsyncState.WAITING_STEP.value, ) if not self._poll(timeout): self._state = AsyncState.DEFAULT raise mp.TimeoutError( f"The call to `step_wait` has timed out after {timeout} second(s)." ) observations_list, rewards, terminateds, truncateds, infos = [], [], [], [], {} successes = [] for i, pipe in enumerate(self.parent_pipes): result, success = pipe.recv() obs, rew, terminated, truncated, info = result successes.append(success) observations_list.append(obs) rewards.append(rew) terminateds.append(terminated) truncateds.append(truncated) infos = self._add_info(infos, info, i) self._raise_if_errors(successes) self._state = AsyncState.DEFAULT if not self.shared_memory: self.observations = concatenate( self.single_observation_space, observations_list, self.observations, ) return ( deepcopy(self.observations) if self.copy else self.observations, np.array(rewards), np.array(terminateds, dtype=np.bool_), np.array(truncateds, dtype=np.bool_), infos, ) def call_async(self, name: str, *args, **kwargs): """Calls the method with name asynchronously and apply args and kwargs to the method. Args: name: Name of the method or property to call. *args: Arguments to apply to the method call. **kwargs: Keyword arguments to apply to the method call. Raises: ClosedEnvironmentError: If the environment was closed (if :meth:`close` was previously called). 
AlreadyPendingCallError: Calling `call_async` while waiting for a pending call to complete """ self._assert_is_running() if self._state != AsyncState.DEFAULT: raise AlreadyPendingCallError( "Calling `call_async` while waiting " f"for a pending call to `{self._state.value}` to complete.", self._state.value, ) for pipe in self.parent_pipes: pipe.send(("_call", (name, args, kwargs))) self._state = AsyncState.WAITING_CALL def call_wait(self, timeout: Optional[Union[int, float]] = None) -> list: """Calls all parent pipes and waits for the results. Args: timeout: Number of seconds before the call to `step_wait` times out. If `None` (default), the call to `step_wait` never times out. Returns: List of the results of the individual calls to the method or property for each environment. Raises: NoAsyncCallError: Calling `call_wait` without any prior call to `call_async`. TimeoutError: The call to `call_wait` has timed out after timeout second(s). """ self._assert_is_running() if self._state != AsyncState.WAITING_CALL: raise NoAsyncCallError( "Calling `call_wait` without any prior call to `call_async`.", AsyncState.WAITING_CALL.value, ) if not self._poll(timeout): self._state = AsyncState.DEFAULT raise mp.TimeoutError( f"The call to `call_wait` has timed out after {timeout} second(s)." ) results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes]) self._raise_if_errors(successes) self._state = AsyncState.DEFAULT return results def set_attr(self, name: str, values: Union[list, tuple, object]): """Sets an attribute of the sub-environments. Args: name: Name of the property to be set in each individual environment. values: Values of the property to be set to. If ``values`` is a list or tuple, then it corresponds to the values for each individual environment, otherwise a single value is set for all environments. Raises: ValueError: Values must be a list or tuple with length equal to the number of environments. 
AlreadyPendingCallError: Calling `set_attr` while waiting for a pending call to complete. """ self._assert_is_running() if not isinstance(values, (list, tuple)): values = [values for _ in range(self.num_envs)] if len(values) != self.num_envs: raise ValueError( "Values must be a list or tuple with length equal to the " f"number of environments. Got `{len(values)}` values for " f"{self.num_envs} environments." ) if self._state != AsyncState.DEFAULT: raise AlreadyPendingCallError( "Calling `set_attr` while waiting " f"for a pending call to `{self._state.value}` to complete.", self._state.value, ) for pipe, value in zip(self.parent_pipes, values): pipe.send(("_setattr", (name, value))) _, successes = zip(*[pipe.recv() for pipe in self.parent_pipes]) self._raise_if_errors(successes) def close_extras( self, timeout: Optional[Union[int, float]] = None, terminate: bool = False ): """Close the environments & clean up the extra resources (processes and pipes). Args: timeout: Number of seconds before the call to :meth:`close` times out. If ``None``, the call to :meth:`close` never times out. If the call to :meth:`close` times out, then all processes are terminated. terminate: If ``True``, then the :meth:`close` operation is forced and all processes are terminated. Raises: TimeoutError: If :meth:`close` timed out. """ timeout = 0 if terminate else timeout try: if self._state != AsyncState.DEFAULT: logger.warn( f"Calling `close` while waiting for a pending call to `{self._state.value}` to complete." 
            )
                function = getattr(self, f"{self._state.value}_wait")
                function(timeout)
        except mp.TimeoutError:
            # The pending call did not finish within `timeout`; fall back to
            # terminating the worker processes instead of closing gracefully.
            terminate = True

        if terminate:
            for process in self.processes:
                if process.is_alive():
                    process.terminate()
        else:
            # Graceful shutdown: ask every live worker to close, then drain
            # the acknowledgement from each pipe before closing it.
            for pipe in self.parent_pipes:
                if (pipe is not None) and (not pipe.closed):
                    pipe.send(("close", None))
            for pipe in self.parent_pipes:
                if (pipe is not None) and (not pipe.closed):
                    pipe.recv()

        for pipe in self.parent_pipes:
            if pipe is not None:
                pipe.close()
        for process in self.processes:
            process.join()

    def _poll(self, timeout=None):
        # Return True only if every worker pipe has a result ready within
        # `timeout` seconds (None means wait indefinitely / always ready).
        self._assert_is_running()
        if timeout is None:
            return True
        end_time = time.perf_counter() + timeout
        delta = None
        for pipe in self.parent_pipes:
            # Budget the remaining wall-clock time across the sequential polls.
            delta = max(end_time - time.perf_counter(), 0)
            if pipe is None:
                return False
            if pipe.closed or (not pipe.poll(delta)):
                return False
        return True

    def _check_spaces(self):
        # Ask every worker to compare its env's spaces against the expected
        # single observation/action spaces; raise if any worker disagrees.
        self._assert_is_running()
        spaces = (self.single_observation_space, self.single_action_space)
        for pipe in self.parent_pipes:
            pipe.send(("_check_spaces", spaces))
        results, successes = zip(*[pipe.recv() for pipe in self.parent_pipes])
        self._raise_if_errors(successes)
        same_observation_spaces, same_action_spaces = zip(*results)
        if not all(same_observation_spaces):
            raise RuntimeError(
                "Some environments have an observation space different from "
                f"`{self.single_observation_space}`. In order to batch observations, "
                "the observation spaces from all environments must be equal."
            )
        if not all(same_action_spaces):
            raise RuntimeError(
                "Some environments have an action space different from "
                f"`{self.single_action_space}`. In order to batch actions, the "
                "action spaces from all environments must be equal."
            )

    def _assert_is_running(self):
        # Guard against use after `close()` has been called.
        if self.closed:
            raise ClosedEnvironmentError(
                f"Trying to operate on `{type(self).__name__}`, after a call to `close()`."
            )

    def _raise_if_errors(self, successes):
        # Drain one error-queue entry per failed worker, close that worker's
        # pipe, and re-raise the *last* reported exception in the main process.
        if all(successes):
            return

        num_errors = self.num_envs - sum(successes)
        assert num_errors > 0
        for i in range(num_errors):
            index, exctype, value = self.error_queue.get()
            logger.error(
                f"Received the following error from Worker-{index}: {exctype.__name__}: {value}"
            )
            logger.error(f"Shutting down Worker-{index}.")
            self.parent_pipes[index].close()
            self.parent_pipes[index] = None

            if i == num_errors - 1:
                logger.error("Raising the last exception back to the main process.")
                raise exctype(value)

    def __del__(self):
        """On deleting the object, checks that the vector environment is closed."""
        if not getattr(self, "closed", True) and hasattr(self, "_state"):
            self.close(terminate=True)


def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
    # Subprocess loop for `AsyncVectorEnv` when observations are sent back
    # through the pipe (i.e. shared memory is disabled).
    # Protocol: receives `(command, data)` tuples and replies with
    # `(result, success_flag)`; on failure the exception info is pushed onto
    # `error_queue` and `success_flag` is False.
    assert shared_memory is None
    env = env_fn()
    parent_pipe.close()
    try:
        while True:
            command, data = pipe.recv()
            if command == "reset":
                observation, info = env.reset(**data)
                pipe.send(((observation, info), True))
            elif command == "step":
                (
                    observation,
                    reward,
                    terminated,
                    truncated,
                    info,
                ) = env.step(data)
                if terminated or truncated:
                    # Auto-reset on episode end; keep the terminal transition
                    # available to the caller via the info dict.
                    old_observation, old_info = observation, info
                    observation, info = env.reset()
                    info["final_observation"] = old_observation
                    info["final_info"] = old_info
                pipe.send(((observation, reward, terminated, truncated, info), True))
            elif command == "seed":
                env.seed(data)
                pipe.send((None, True))
            elif command == "close":
                pipe.send((None, True))
                break
            elif command == "_call":
                name, args, kwargs = data
                if name in ["reset", "step", "seed", "close"]:
                    raise ValueError(
                        f"Trying to call function `{name}` with "
                        f"`_call`. Use `{name}` directly instead."
                    )
                function = getattr(env, name)
                if callable(function):
                    pipe.send((function(*args, **kwargs), True))
                else:
                    # `name` is a plain attribute/property, not a method.
                    pipe.send((function, True))
            elif command == "_setattr":
                name, value = data
                setattr(env, name, value)
                pipe.send((None, True))
            elif command == "_check_spaces":
                pipe.send(
                    (
                        (data[0] == env.observation_space, data[1] == env.action_space),
                        True,
                    )
                )
            else:
                raise RuntimeError(
                    f"Received unknown command `{command}`. Must "
                    "be one of {`reset`, `step`, `seed`, `close`, `_call`, "
                    "`_setattr`, `_check_spaces`}."
                )
    except (KeyboardInterrupt, Exception):
        # Report (worker index, exception type, exception value) to the main
        # process, then signal failure on the pipe.
        error_queue.put((index,) + sys.exc_info()[:2])
        pipe.send((None, False))
    finally:
        env.close()


def _worker_shared_memory(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
    # Subprocess loop for `AsyncVectorEnv` when observations are written into
    # shared memory instead of being sent through the pipe (the pipe then
    # carries `None` in place of the observation).
    assert shared_memory is not None
    env = env_fn()
    observation_space = env.observation_space
    parent_pipe.close()
    try:
        while True:
            command, data = pipe.recv()
            if command == "reset":
                observation, info = env.reset(**data)
                write_to_shared_memory(
                    observation_space, index, observation, shared_memory
                )
                pipe.send(((None, info), True))
            elif command == "step":
                (
                    observation,
                    reward,
                    terminated,
                    truncated,
                    info,
                ) = env.step(data)
                if terminated or truncated:
                    # Auto-reset on episode end; keep the terminal transition
                    # available to the caller via the info dict.
                    old_observation, old_info = observation, info
                    observation, info = env.reset()
                    info["final_observation"] = old_observation
                    info["final_info"] = old_info
                write_to_shared_memory(
                    observation_space, index, observation, shared_memory
                )
                pipe.send(((None, reward, terminated, truncated, info), True))
            elif command == "seed":
                env.seed(data)
                pipe.send((None, True))
            elif command == "close":
                pipe.send((None, True))
                break
            elif command == "_call":
                name, args, kwargs = data
                if name in ["reset", "step", "seed", "close"]:
                    raise ValueError(
                        f"Trying to call function `{name}` with "
                        f"`_call`. Use `{name}` directly instead."
                    )
                function = getattr(env, name)
                if callable(function):
                    pipe.send((function(*args, **kwargs), True))
                else:
                    pipe.send((function, True))
            elif command == "_setattr":
                name, value = data
                setattr(env, name, value)
                pipe.send((None, True))
            elif command == "_check_spaces":
                pipe.send(
                    ((data[0] == observation_space, data[1] == env.action_space), True)
                )
            else:
                raise RuntimeError(
                    f"Received unknown command `{command}`. Must "
                    "be one of {`reset`, `step`, `seed`, `close`, `_call`, "
                    "`_setattr`, `_check_spaces`}."
                )
    except (KeyboardInterrupt, Exception):
        error_queue.put((index,) + sys.exc_info()[:2])
        pipe.send((None, False))
    finally:
        env.close()



================================================
FILE: gym/vector/sync_vector_env.py
================================================
"""A synchronous vector environment."""
from copy import deepcopy
from typing import Any, Callable, Iterator, List, Optional, Sequence, Union

import numpy as np

from gym import Env
from gym.spaces import Space
from gym.vector.utils import concatenate, create_empty_array, iterate
from gym.vector.vector_env import VectorEnv

__all__ = ["SyncVectorEnv"]


class SyncVectorEnv(VectorEnv):
    """Vectorized environment that serially runs multiple environments.

    Example::

        >>> import gym
        >>> env = gym.vector.SyncVectorEnv([
        ...     lambda: gym.make("Pendulum-v0", g=9.81),
        ...     lambda: gym.make("Pendulum-v0", g=1.62)
        ... ])
        >>> env.reset()
        array([[-0.8286432 ,  0.5597771 ,  0.90249056],
               [-0.85009176,  0.5266346 ,  0.60007906]], dtype=float32)
    """

    def __init__(
        self,
        env_fns: Iterator[Callable[[], Env]],
        observation_space: Space = None,
        action_space: Space = None,
        copy: bool = True,
    ):
        """Vectorized environment that serially runs multiple environments.

        Args:
            env_fns: iterable of callable functions that create the environments.
            observation_space: Observation space of a single environment. If ``None``,
                then the observation space of the first environment is taken.
            action_space: Action space of a single environment.
                If ``None``, then the action space of the first environment is taken.
            copy: If ``True``, then the :meth:`reset` and :meth:`step` methods return a copy of the observations.

        Raises:
            RuntimeError: If the observation space of some sub-environment does not match observation_space
                (or, by default, the observation space of the first sub-environment).
        """
        self.env_fns = env_fns
        self.envs = [env_fn() for env_fn in env_fns]
        self.copy = copy
        self.metadata = self.envs[0].metadata

        if (observation_space is None) or (action_space is None):
            # Default any missing space to that of the first sub-environment.
            observation_space = observation_space or self.envs[0].observation_space
            action_space = action_space or self.envs[0].action_space

        super().__init__(
            num_envs=len(self.envs),
            observation_space=observation_space,
            action_space=action_space,
        )

        self._check_spaces()
        # Pre-allocated batched buffers, reused across reset/step calls.
        self.observations = create_empty_array(
            self.single_observation_space, n=self.num_envs, fn=np.zeros
        )
        self._rewards = np.zeros((self.num_envs,), dtype=np.float64)
        self._terminateds = np.zeros((self.num_envs,), dtype=np.bool_)
        self._truncateds = np.zeros((self.num_envs,), dtype=np.bool_)
        self._actions = None

    def seed(self, seed: Optional[Union[int, Sequence[int]]] = None):
        """Sets the seed in all sub-environments.

        Args:
            seed: The seed
        """
        super().seed(seed=seed)
        if seed is None:
            seed = [None for _ in range(self.num_envs)]
        if isinstance(seed, int):
            # A single int seed becomes consecutive per-environment seeds.
            seed = [seed + i for i in range(self.num_envs)]
        assert len(seed) == self.num_envs

        for env, single_seed in zip(self.envs, seed):
            env.seed(single_seed)

    def reset_wait(
        self,
        seed: Optional[Union[int, List[int]]] = None,
        options: Optional[dict] = None,
    ):
        """Waits for the calls triggered by :meth:`reset_async` to finish and returns the results.

        Args:
            seed: The reset environment seed
            options: Option information for the environment reset

        Returns:
            The reset observation of the environment and reset information
        """
        if seed is None:
            seed = [None for _ in range(self.num_envs)]
        if isinstance(seed, int):
            seed = [seed + i for i in range(self.num_envs)]
        assert len(seed) == self.num_envs

        self._terminateds[:] = False
        self._truncateds[:] = False
        observations = []
        infos = {}
        for i, (env, single_seed) in enumerate(zip(self.envs, seed)):

            # Only forward `seed`/`options` to the sub-env when provided.
            kwargs = {}
            if single_seed is not None:
                kwargs["seed"] = single_seed
            if options is not None:
                kwargs["options"] = options

            observation, info = env.reset(**kwargs)
            observations.append(observation)
            infos = self._add_info(infos, info, i)

        self.observations = concatenate(
            self.single_observation_space, observations, self.observations
        )
        return (deepcopy(self.observations) if self.copy else self.observations), infos

    def step_async(self, actions):
        """Sets :attr:`_actions` for use by the :meth:`step_wait` by converting the ``actions`` to an iterable version."""
        self._actions = iterate(self.action_space, actions)

    def step_wait(self):
        """Steps through each of the environments returning the batched results.

        Returns:
            The batched environment step results
        """
        observations, infos = [], {}
        for i, (env, action) in enumerate(zip(self.envs, self._actions)):

            (
                observation,
                self._rewards[i],
                self._terminateds[i],
                self._truncateds[i],
                info,
            ) = env.step(action)

            if self._terminateds[i] or self._truncateds[i]:
                # Auto-reset on episode end; stash the terminal transition
                # in the info dict so callers can still see it.
                old_observation, old_info = observation, info
                observation, info = env.reset()
                info["final_observation"] = old_observation
                info["final_info"] = old_info
            observations.append(observation)
            infos = self._add_info(infos, info, i)
        self.observations = concatenate(
            self.single_observation_space, observations, self.observations
        )

        return (
            deepcopy(self.observations) if self.copy else self.observations,
            np.copy(self._rewards),
            np.copy(self._terminateds),
            np.copy(self._truncateds),
            infos,
        )

    def call(self, name, *args, **kwargs) -> tuple:
        """Calls the method with name and applies args and kwargs.

        Args:
            name: The method name
            *args: The method args
            **kwargs: The method kwargs

        Returns:
            Tuple of results
        """
        results = []
        for env in self.envs:
            function = getattr(env, name)
            if callable(function):
                results.append(function(*args, **kwargs))
            else:
                # `name` refers to a plain attribute/property, not a method.
                results.append(function)

        return tuple(results)

    def set_attr(self, name: str, values: Union[list, tuple, Any]):
        """Sets an attribute of the sub-environments.

        Args:
            name: The property name to change
            values: Values of the property to be set to. If ``values`` is a list or
                tuple, then it corresponds to the values for each individual
                environment, otherwise, a single value is set for all environments.

        Raises:
            ValueError: Values must be a list or tuple with length equal to the number of environments.
        """
        if not isinstance(values, (list, tuple)):
            # Broadcast a scalar value to every sub-environment.
            values = [values for _ in range(self.num_envs)]
        if len(values) != self.num_envs:
            raise ValueError(
                "Values must be a list or tuple with length equal to the "
                f"number of environments. Got `{len(values)}` values for "
                f"{self.num_envs} environments."
            )

        for env, value in zip(self.envs, values):
            setattr(env, name, value)

    def close_extras(self, **kwargs):
        """Close the environments."""
        [env.close() for env in self.envs]

    def _check_spaces(self) -> bool:
        # Raise if any sub-environment disagrees with the single
        # observation/action space; returns True when all match.
        for env in self.envs:
            if not (env.observation_space == self.single_observation_space):
                raise RuntimeError(
                    "Some environments have an observation space different from "
                    f"`{self.single_observation_space}`. In order to batch observations, "
                    "the observation spaces from all environments must be equal."
                )

            if not (env.action_space == self.single_action_space):
                raise RuntimeError(
                    "Some environments have an action space different from "
                    f"`{self.single_action_space}`. In order to batch actions, the "
                    "action spaces from all environments must be equal."
                )

        return True



================================================
FILE: gym/vector/utils/__init__.py
================================================
"""Module for gym vector utils."""
from gym.vector.utils.misc import CloudpickleWrapper, clear_mpi_env_vars
from gym.vector.utils.numpy_utils import concatenate, create_empty_array
from gym.vector.utils.shared_memory import (
    create_shared_memory,
    read_from_shared_memory,
    write_to_shared_memory,
)
from gym.vector.utils.spaces import _BaseGymSpaces  # pyright: reportPrivateUsage=false
from gym.vector.utils.spaces import BaseGymSpaces, batch_space, iterate

__all__ = [
    "CloudpickleWrapper",
    "clear_mpi_env_vars",
    "concatenate",
    "create_empty_array",
    "create_shared_memory",
    "read_from_shared_memory",
    "write_to_shared_memory",
    "BaseGymSpaces",
    "batch_space",
    "iterate",
]


================================================
FILE: gym/vector/utils/misc.py
================================================
"""Miscellaneous utilities."""
import contextlib
import os

__all__ = ["CloudpickleWrapper", "clear_mpi_env_vars"]


class CloudpickleWrapper:
    """Wrapper that uses cloudpickle to pickle and unpickle the result."""

    def __init__(self, fn: callable):
        """Cloudpickle wrapper for a function."""
        self.fn
= fn

    def __getstate__(self):
        """Get the state using `cloudpickle.dumps(self.fn)`."""
        import cloudpickle

        return cloudpickle.dumps(self.fn)

    def __setstate__(self, ob):
        """Sets the state by unpickling `ob` into `self.fn`."""
        # cloudpickle output is pickle-compatible, so plain pickle can load it.
        import pickle

        self.fn = pickle.loads(ob)

    def __call__(self):
        """Calls the function `self.fn` with no arguments."""
        return self.fn()


@contextlib.contextmanager
def clear_mpi_env_vars():
    """Clears the MPI of environment variables.

    `from mpi4py import MPI` will call `MPI_Init` by default.
    If the child process has MPI environment variables, MPI will think that the child process
    is an MPI process just like the parent and do bad things such as hang.

    This context manager is a hacky way to clear those environment variables
    temporarily such as when we are starting multiprocessing Processes.

    Yields:
        Yields for the context manager
    """
    removed_environment = {}
    for k, v in list(os.environ.items()):
        for prefix in ["OMPI_", "PMI_"]:
            if k.startswith(prefix):
                removed_environment[k] = v
                del os.environ[k]
    try:
        yield
    finally:
        # Restore whatever was removed, even if the body raised.
        os.environ.update(removed_environment)


================================================
FILE: gym/vector/utils/numpy_utils.py
================================================
"""Numpy utility functions: concatenate space samples and create empty array."""
from collections import OrderedDict
from functools import singledispatch
from typing import Iterable, Union

import numpy as np

from gym.spaces import Box, Dict, Discrete, MultiBinary, MultiDiscrete, Space, Tuple

__all__ = ["concatenate", "create_empty_array"]


@singledispatch
def concatenate(
    space: Space, items: Iterable, out: Union[tuple, dict, np.ndarray]
) -> Union[tuple, dict, np.ndarray]:
    """Concatenate multiple samples from space into a single object.

    Example::

        >>> from gym.spaces import Box
        >>> space = Box(low=0, high=1, shape=(3,), dtype=np.float32)
        >>> out = np.zeros((2, 3), dtype=np.float32)
        >>> items = [space.sample() for _ in range(2)]
        >>> concatenate(space, items, out)
        array([[0.6348213 , 0.28607962, 0.60760117],
               [0.87383074, 0.192658  , 0.2148103 ]], dtype=float32)

    Args:
        space: Observation space of a single environment in the vectorized environment.
        items: Samples to be concatenated.
        out: The output object. This object is a (possibly nested) numpy array.

    Returns:
        The output object. This object is a (possibly nested) numpy array.

    Raises:
        ValueError: Space is not a valid :class:`gym.Space` instance
    """
    # Base singledispatch implementation: only reached for non-Space types.
    raise ValueError(
        f"Space of type `{type(space)}` is not a valid `gym.Space` instance."
    )


@concatenate.register(Box)
@concatenate.register(Discrete)
@concatenate.register(MultiDiscrete)
@concatenate.register(MultiBinary)
def _concatenate_base(space, items, out):
    # Flat spaces: stack samples directly into the pre-allocated output array.
    return np.stack(items, axis=0, out=out)


@concatenate.register(Tuple)
def _concatenate_tuple(space, items, out):
    # Recurse per sub-space, gathering the i-th component of every item.
    return tuple(
        concatenate(subspace, [item[i] for item in items], out[i])
        for (i, subspace) in enumerate(space.spaces)
    )


@concatenate.register(Dict)
def _concatenate_dict(space, items, out):
    # Recurse per key, gathering each item's value for that key.
    return OrderedDict(
        [
            (key, concatenate(subspace, [item[key] for item in items], out[key]))
            for (key, subspace) in space.spaces.items()
        ]
    )


@concatenate.register(Space)
def _concatenate_custom(space, items, out):
    # Custom spaces cannot be batched into arrays; return the raw tuple.
    return tuple(items)


@singledispatch
def create_empty_array(
    space: Space, n: int = 1, fn: callable = np.zeros
) -> Union[tuple, dict, np.ndarray]:
    """Create an empty (possibly nested) numpy array.

    Example::

        >>> from gym.spaces import Box, Dict
        >>> space = Dict({
        ...     'position': Box(low=0, high=1, shape=(3,), dtype=np.float32),
        ...     'velocity': Box(low=0, high=1, shape=(2,), dtype=np.float32)})
        >>> create_empty_array(space, n=2, fn=np.zeros)
        OrderedDict([('position', array([[0., 0., 0.],
               [0., 0., 0.]], dtype=float32)),
               ('velocity', array([[0., 0.],
               [0., 0.]], dtype=float32))])

    Args:
        space: Observation space of a single environment in the vectorized environment.
        n: Number of environments in the vectorized environment. If `None`, creates an empty sample from `space`.
        fn: Function to apply when creating the empty numpy array. Examples of such functions are `np.empty` or `np.zeros`.

    Returns:
        The output object. This object is a (possibly nested) numpy array.

    Raises:
        ValueError: Space is not a valid :class:`gym.Space` instance
    """
    raise ValueError(
        f"Space of type `{type(space)}` is not a valid `gym.Space` instance."
    )


@create_empty_array.register(Box)
@create_empty_array.register(Discrete)
@create_empty_array.register(MultiDiscrete)
@create_empty_array.register(MultiBinary)
def _create_empty_array_base(space, n=1, fn=np.zeros):
    # `n is None` means "single sample" shape, otherwise prepend a batch axis.
    shape = space.shape if (n is None) else (n,) + space.shape
    return fn(shape, dtype=space.dtype)


@create_empty_array.register(Tuple)
def _create_empty_array_tuple(space, n=1, fn=np.zeros):
    return tuple(create_empty_array(subspace, n=n, fn=fn) for subspace in space.spaces)


@create_empty_array.register(Dict)
def _create_empty_array_dict(space, n=1, fn=np.zeros):
    return OrderedDict(
        [
            (key, create_empty_array(subspace, n=n, fn=fn))
            for (key, subspace) in space.spaces.items()
        ]
    )


@create_empty_array.register(Space)
def _create_empty_array_custom(space, n=1, fn=np.zeros):
    # No array representation exists for custom spaces.
    return None


================================================
FILE: gym/vector/utils/shared_memory.py
================================================
"""Utility functions for vector environments to share memory between processes."""
import multiprocessing as mp
from collections import OrderedDict
from ctypes import c_bool
from functools import singledispatch
from typing import Union

import numpy as np

from
gym.error import CustomSpaceError
from gym.spaces import Box, Dict, Discrete, MultiBinary, MultiDiscrete, Space, Tuple

__all__ = ["create_shared_memory", "read_from_shared_memory", "write_to_shared_memory"]


@singledispatch
def create_shared_memory(
    space: Space, n: int = 1, ctx=mp
) -> Union[dict, tuple, mp.Array]:
    """Create a shared memory object, to be shared across processes.

    This eventually contains the observations from the vectorized environment.

    Args:
        space: Observation space of a single environment in the vectorized environment.
        n: Number of environments in the vectorized environment (i.e. the number of processes).
        ctx: The multiprocess module

    Returns:
        shared_memory for the shared object across processes.

    Raises:
        CustomSpaceError: Space is not a valid :class:`gym.Space` instance
    """
    raise CustomSpaceError(
        "Cannot create a shared memory for space with "
        f"type `{type(space)}`. Shared memory only supports "
        "default Gym spaces (e.g. `Box`, `Tuple`, "
        "`Dict`, etc...), and does not support custom "
        "Gym spaces."
    )


@create_shared_memory.register(Box)
@create_shared_memory.register(Discrete)
@create_shared_memory.register(MultiDiscrete)
@create_shared_memory.register(MultiBinary)
def _create_base_shared_memory(space, n: int = 1, ctx=mp):
    dtype = space.dtype.char
    if dtype in "?":
        # ctypes has no '?' typecode; booleans are stored as c_bool.
        dtype = c_bool
    # One flat buffer holding n flattened samples of `space`.
    return ctx.Array(dtype, n * int(np.prod(space.shape)))


@create_shared_memory.register(Tuple)
def _create_tuple_shared_memory(space, n: int = 1, ctx=mp):
    return tuple(
        create_shared_memory(subspace, n=n, ctx=ctx) for subspace in space.spaces
    )


@create_shared_memory.register(Dict)
def _create_dict_shared_memory(space, n=1, ctx=mp):
    return OrderedDict(
        [
            (key, create_shared_memory(subspace, n=n, ctx=ctx))
            for (key, subspace) in space.spaces.items()
        ]
    )


@singledispatch
def read_from_shared_memory(
    space: Space, shared_memory: Union[dict, tuple, mp.Array], n: int = 1
) -> Union[dict, tuple, np.ndarray]:
    """Read the batch of observations from shared memory as a numpy array.

    ..notes::
        The numpy array objects returned by `read_from_shared_memory` shares the
        memory of `shared_memory`. Any changes to `shared_memory` are forwarded
        to `observations`, and vice-versa. To avoid any side-effect, use `np.copy`.

    Args:
        space: Observation space of a single environment in the vectorized environment.
        shared_memory: Shared object across processes. This contains the observations from the vectorized environment.
            This object is created with `create_shared_memory`.
        n: Number of environments in the vectorized environment (i.e. the number of processes).

    Returns:
        Batch of observations as a (possibly nested) numpy array.

    Raises:
        CustomSpaceError: Space is not a valid :class:`gym.Space` instance
    """
    raise CustomSpaceError(
        "Cannot read from a shared memory for space with "
        f"type `{type(space)}`. Shared memory only supports "
        "default Gym spaces (e.g. `Box`, `Tuple`, "
        "`Dict`, etc...), and does not support custom "
        "Gym spaces."
    )


@read_from_shared_memory.register(Box)
@read_from_shared_memory.register(Discrete)
@read_from_shared_memory.register(MultiDiscrete)
@read_from_shared_memory.register(MultiBinary)
def _read_base_from_shared_memory(space, shared_memory, n: int = 1):
    # Zero-copy view over the shared buffer, reshaped to (n, *space.shape).
    return np.frombuffer(shared_memory.get_obj(), dtype=space.dtype).reshape(
        (n,) + space.shape
    )


@read_from_shared_memory.register(Tuple)
def _read_tuple_from_shared_memory(space, shared_memory, n: int = 1):
    return tuple(
        read_from_shared_memory(subspace, memory, n=n)
        for (memory, subspace) in zip(shared_memory, space.spaces)
    )


@read_from_shared_memory.register(Dict)
def _read_dict_from_shared_memory(space, shared_memory, n: int = 1):
    return OrderedDict(
        [
            (key, read_from_shared_memory(subspace, shared_memory[key], n=n))
            for (key, subspace) in space.spaces.items()
        ]
    )


@singledispatch
def write_to_shared_memory(
    space: Space,
    index: int,
    value: np.ndarray,
    shared_memory: Union[dict, tuple, mp.Array],
):
    """Write the observation of a single environment into shared memory.

    Args:
        space: Observation space of a single environment in the vectorized environment.
        index: Index of the environment (must be in `[0, num_envs)`).
        value: Observation of the single environment to write to shared memory.
        shared_memory: Shared object across processes. This contains the observations from the vectorized environment.
            This object is created with `create_shared_memory`.

    Raises:
        CustomSpaceError: Space is not a valid :class:`gym.Space` instance
    """
    raise CustomSpaceError(
        "Cannot write to a shared memory for space with "
        f"type `{type(space)}`. Shared memory only supports "
        "default Gym spaces (e.g. `Box`, `Tuple`, "
        "`Dict`, etc...), and does not support custom "
        "Gym spaces."
    )


@write_to_shared_memory.register(Box)
@write_to_shared_memory.register(Discrete)
@write_to_shared_memory.register(MultiDiscrete)
@write_to_shared_memory.register(MultiBinary)
def _write_base_to_shared_memory(space, index, value, shared_memory):
    size = int(np.prod(space.shape))
    destination = np.frombuffer(shared_memory.get_obj(), dtype=space.dtype)
    # Copy the flattened observation into this environment's slot of the buffer.
    np.copyto(
        destination[index * size : (index + 1) * size],
        np.asarray(value, dtype=space.dtype).flatten(),
    )


@write_to_shared_memory.register(Tuple)
def _write_tuple_to_shared_memory(space, index, values, shared_memory):
    for value, memory, subspace in zip(values, shared_memory, space.spaces):
        write_to_shared_memory(subspace, index, value, memory)


@write_to_shared_memory.register(Dict)
def _write_dict_to_shared_memory(space, index, values, shared_memory):
    for key, subspace in space.spaces.items():
        write_to_shared_memory(subspace, index, values[key], shared_memory[key])


================================================
FILE: gym/vector/utils/spaces.py
================================================
"""Utility functions for gym spaces: batch space and iterator."""
from collections import OrderedDict
from copy import deepcopy
from functools import singledispatch
from typing import Iterator

import numpy as np

from gym.error import
CustomSpaceError
from gym.spaces import Box, Dict, Discrete, MultiBinary, MultiDiscrete, Space, Tuple

# The "flat" gym spaces that have a direct numpy array representation.
BaseGymSpaces = (Box, Discrete, MultiDiscrete, MultiBinary)
_BaseGymSpaces = BaseGymSpaces
__all__ = ["BaseGymSpaces", "_BaseGymSpaces", "batch_space", "iterate"]


@singledispatch
def batch_space(space: Space, n: int = 1) -> Space:
    """Create a (batched) space, containing multiple copies of a single space.

    Example::

        >>> from gym.spaces import Box, Dict
        >>> space = Dict({
        ...     'position': Box(low=0, high=1, shape=(3,), dtype=np.float32),
        ...     'velocity': Box(low=0, high=1, shape=(2,), dtype=np.float32)
        ... })
        >>> batch_space(space, n=5)
        Dict(position:Box(5, 3), velocity:Box(5, 2))

    Args:
        space: Space (e.g. the observation space) for a single environment in the vectorized environment.
        n: Number of environments in the vectorized environment.

    Returns:
        Space (e.g. the observation space) for a batch of environments in the vectorized environment.

    Raises:
        ValueError: Cannot batch space that is not a valid :class:`gym.Space` instance
    """
    raise ValueError(
        f"Cannot batch space with type `{type(space)}`. The space must be a valid `gym.Space` instance."
    )


@batch_space.register(Box)
def _batch_space_box(space, n=1):
    # Tile the bounds n times along a new leading batch axis.
    repeats = tuple([n] + [1] * space.low.ndim)
    low, high = np.tile(space.low, repeats), np.tile(space.high, repeats)
    return Box(low=low, high=high, dtype=space.dtype, seed=deepcopy(space.np_random))


@batch_space.register(Discrete)
def _batch_space_discrete(space, n=1):
    if space.start == 0:
        return MultiDiscrete(
            np.full((n,), space.n, dtype=space.dtype),
            dtype=space.dtype,
            seed=deepcopy(space.np_random),
        )
    else:
        # MultiDiscrete cannot represent a non-zero start; use a Box instead.
        return Box(
            low=space.start,
            high=space.start + space.n - 1,
            shape=(n,),
            dtype=space.dtype,
            seed=deepcopy(space.np_random),
        )


@batch_space.register(MultiDiscrete)
def _batch_space_multidiscrete(space, n=1):
    repeats = tuple([n] + [1] * space.nvec.ndim)
    # Box bounds are inclusive, hence the -1 on the tiled nvec.
    high = np.tile(space.nvec, repeats) - 1
    return Box(
        low=np.zeros_like(high),
        high=high,
        dtype=space.dtype,
        seed=deepcopy(space.np_random),
    )


@batch_space.register(MultiBinary)
def _batch_space_multibinary(space, n=1):
    return Box(
        low=0,
        high=1,
        shape=(n,) + space.shape,
        dtype=space.dtype,
        seed=deepcopy(space.np_random),
    )


@batch_space.register(Tuple)
def _batch_space_tuple(space, n=1):
    return Tuple(
        tuple(batch_space(subspace, n=n) for subspace in space.spaces),
        seed=deepcopy(space.np_random),
    )


@batch_space.register(Dict)
def _batch_space_dict(space, n=1):
    return Dict(
        OrderedDict(
            [
                (key, batch_space(subspace, n=n))
                for (key, subspace) in space.spaces.items()
            ]
        ),
        seed=deepcopy(space.np_random),
    )


@batch_space.register(Space)
def _batch_space_custom(space, n=1):
    # Without deepcopy, then the space.np_random is batched_space.spaces[0].np_random
    # Which is an issue if you are sampling actions of both the original space and the batched space
    batched_space = Tuple(
        tuple(deepcopy(space) for _ in range(n)), seed=deepcopy(space.np_random)
    )
    new_seeds = list(map(int, batched_space.np_random.integers(0, 1e8, n)))
    batched_space.seed(new_seeds)
    return batched_space


@singledispatch
def iterate(space: Space, items) -> Iterator:
    """Iterate over the elements of a (batched) space.

    Example::

        >>> from gym.spaces import Box, Dict
        >>> space = Dict({
        ... 'position': Box(low=0, high=1, shape=(2, 3), dtype=np.float32),
        ... 'velocity': Box(low=0, high=1, shape=(2, 2), dtype=np.float32)})
        >>> items = space.sample()
        >>> it = iterate(space, items)
        >>> next(it)
        {'position': array([-0.99644893, -0.08304597, -0.7238421 ], dtype=float32),
        'velocity': array([0.35848552, 0.1533453 ], dtype=float32)}
        >>> next(it)
        {'position': array([-0.67958736, -0.49076623,  0.38661423], dtype=float32),
        'velocity': array([0.7975036 , 0.93317133], dtype=float32)}
        >>> next(it)
        StopIteration

    Args:
        space: Space to which `items` belong to.
        items: Items to be iterated over.

    Returns:
        Iterator over the elements in `items`.

    Raises:
        ValueError: Space is not an instance of :class:`gym.Space`
    """
    raise ValueError(
        f"Space of type `{type(space)}` is not a valid `gym.Space` instance."
    )


@iterate.register(Discrete)
def _iterate_discrete(space, items):
    raise TypeError("Unable to iterate over a space of type `Discrete`.")


@iterate.register(Box)
@iterate.register(MultiDiscrete)
@iterate.register(MultiBinary)
def _iterate_base(space, items):
    try:
        # Numpy arrays iterate over their leading (batch) axis.
        return iter(items)
    except TypeError:
        raise TypeError(f"Unable to iterate over the following elements: {items}")


@iterate.register(Tuple)
def _iterate_tuple(space, items):
    # If this is a tuple of custom subspaces only, then simply iterate over items
    if all(
        isinstance(subspace, Space)
        and (not isinstance(subspace, BaseGymSpaces + (Tuple, Dict)))
        for subspace in space.spaces
    ):
        return iter(items)

    # Otherwise zip together per-subspace iterators to yield per-env tuples.
    return zip(
        *[iterate(subspace, items[i]) for i, subspace in enumerate(space.spaces)]
    )


@iterate.register(Dict)
def _iterate_dict(space, items):
    keys, values = zip(
        *[
            (key, iterate(subspace, items[key]))
            for key, subspace in space.spaces.items()
        ]
    )
    # Yield one OrderedDict per environment, rebuilt from the zipped values.
    for item in zip(*values):
        yield OrderedDict([(key, value) for (key, value) in zip(keys, item)])


@iterate.register(Space)
def _iterate_custom(space, items):
    raise CustomSpaceError(
        f"Unable to
iterate over {items}, since {space} "
        "is a custom `gym.Space` instance (i.e. not one of "
        "`Box`, `Dict`, etc...)."
    )


================================================
FILE: gym/vector/vector_env.py
================================================
"""Base class for vectorized environments."""
from typing import Any, List, Optional, Tuple, Union

import numpy as np

import gym
from gym.vector.utils.spaces import batch_space

__all__ = ["VectorEnv"]


class VectorEnv(gym.Env):
    """Base class for vectorized environments. Runs multiple independent copies of the same environment in parallel.

    This is not the same as 1 environment that has multiple subcomponents, but it is many copies of the same base env.

    Each observation returned from vectorized environment is a batch of observations for each parallel environment.
    And :meth:`step` is also expected to receive a batch of actions for each parallel environment.

    Notes:
        All parallel environments should share the identical observation and action spaces.
        In other words, a vector of multiple different environments is not supported.
    """

    def __init__(
        self,
        num_envs: int,
        observation_space: gym.Space,
        action_space: gym.Space,
    ):
        """Base class for vectorized environments.

        Args:
            num_envs: Number of environments in the vectorized environment.
            observation_space: Observation space of a single environment.
            action_space: Action space of a single environment.
        """
        self.num_envs = num_envs
        self.is_vector_env = True
        # Batched spaces expose the shape seen by callers of reset/step.
        self.observation_space = batch_space(observation_space, n=num_envs)
        self.action_space = batch_space(action_space, n=num_envs)

        self.closed = False
        self.viewer = None

        # The observation and action spaces of a single environment are
        # kept in separate properties
        self.single_observation_space = observation_space
        self.single_action_space = action_space

    def reset_async(
        self,
        seed: Optional[Union[int, List[int]]] = None,
        options: Optional[dict] = None,
    ):
        """Reset the sub-environments asynchronously.

        This method will return ``None``. A call to :meth:`reset_async` should be followed
        by a call to :meth:`reset_wait` to retrieve the results.

        Args:
            seed: The reset seed
            options: Reset options
        """
        pass

    def reset_wait(
        self,
        seed: Optional[Union[int, List[int]]] = None,
        options: Optional[dict] = None,
    ):
        """Retrieves the results of a :meth:`reset_async` call.

        A call to this method must always be preceded by a call to :meth:`reset_async`.

        Args:
            seed: The reset seed
            options: Reset options

        Returns:
            The results from :meth:`reset_async`

        Raises:
            NotImplementedError: VectorEnv does not implement function
        """
        raise NotImplementedError("VectorEnv does not implement function")

    def reset(
        self,
        *,
        seed: Optional[Union[int, List[int]]] = None,
        options: Optional[dict] = None,
    ):
        """Reset all parallel environments and return a batch of initial observations.

        Args:
            seed: The environment reset seeds
            options: If to return the options

        Returns:
            A batch of observations from the vectorized environment.
        """
        self.reset_async(seed=seed, options=options)
        return self.reset_wait(seed=seed, options=options)

    def step_async(self, actions):
        """Asynchronously performs steps in the sub-environments.

        The results can be retrieved via a call to :meth:`step_wait`.

        Args:
            actions: The actions to take asynchronously
        """

    def step_wait(self, **kwargs):
        """Retrieves the results of a :meth:`step_async` call.

        A call to this method must always be preceded by a call to :meth:`step_async`.

        Args:
            **kwargs: Additional keywords for vector implementation

        Returns:
            The results from the :meth:`step_async` call
        """

    def step(self, actions):
        """Take an action for each parallel environment.

        Args:
            actions: element of :attr:`action_space` Batch of actions.

        Returns:
            Batch of (observations, rewards, terminated, truncated, infos) or (observations, rewards, dones, infos)
        """
        self.step_async(actions)
        return self.step_wait()

    def call_async(self, name, *args, **kwargs):
        """Calls a method name for each parallel environment asynchronously."""

    def call_wait(self, **kwargs) -> List[Any]:  # type: ignore
        """After calling a method in :meth:`call_async`, this function collects the results."""

    def call(self, name: str, *args, **kwargs) -> List[Any]:
        """Call a method, or get a property, from each parallel environment.

        Args:
            name (str): Name of the method or property to call.
            *args: Arguments to apply to the method call.
            **kwargs: Keyword arguments to apply to the method call.

        Returns:
            List of the results of the individual calls to the method or property for each environment.
        """
        self.call_async(name, *args, **kwargs)
        return self.call_wait()

    def get_attr(self, name: str):
        """Get a property from each parallel environment.

        Args:
            name (str): Name of the property to be get from each individual environment.

        Returns:
            The property with name
        """
        return self.call(name)

    def set_attr(self, name: str, values: Union[list, tuple, object]):
        """Set a property in each sub-environment.

        Args:
            name (str): Name of the property to be set in each individual environment.
            values (list, tuple, or object): Values of the property to be set to. If `values` is a list or
                tuple, then it corresponds to the values for each individual environment, otherwise a single value
                is set for all environments.
        """

    def close_extras(self, **kwargs):
        """Clean up the extra resources e.g. beyond what's in this base class."""
        pass

    def close(self, **kwargs):
        """Close all parallel environments and release resources.

        It also closes all the existing image viewers, then calls :meth:`close_extras` and set
        :attr:`closed` as ``True``.

        Warnings:
            This function itself does not close the environments, it should be handled
            in :meth:`close_extras`. This is generic for both synchronous and asynchronous
            vectorized environments.

        Notes:
            This will be automatically called when garbage collected or program exited.

        Args:
            **kwargs: Keyword arguments passed to :meth:`close_extras`
        """
        if self.closed:
            return
        if self.viewer is not None:
            self.viewer.close()
        self.close_extras(**kwargs)
        self.closed = True

    def _add_info(self, infos: dict, info: dict, env_num: int) -> dict:
        """Add env info to the info dictionary of the vectorized environment.

        Given the `info` of a single environment add it to the `infos` dictionary
        which represents all the infos of the vectorized environment.
        Every `key` of `info` is paired with a boolean mask `_key` representing
        whether or not the i-indexed environment has this `info`.

        Args:
            infos (dict): the infos of the vectorized environment
            info (dict): the info coming from the single environment
            env_num (int): the index of the single environment

        Returns:
            infos (dict): the (updated) infos of the vectorized environment
        """
        for k in info.keys():
            if k not in infos:
                # First time this key is seen: allocate the value + mask arrays.
                info_array, array_mask = self._init_info_arrays(type(info[k]))
            else:
                info_array, array_mask = infos[k], infos[f"_{k}"]

            info_array[env_num], array_mask[env_num] = info[k], True
            infos[k], infos[f"_{k}"] = info_array, array_mask
        return infos

    def _init_info_arrays(self, dtype: type) -> Tuple[np.ndarray, np.ndarray]:
        """Initialize the info array.

        Initialize the info array. If the dtype is numeric
        the info array will have the same dtype, otherwise
        will be an array of `None`. Also, a boolean array
        of the same length is returned. It will be used for
        assessing which environment has info data.

        Args:
            dtype (type): data type of the info coming from the env.

        Returns:
            array (np.ndarray): the initialized info array.
            array_mask (np.ndarray): the initialized boolean array.
        """
        if dtype in [int, float, bool] or issubclass(dtype, np.number):
            array = np.zeros(self.num_envs, dtype=dtype)
        else:
            # Non-numeric info values are stored as objects, defaulting to None.
            array = np.zeros(self.num_envs, dtype=object)
            array[:] = None
        array_mask = np.zeros(self.num_envs, dtype=bool)
        return array, array_mask

    def __del__(self):
        """Closes the vector environment."""
        if not getattr(self, "closed", True):
            self.close()

    def __repr__(self) -> str:
        """Returns a string representation of the vector environment.

        Returns:
            A string containing the class name, number of environments and environment spec id
        """
        if self.spec is None:
            return f"{self.__class__.__name__}({self.num_envs})"
        else:
            return f"{self.__class__.__name__}({self.spec.id}, {self.num_envs})"


class VectorEnvWrapper(VectorEnv):
    """Wraps the vectorized environment to allow a modular transformation.

    This class is the base class for all wrappers for vectorized environments. The subclass
    could override some methods to change the behavior of the original vectorized environment
    without touching the original code.

    Notes:
        Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`.
""" def __init__(self, env: VectorEnv): assert isinstance(env, VectorEnv) self.env = env # explicitly forward the methods defined in VectorEnv # to self.env (instead of the base class) def reset_async(self, **kwargs): return self.env.reset_async(**kwargs) def reset_wait(self, **kwargs): return self.env.reset_wait(**kwargs) def step_async(self, actions): return self.env.step_async(actions) def step_wait(self): return self.env.step_wait() def close(self, **kwargs): return self.env.close(**kwargs) def close_extras(self, **kwargs): return self.env.close_extras(**kwargs) def call(self, name, *args, **kwargs): return self.env.call(name, *args, **kwargs) def set_attr(self, name, values): return self.env.set_attr(name, values) # implicitly forward all other methods and attributes to self.env def __getattr__(self, name): if name.startswith("_"): raise AttributeError(f"attempted to get missing private attribute '{name}'") return getattr(self.env, name) @property def unwrapped(self): return self.env.unwrapped def __repr__(self): return f"<{self.__class__.__name__}, {self.env}>" def __del__(self): self.env.__del__() ================================================ FILE: gym/version.py ================================================ VERSION = "0.26.2" ================================================ FILE: gym/wrappers/README.md ================================================ # Wrappers Wrappers are used to transform an environment in a modular way: ```python env = gym.make('Pong-v0') env = MyWrapper(env) ``` Note that we may later restructure any of the files in this directory, but will keep the wrappers available at the wrappers' top-level folder. 
So for example, you should access `MyWrapper` as follows: ```python from gym.wrappers import MyWrapper ``` ## Quick tips for writing your own wrapper - Don't forget to call `super(class_name, self).__init__(env)` if you override the wrapper's `__init__` function - You can access the inner environment with `self.unwrapped` - You can access the previous layer using `self.env` - The variables `metadata`, `action_space`, `observation_space`, `reward_range`, and `spec` are copied to `self` from the previous layer - Create a wrapped function for at least one of the following: `__init__(self, env)`, `step`, `reset`, `render`, `close`, or `seed` - Your layered function should take its input from the previous layer (`self.env`) and/or the inner layer (`self.unwrapped`) ================================================ FILE: gym/wrappers/__init__.py ================================================ """Module of wrapper classes.""" from gym import error from gym.wrappers.atari_preprocessing import AtariPreprocessing from gym.wrappers.autoreset import AutoResetWrapper from gym.wrappers.clip_action import ClipAction from gym.wrappers.filter_observation import FilterObservation from gym.wrappers.flatten_observation import FlattenObservation from gym.wrappers.frame_stack import FrameStack, LazyFrames from gym.wrappers.gray_scale_observation import GrayScaleObservation from gym.wrappers.human_rendering import HumanRendering from gym.wrappers.normalize import NormalizeObservation, NormalizeReward from gym.wrappers.order_enforcing import OrderEnforcing from gym.wrappers.record_episode_statistics import RecordEpisodeStatistics from gym.wrappers.record_video import RecordVideo, capped_cubic_video_schedule from gym.wrappers.render_collection import RenderCollection from gym.wrappers.rescale_action import RescaleAction from gym.wrappers.resize_observation import ResizeObservation from gym.wrappers.step_api_compatibility import StepAPICompatibility from gym.wrappers.time_aware_observation 
import TimeAwareObservation from gym.wrappers.time_limit import TimeLimit from gym.wrappers.transform_observation import TransformObservation from gym.wrappers.transform_reward import TransformReward from gym.wrappers.vector_list_info import VectorListInfo ================================================ FILE: gym/wrappers/atari_preprocessing.py ================================================ """Implementation of Atari 2600 Preprocessing following the guidelines of Machado et al., 2018.""" import numpy as np import gym from gym.spaces import Box try: import cv2 except ImportError: cv2 = None class AtariPreprocessing(gym.Wrapper): """Atari 2600 preprocessing wrapper. This class follows the guidelines in Machado et al. (2018), "Revisiting the Arcade Learning Environment: Evaluation Protocols and Open Problems for General Agents". Specifically, the following preprocess stages applies to the atari environment: - Noop Reset: Obtains the initial state by taking a random number of no-ops on reset, default max 30 no-ops. - Frame skipping: The number of frames skipped between steps, 4 by default - Max-pooling: Pools over the most recent two observations from the frame skips - Termination signal when a life is lost: When the agent losses a life during the environment, then the environment is terminated. Turned off by default. Not recommended by Machado et al. (2018). - Resize to a square image: Resizes the atari environment original observation shape from 210x180 to 84x84 by default - Grayscale observation: If the observation is colour or greyscale, by default, greyscale. - Scale observation: If to scale the observation between [0, 1) or [0, 255), by default, not scaled. """ def __init__( self, env: gym.Env, noop_max: int = 30, frame_skip: int = 4, screen_size: int = 84, terminal_on_life_loss: bool = False, grayscale_obs: bool = True, grayscale_newaxis: bool = False, scale_obs: bool = False, ): """Wrapper for Atari 2600 preprocessing. 
        Args:
            env (Env): The environment to apply the preprocessing
            noop_max (int): For No-op reset, the max number no-ops actions are taken at reset, to turn off, set to 0.
            frame_skip (int): The number of frames between new observation the agents observations effecting the frequency at which the agent experiences the game.
            screen_size (int): resize Atari frame
            terminal_on_life_loss (bool): `if True`, then :meth:`step()` returns `terminated=True` whenever a life is lost.
            grayscale_obs (bool): if True, then gray scale observation is returned, otherwise, RGB observation is returned.
            grayscale_newaxis (bool): `if True and grayscale_obs=True`, then a channel axis is added to grayscale observations to make them 3-dimensional.
            scale_obs (bool): if True, then observation normalized in range [0,1) is returned. It also limits memory optimization benefits of FrameStack Wrapper.

        Raises:
            DependencyNotInstalled: opencv-python package not installed
            ValueError: Disable frame-skipping in the original env
        """
        super().__init__(env)
        if cv2 is None:
            raise gym.error.DependencyNotInstalled(
                "opencv-python package not installed, run `pip install gym[other]` to get dependencies for atari"
            )
        assert frame_skip > 0
        assert screen_size > 0
        assert noop_max >= 0
        if frame_skip > 1:
            # Double frame-skipping (env-side and wrapper-side) would silently
            # change the effective action frequency, so it is rejected here.
            if (
                "NoFrameskip" not in env.spec.id
                and getattr(env.unwrapped, "_frameskip", None) != 1
            ):
                raise ValueError(
                    "Disable frame-skipping in the original env. Otherwise, more than one "
                    "frame-skip will happen as through this wrapper"
                )
        self.noop_max = noop_max
        assert env.unwrapped.get_action_meanings()[0] == "NOOP"

        self.frame_skip = frame_skip
        self.screen_size = screen_size
        self.terminal_on_life_loss = terminal_on_life_loss
        self.grayscale_obs = grayscale_obs
        self.grayscale_newaxis = grayscale_newaxis
        self.scale_obs = scale_obs

        # buffer of most recent two observations for max pooling
        assert isinstance(env.observation_space, Box)
        if grayscale_obs:
            self.obs_buffer = [
                np.empty(env.observation_space.shape[:2], dtype=np.uint8),
                np.empty(env.observation_space.shape[:2], dtype=np.uint8),
            ]
        else:
            self.obs_buffer = [
                np.empty(env.observation_space.shape, dtype=np.uint8),
                np.empty(env.observation_space.shape, dtype=np.uint8),
            ]

        self.lives = 0
        self.game_over = False

        _low, _high, _obs_dtype = (
            (0, 255, np.uint8) if not scale_obs else (0, 1, np.float32)
        )
        _shape = (screen_size, screen_size, 1 if grayscale_obs else 3)
        if grayscale_obs and not grayscale_newaxis:
            _shape = _shape[:-1]  # Remove channel axis
        self.observation_space = Box(
            low=_low, high=_high, shape=_shape, dtype=_obs_dtype
        )

    @property
    def ale(self):
        """Make ale as a class property to avoid serialization error."""
        return self.env.unwrapped.ale

    def step(self, action):
        """Applies the preprocessing for an :meth:`env.step`."""
        total_reward, terminated, truncated, info = 0.0, False, False, {}

        for t in range(self.frame_skip):
            _, reward, terminated, truncated, info = self.env.step(action)
            total_reward += reward
            self.game_over = terminated

            if self.terminal_on_life_loss:
                new_lives = self.ale.lives()
                terminated = terminated or new_lives < self.lives
                self.game_over = terminated
                self.lives = new_lives

            if terminated or truncated:
                break
            # Only the final two frames of the skip window are captured; they
            # are later max-pooled in _get_obs to remove Atari sprite flicker.
            if t == self.frame_skip - 2:
                if self.grayscale_obs:
                    self.ale.getScreenGrayscale(self.obs_buffer[1])
                else:
                    self.ale.getScreenRGB(self.obs_buffer[1])
            elif t == self.frame_skip - 1:
                if self.grayscale_obs:
                    self.ale.getScreenGrayscale(self.obs_buffer[0])
                else:
                    self.ale.getScreenRGB(self.obs_buffer[0])
        return self._get_obs(), total_reward, terminated, truncated, info

    def reset(self, **kwargs):
        """Resets the environment using preprocessing."""
        # NoopReset: take a random number of no-op actions to randomize the start state.
        _, reset_info = self.env.reset(**kwargs)

        noops = (
            self.env.unwrapped.np_random.integers(1, self.noop_max + 1)
            if self.noop_max > 0
            else 0
        )
        for _ in range(noops):
            _, _, terminated, truncated, step_info = self.env.step(0)
            reset_info.update(step_info)
            if terminated or truncated:
                _, reset_info = self.env.reset(**kwargs)

        self.lives = self.ale.lives()
        if self.grayscale_obs:
            self.ale.getScreenGrayscale(self.obs_buffer[0])
        else:
            self.ale.getScreenRGB(self.obs_buffer[0])
        self.obs_buffer[1].fill(0)

        return self._get_obs(), reset_info

    def _get_obs(self):
        # Max-pool the last two frames in place, then resize to the target screen size.
        if self.frame_skip > 1:  # more efficient in-place pooling
            np.maximum(self.obs_buffer[0], self.obs_buffer[1], out=self.obs_buffer[0])
        assert cv2 is not None
        obs = cv2.resize(
            self.obs_buffer[0],
            (self.screen_size, self.screen_size),
            interpolation=cv2.INTER_AREA,
        )

        if self.scale_obs:
            obs = np.asarray(obs, dtype=np.float32) / 255.0
        else:
            obs = np.asarray(obs, dtype=np.uint8)

        if self.grayscale_obs and self.grayscale_newaxis:
            obs = np.expand_dims(obs, axis=-1)  # Add a channel axis
        return obs


================================================
FILE: gym/wrappers/autoreset.py
================================================
"""Wrapper that autoreset environments when `terminated=True` or `truncated=True`."""
import gym


class AutoResetWrapper(gym.Wrapper):
    """A class for providing an automatic reset functionality for gym environments when calling :meth:`self.step`.

    When calling step causes :meth:`Env.step` to return `terminated=True` or `truncated=True`,
    :meth:`Env.reset` is called, and the return format of :meth:`self.step` is as follows:
    ``(new_obs, final_reward, final_terminated, final_truncated, info)`` with new step API
    and ``(new_obs, final_reward, final_done, info)`` with the old step API.
    - ``new_obs`` is the first observation after calling :meth:`self.env.reset`
    - ``final_reward`` is the reward after calling :meth:`self.env.step`, prior to calling :meth:`self.env.reset`.
    - ``final_terminated`` is the terminated value before calling :meth:`self.env.reset`.
    - ``final_truncated`` is the truncated value before calling :meth:`self.env.reset`.
      Both `final_terminated` and `final_truncated` cannot be False.
    - ``info`` is a dict containing all the keys from the info dict returned by the call to
      :meth:`self.env.reset`, with an additional key "final_observation" containing the observation
      returned by the last call to :meth:`self.env.step` and "final_info" containing the info dict
      returned by the last call to :meth:`self.env.step`.

    Warning:
        When using this wrapper to collect rollouts, note that when :meth:`Env.step` returns
        `terminated` or `truncated`, a new observation from after calling :meth:`Env.reset`
        is returned by :meth:`Env.step` alongside the final reward, terminated and truncated
        state from the previous episode.
        If you need the final state from the previous episode, you need to retrieve it via the
        "final_observation" key in the info dict.
        Make sure you know what you're doing if you use this wrapper!
    """

    def __init__(self, env: gym.Env):
        """A class for providing an automatic reset functionality for gym environments when calling :meth:`self.step`.

        Args:
            env (gym.Env): The environment to apply the wrapper
        """
        super().__init__(env)

    def step(self, action):
        """Steps through the environment with action and resets the environment if a terminated or truncated signal is encountered.

        Args:
            action: The action to take

        Returns:
            The autoreset environment :meth:`step`
        """
        obs, reward, terminated, truncated, info = self.env.step(action)
        if terminated or truncated:
            new_obs, new_info = self.env.reset()
            # The reset info must not already use the keys reserved for the
            # final step's data, otherwise that data would be silently lost.
            assert (
                "final_observation" not in new_info
            ), 'info dict cannot contain key "final_observation" '
            assert (
                "final_info" not in new_info
            ), 'info dict cannot contain key "final_info" '

            new_info["final_observation"] = obs
            new_info["final_info"] = info

            obs = new_obs
            info = new_info

        return obs, reward, terminated, truncated, info


================================================
FILE: gym/wrappers/clip_action.py
================================================
"""Wrapper for clipping actions within a valid bound."""
import numpy as np

import gym
from gym import ActionWrapper
from gym.spaces import Box


class ClipAction(ActionWrapper):
    """Clip the continuous action within the valid :class:`Box` observation space bound.

    Example:
        >>> import gym
        >>> env = gym.make('Bipedal-Walker-v3')
        >>> env = ClipAction(env)
        >>> env.action_space
        Box(-1.0, 1.0, (4,), float32)
        >>> env.step(np.array([5.0, 2.0, -10.0, 0.0]))
        # Executes the action np.array([1.0, 1.0, -1.0, 0]) in the base environment
    """

    def __init__(self, env: gym.Env):
        """A wrapper for clipping continuous actions within the valid bound.

        Args:
            env: The environment to apply the wrapper
        """
        assert isinstance(env.action_space, Box)
        super().__init__(env)

    def action(self, action):
        """Clips the action within the valid bounds.
        Args:
            action: The action to clip

        Returns:
            The clipped action
        """
        return np.clip(action, self.action_space.low, self.action_space.high)


================================================
FILE: gym/wrappers/compatibility.py
================================================
"""A compatibility wrapper converting an old-style environment into a valid environment."""
import sys
from typing import Any, Dict, Optional, Tuple

import gym
from gym.core import ObsType
from gym.utils.step_api_compatibility import convert_to_terminated_truncated_step_api

# Protocol/runtime_checkable are only available from the stdlib on 3.8+;
# on 3.7 they come from typing_extensions, and earlier versions degrade to no-ops.
if sys.version_info >= (3, 8):
    from typing import Protocol, runtime_checkable
elif sys.version_info >= (3, 7):
    from typing_extensions import Protocol, runtime_checkable
else:
    Protocol = object
    runtime_checkable = lambda x: x  # noqa: E731


@runtime_checkable
class LegacyEnv(Protocol):
    """A protocol for environments using the old step API."""

    observation_space: gym.Space
    action_space: gym.Space

    def reset(self) -> Any:
        """Reset the environment and return the initial observation."""
        ...

    def step(self, action: Any) -> Tuple[Any, float, bool, Dict]:
        """Run one timestep of the environment's dynamics."""
        ...

    def render(self, mode: Optional[str] = "human") -> Any:
        """Render the environment."""
        ...

    def close(self):
        """Close the environment."""
        ...

    def seed(self, seed: Optional[int] = None):
        """Set the seed for this env's random number generator(s)."""
        ...


class EnvCompatibility(gym.Env):
    r"""A wrapper which can transform an environment from the old API to the new API.

    Old step API refers to step() method returning (observation, reward, done, info), and reset() only retuning the observation.
    New step API refers to step() method returning (observation, reward, terminated, truncated, info) and reset() returning (observation, info).
    (Refer to docs for details on the API change)

    Known limitations:
    - Environments that use `self.np_random` might not work as expected.
    """

    def __init__(self, old_env: LegacyEnv, render_mode: Optional[str] = None):
        """A wrapper which converts old-style envs to valid modern envs.

        Some information may be lost in the conversion, so we recommend updating your environment.

        Args:
            old_env (LegacyEnv): the env to wrap, implemented with the old API
            render_mode (str): the render mode to use when rendering the environment, passed automatically to env.render
        """
        self.metadata = getattr(old_env, "metadata", {"render_modes": []})
        self.render_mode = render_mode
        self.reward_range = getattr(old_env, "reward_range", None)
        self.spec = getattr(old_env, "spec", None)
        self.env = old_env

        self.observation_space = old_env.observation_space
        self.action_space = old_env.action_space

    def reset(
        self, seed: Optional[int] = None, options: Optional[dict] = None
    ) -> Tuple[ObsType, dict]:
        """Resets the environment.

        Args:
            seed: the seed to reset the environment with
            options: the options to reset the environment with

        Returns:
            (observation, info)
        """
        # Old-style envs take the seed via a separate seed() call.
        if seed is not None:
            self.env.seed(seed)
        # Options are ignored

        if self.render_mode == "human":
            self.render()

        # Old reset() returns only the observation; the new API adds an info dict.
        return self.env.reset(), {}

    def step(self, action: Any) -> Tuple[Any, float, bool, bool, Dict]:
        """Steps through the environment.

        Args:
            action: action to step through the environment with

        Returns:
            (observation, reward, terminated, truncated, info)
        """
        obs, reward, done, info = self.env.step(action)

        if self.render_mode == "human":
            self.render()

        return convert_to_terminated_truncated_step_api((obs, reward, done, info))

    def render(self) -> Any:
        """Renders the environment.

        Returns:
            The rendering of the environment, depending on the render mode
        """
        return self.env.render(mode=self.render_mode)

    def close(self):
        """Closes the environment."""
        self.env.close()

    def __str__(self):
        """Returns the wrapper name and the unwrapped environment string."""
        return f"<{type(self).__name__}{self.env}>"

    def __repr__(self):
        """Returns the string representation of the wrapper."""
        return str(self)


================================================
FILE: gym/wrappers/env_checker.py
================================================
"""A passive environment checker wrapper for an environment's observation and action space along with the reset, step and render functions."""
import gym
from gym.core import ActType
from gym.utils.passive_env_checker import (
    check_action_space,
    check_observation_space,
    env_render_passive_checker,
    env_reset_passive_checker,
    env_step_passive_checker,
)


class PassiveEnvChecker(gym.Wrapper):
    """A passive environment checker wrapper that surrounds the step, reset and render functions to check they follow the gym API."""

    def __init__(self, env):
        """Initialises the wrapper with the environments, run the observation and action space tests."""
        super().__init__(env)

        assert hasattr(
            env, "action_space"
        ), "The environment must specify an action space. https://www.gymlibrary.dev/content/environment_creation/"
        check_action_space(env.action_space)
        assert hasattr(
            env, "observation_space"
        ), "The environment must specify an observation space.
https://www.gymlibrary.dev/content/environment_creation/"
        check_observation_space(env.observation_space)

        # Each check runs exactly once, on the first call of the wrapped function.
        self.checked_reset = False
        self.checked_step = False
        self.checked_render = False

    def step(self, action: ActType):
        """Steps through the environment that on the first call will run the `passive_env_step_check`."""
        if self.checked_step is False:
            self.checked_step = True
            return env_step_passive_checker(self.env, action)
        else:
            return self.env.step(action)

    def reset(self, **kwargs):
        """Resets the environment that on the first call will run the `passive_env_reset_check`."""
        if self.checked_reset is False:
            self.checked_reset = True
            return env_reset_passive_checker(self.env, **kwargs)
        else:
            return self.env.reset(**kwargs)

    def render(self, *args, **kwargs):
        """Renders the environment that on the first call will run the `passive_env_render_check`."""
        if self.checked_render is False:
            self.checked_render = True
            return env_render_passive_checker(self.env, *args, **kwargs)
        else:
            return self.env.render(*args, **kwargs)


================================================
FILE: gym/wrappers/filter_observation.py
================================================
"""A wrapper for filtering dictionary observations by their keys."""
import copy
from typing import Sequence

import gym
from gym import spaces


class FilterObservation(gym.ObservationWrapper):
    """Filter Dict observation space by the keys.

    Example:
        >>> import gym
        >>> env = gym.wrappers.TransformObservation(
        ...     gym.make('CartPole-v1'), lambda obs: {'obs': obs, 'time': 0}
        ... )
        >>> env.observation_space = gym.spaces.Dict(obs=env.observation_space, time=gym.spaces.Discrete(1))
        >>> env.reset()
        {'obs': array([-0.00067088, -0.01860439,  0.04772898, -0.01911527], dtype=float32), 'time': 0}
        >>> env = FilterObservation(env, filter_keys=['time'])
        >>> env.reset()
        {'obs': array([ 0.04560107,  0.04466959, -0.0328232 , -0.02367178], dtype=float32)}
        >>> env.step(0)
        ({'obs': array([ 0.04649447, -0.14996664, -0.03329664,  0.25847703], dtype=float32)}, 1.0, False, {})
    """

    def __init__(self, env: gym.Env, filter_keys: Sequence[str] = None):
        """A wrapper that filters dictionary observations by their keys.

        Args:
            env: The environment to apply the wrapper
            filter_keys: List of keys to be included in the observations.
                If ``None``, observations will not be filtered and this wrapper has no effect

        Raises:
            ValueError: If the environment's observation space is not :class:`spaces.Dict`
            ValueError: If any of the `filter_keys` are not included in the original `env`'s observation space
        """
        super().__init__(env)

        wrapped_observation_space = env.observation_space
        if not isinstance(wrapped_observation_space, spaces.Dict):
            raise ValueError(
                f"FilterObservationWrapper is only usable with dict observations, "
                f"environment observation space is {type(wrapped_observation_space)}"
            )

        observation_keys = wrapped_observation_space.spaces.keys()
        if filter_keys is None:
            filter_keys = tuple(observation_keys)

        missing_keys = {key for key in filter_keys if key not in observation_keys}
        if missing_keys:
            raise ValueError(
                "All the filter_keys must be included in the original observation space.\n"
                f"Filter keys: {filter_keys}\n"
                f"Observation keys: {observation_keys}\n"
                f"Missing keys: {missing_keys}"
            )

        # Rebuild the Dict space (deep-copied sub-spaces) with only the kept keys.
        self.observation_space = type(wrapped_observation_space)(
            [
                (name, copy.deepcopy(space))
                for name, space in wrapped_observation_space.spaces.items()
                if name in filter_keys
            ]
        )

        self._env = env
        self._filter_keys = tuple(filter_keys)

    def observation(self, observation):
        """Filters the observations.

        Args:
            observation: The observation to filter

        Returns:
            The filtered observations
        """
        filter_observation = self._filter_observation(observation)
        return filter_observation

    def _filter_observation(self, observation):
        # Rebuild the observation mapping with only the kept keys,
        # preserving the observation's concrete mapping type.
        observation = type(observation)(
            [
                (name, value)
                for name, value in observation.items()
                if name in self._filter_keys
            ]
        )
        return observation


================================================
FILE: gym/wrappers/flatten_observation.py
================================================
"""Wrapper for flattening observations of an environment."""
import gym
import gym.spaces as spaces


class FlattenObservation(gym.ObservationWrapper):
    """Observation wrapper that flattens the observation.

    Example:
        >>> import gym
        >>> env = gym.make('CarRacing-v1')
        >>> env.observation_space.shape
        (96, 96, 3)
        >>> env = FlattenObservation(env)
        >>> env.observation_space.shape
        (27648,)
        >>> obs = env.reset()
        >>> obs.shape
        (27648,)
    """

    def __init__(self, env: gym.Env):
        """Flattens the observations of an environment.

        Args:
            env: The environment to apply the wrapper
        """
        super().__init__(env)
        self.observation_space = spaces.flatten_space(env.observation_space)

    def observation(self, observation):
        """Flattens an observation.

        Args:
            observation: The observation to flatten

        Returns:
            The flattened observation
        """
        return spaces.flatten(self.env.observation_space, observation)


================================================
FILE: gym/wrappers/frame_stack.py
================================================
"""Wrapper that stacks frames."""
from collections import deque
from typing import Union

import numpy as np

import gym
from gym.error import DependencyNotInstalled
from gym.spaces import Box


class LazyFrames:
    """Ensures common frames are only stored once to optimize memory use.

    To further reduce the memory use, it is optionally to turn on lz4 to compress the observations.

    Note:
        This object should only be converted to numpy array just before forward pass.
""" __slots__ = ("frame_shape", "dtype", "shape", "lz4_compress", "_frames") def __init__(self, frames: list, lz4_compress: bool = False): """Lazyframe for a set of frames and if to apply lz4. Args: frames (list): The frames to convert to lazy frames lz4_compress (bool): Use lz4 to compress the frames internally Raises: DependencyNotInstalled: lz4 is not installed """ self.frame_shape = tuple(frames[0].shape) self.shape = (len(frames),) + self.frame_shape self.dtype = frames[0].dtype if lz4_compress: try: from lz4.block import compress except ImportError: raise DependencyNotInstalled( "lz4 is not installed, run `pip install gym[other]`" ) frames = [compress(frame) for frame in frames] self._frames = frames self.lz4_compress = lz4_compress def __array__(self, dtype=None): """Gets a numpy array of stacked frames with specific dtype. Args: dtype: The dtype of the stacked frames Returns: The array of stacked frames with dtype """ arr = self[:] if dtype is not None: return arr.astype(dtype) return arr def __len__(self): """Returns the number of frame stacks. Returns: The number of frame stacks """ return self.shape[0] def __getitem__(self, int_or_slice: Union[int, slice]): """Gets the stacked frames for a particular index or slice. Args: int_or_slice: Index or slice to get items for Returns: np.stacked frames for the int or slice """ if isinstance(int_or_slice, int): return self._check_decompress(self._frames[int_or_slice]) # single frame return np.stack( [self._check_decompress(f) for f in self._frames[int_or_slice]], axis=0 ) def __eq__(self, other): """Checks that the current frames are equal to the other object.""" return self.__array__() == other def _check_decompress(self, frame): if self.lz4_compress: from lz4.block import decompress return np.frombuffer(decompress(frame), dtype=self.dtype).reshape( self.frame_shape ) return frame class FrameStack(gym.ObservationWrapper): """Observation wrapper that stacks the observations in a rolling manner. 
For example, if the number of stacks is 4, then the returned observation contains the most recent 4 observations. For environment 'Pendulum-v1', the original observation is an array with shape [3], so if we stack 4 observations, the processed observation has shape [4, 3]. Note: - To be memory efficient, the stacked observations are wrapped by :class:`LazyFrame`. - The observation space must be :class:`Box` type. If one uses :class:`Dict` as observation space, it should apply :class:`FlattenObservation` wrapper first. - After :meth:`reset` is called, the frame buffer will be filled with the initial observation. I.e. the observation returned by :meth:`reset` will consist of ``num_stack`-many identical frames, Example: >>> import gym >>> env = gym.make('CarRacing-v1') >>> env = FrameStack(env, 4) >>> env.observation_space Box(4, 96, 96, 3) >>> obs = env.reset() >>> obs.shape (4, 96, 96, 3) """ def __init__( self, env: gym.Env, num_stack: int, lz4_compress: bool = False, ): """Observation wrapper that stacks the observations in a rolling manner. Args: env (Env): The environment to apply the wrapper num_stack (int): The number of frames to stack lz4_compress (bool): Use lz4 to compress the frames internally """ super().__init__(env) self.num_stack = num_stack self.lz4_compress = lz4_compress self.frames = deque(maxlen=num_stack) low = np.repeat(self.observation_space.low[np.newaxis, ...], num_stack, axis=0) high = np.repeat( self.observation_space.high[np.newaxis, ...], num_stack, axis=0 ) self.observation_space = Box( low=low, high=high, dtype=self.observation_space.dtype ) def observation(self, observation): """Converts the wrappers current frames to lazy frames. 
Args: observation: Ignored Returns: :class:`LazyFrames` object for the wrapper's frame buffer, :attr:`self.frames` """ assert len(self.frames) == self.num_stack, (len(self.frames), self.num_stack) return LazyFrames(list(self.frames), self.lz4_compress) def step(self, action): """Steps through the environment, appending the observation to the frame buffer. Args: action: The action to step through the environment with Returns: Stacked observations, reward, terminated, truncated, and information from the environment """ observation, reward, terminated, truncated, info = self.env.step(action) self.frames.append(observation) return self.observation(None), reward, terminated, truncated, info def reset(self, **kwargs): """Reset the environment with kwargs. Args: **kwargs: The kwargs for the environment reset Returns: The stacked observations """ obs, info = self.env.reset(**kwargs) [self.frames.append(obs) for _ in range(self.num_stack)] return self.observation(None), info ================================================ FILE: gym/wrappers/gray_scale_observation.py ================================================ """Wrapper that converts a color observation to grayscale.""" import numpy as np import gym from gym.spaces import Box class GrayScaleObservation(gym.ObservationWrapper): """Convert the image observation from RGB to gray scale. Example: >>> env = gym.make('CarRacing-v1') >>> env.observation_space Box(0, 255, (96, 96, 3), uint8) >>> env = GrayScaleObservation(gym.make('CarRacing-v1')) >>> env.observation_space Box(0, 255, (96, 96), uint8) >>> env = GrayScaleObservation(gym.make('CarRacing-v1'), keep_dim=True) >>> env.observation_space Box(0, 255, (96, 96, 1), uint8) """ def __init__(self, env: gym.Env, keep_dim: bool = False): """Convert the image observation from RGB to gray scale. Args: env (Env): The environment to apply the wrapper keep_dim (bool): If `True`, a singleton dimension will be added, i.e. observations are of the shape AxBx1. 
    def __init__(self, env: gym.Env, keep_dim: bool = False):
        """Convert the image observation from RGB to gray scale.

        Args:
            env (Env): The environment to apply the wrapper
            keep_dim (bool): If `True`, a singleton dimension will be added, i.e. observations are of the shape AxBx1.
                Otherwise, they are of shape AxB.
        """
        super().__init__(env)
        self.keep_dim = keep_dim

        # The wrapped space must be an RGB image: a rank-3 Box whose last
        # axis holds the 3 colour channels.
        assert (
            isinstance(self.observation_space, Box)
            and len(self.observation_space.shape) == 3
            and self.observation_space.shape[-1] == 3
        )

        # Drop the channel axis (H, W, 3) -> (H, W), optionally keeping a
        # singleton channel dimension for consumers that expect rank-3 input.
        obs_shape = self.observation_space.shape[:2]
        if self.keep_dim:
            self.observation_space = Box(
                low=0, high=255, shape=(obs_shape[0], obs_shape[1], 1), dtype=np.uint8
            )
        else:
            self.observation_space = Box(
                low=0, high=255, shape=obs_shape, dtype=np.uint8
            )
    def __init__(self, env):
        """Initialize a :class:`HumanRendering` instance.

        Args:
            env: The environment that is being wrapped

        Raises:
            AssertionError: If the wrapped environment's ``render_mode`` is not
                ``rgb_array``/``rgb_array_list``, or ``render_fps`` is missing
                from its metadata.
        """
        super().__init__(env)
        # This wrapper can only blit RGB frames to a window, so the base
        # environment must be able to produce RGB arrays.
        assert env.render_mode in [
            "rgb_array",
            "rgb_array_list",
        ], f"Expected env.render_mode to be one of 'rgb_array' or 'rgb_array_list' but got '{env.render_mode}'"
        assert (
            "render_fps" in env.metadata
        ), "The base environment must specify 'render_fps' to be used with the HumanRendering wrapper"

        # pygame display state; created lazily on the first rendered frame.
        self.screen_size = None
        self.window = None
        self.clock = None
    def _render_frame(self):
        """Fetch the last frame from the base environment and render it to the screen."""
        try:
            import pygame
        except ImportError:
            raise DependencyNotInstalled(
                "pygame is not installed, run `pip install gym[box2d]`"
            )
        if self.env.render_mode == "rgb_array_list":
            # The base env accumulates frames; display only the most recent.
            last_rgb_array = self.env.render()
            assert isinstance(last_rgb_array, list)
            last_rgb_array = last_rgb_array[-1]
        elif self.env.render_mode == "rgb_array":
            last_rgb_array = self.env.render()
        else:
            raise Exception(
                f"Wrapped environment must have mode 'rgb_array' or 'rgb_array_list', actual render mode: {self.env.render_mode}"
            )
        assert isinstance(last_rgb_array, np.ndarray)

        # pygame surfaces are indexed (x, y) while the frame is (row, col),
        # so swap the first two axes before building the surface.
        rgb_array = np.transpose(last_rgb_array, axes=(1, 0, 2))

        if self.screen_size is None:
            self.screen_size = rgb_array.shape[:2]

        # The window is sized from the first frame; later frames must match.
        assert (
            self.screen_size == rgb_array.shape[:2]
        ), f"The shape of the rgb array has changed from {self.screen_size} to {rgb_array.shape[:2]}"

        if self.window is None:
            pygame.init()
            pygame.display.init()
            self.window = pygame.display.set_mode(self.screen_size)

        if self.clock is None:
            self.clock = pygame.time.Clock()

        surf = pygame.surfarray.make_surface(rgb_array)
        self.window.blit(surf, (0, 0))
        pygame.event.pump()
        # Throttle to the declared frame rate before presenting the frame.
        self.clock.tick(self.metadata["render_fps"])
        pygame.display.flip()
    def __init__(
        self,
        env,
        path: Optional[str] = None,
        metadata: Optional[dict] = None,
        enabled: bool = True,
        base_path: Optional[str] = None,
    ):
        """Video recorder renders a nice movie of a rollout, frame by frame.

        Args:
            env (Env): Environment to take video of.
            path (Optional[str]): Path to the video file; will be randomly chosen if omitted.
            metadata (Optional[dict]): Contents to save to the metadata file.
            enabled (bool): Whether to actually record video, or just no-op (for convenience)
            base_path (Optional[str]): Alternatively, path to the video file without extension, which will be added.

        Raises:
            Error: You can pass at most one of `path` or `base_path`
            Error: Invalid path given that must have a particular file extension
        """
        try:
            # check that moviepy is now installed
            import moviepy  # noqa: F401
        except ImportError:
            raise error.DependencyNotInstalled(
                "MoviePy is not installed, run `pip install moviepy`"
            )

        self._async = env.metadata.get("semantics.async")
        self.enabled = enabled
        self._closed = False
        # Frames returned by render() but not yet handed back to the caller.
        self.render_history = []

        self.env = env

        self.render_mode = env.render_mode

        # Recording requires frames as RGB arrays; otherwise become a no-op.
        if "rgb_array_list" != self.render_mode and "rgb_array" != self.render_mode:
            logger.warn(
                f"Disabling video recorder because environment {env} was not initialized with any compatible video "
                "mode between `rgb_array` and `rgb_array_list`"
            )
            # Disable since the environment has not been initialized with a compatible `render_mode`
            self.enabled = False

        # Don't bother setting anything else if not enabled
        if not self.enabled:
            return

        if path is not None and base_path is not None:
            raise error.Error("You can pass at most one of `path` or `base_path`.")

        required_ext = ".mp4"
        if path is None:
            if base_path is not None:
                # Base path given, append ext
                path = base_path + required_ext
            else:
                # Otherwise, just generate a unique filename
                with tempfile.NamedTemporaryFile(suffix=required_ext) as f:
                    path = f.name
        self.path = path

        path_base, actual_ext = os.path.splitext(self.path)

        if actual_ext != required_ext:
            raise error.Error(
                f"Invalid path given: {self.path} -- must have file extension {required_ext}."
            )

        self.frames_per_sec = env.metadata.get("render_fps", 30)

        # Set to True when the env stops returning frames; recorder becomes a no-op.
        self.broken = False

        # Dump metadata
        self.metadata = metadata or {}
        self.metadata["content_type"] = "video/mp4"
        self.metadata_path = f"{path_base}.meta.json"
        self.write_metadata()

        logger.info(f"Starting new video recorder writing to {self.path}")
        # RGB frames collected so far; encoded to video in close().
        self.recorded_frames = []
    def capture_frame(self):
        """Render the given `env` and add the resulting frame to the video."""
        frame = self.env.render()
        if isinstance(frame, List):
            # `rgb_array_list` render mode returns all frames since the last
            # render call; keep them for replay and record only the newest.
            self.render_history += frame
            frame = frame[-1]

        # No-op when disabled or already broken.
        if not self.functional:
            return
        if self._closed:
            logger.warn(
                "The video recorder has been closed and no frames will be captured anymore."
            )
            return
        logger.debug("Capturing video frame: path=%s", self.path)

        if frame is None:
            if self._async:
                return
            else:
                # Indicates a bug in the environment: don't want to raise
                # an error here.
                logger.warn(
                    "Env returned None on `render()`. Disabling further rendering for video recorder by marking as "
                    f"disabled: path={self.path} metadata_path={self.metadata_path}"
                )
                self.broken = True
        else:
            self.recorded_frames.append(frame)
    def close(self):
        """Flush all data to disk and close any open frame encoders."""
        # Idempotent: closing a disabled or already-closed recorder is a no-op.
        if not self.enabled or self._closed:
            return

        # First close the environment
        self.env.close()

        # Close the encoder
        if len(self.recorded_frames) > 0:
            try:
                from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
            except ImportError:
                raise error.DependencyNotInstalled(
                    "MoviePy is not installed, run `pip install moviepy`"
                )

            logger.debug(f"Closing video encoder: path={self.path}")
            clip = ImageSequenceClip(self.recorded_frames, fps=self.frames_per_sec)
            clip.write_videofile(self.path)
        else:
            # No frames captured. Set metadata.
            if self.metadata is None:
                self.metadata = {}
            self.metadata["empty"] = True

        self.write_metadata()

        # Stop tracking this for autoclose
        self._closed = True
def update_mean_var_count_from_moments(
    mean, var, count, batch_mean, batch_var, batch_count
):
    """Updates the mean, var and count using the previous mean, var, count and batch values."""
    # Parallel-variance merge (Chan et al.): combine the running moments with
    # the batch moments in one pass, without revisiting earlier samples.
    total = count + batch_count
    shift = batch_mean - mean

    # New mean is the count-weighted blend of the two means.
    merged_mean = mean + shift * batch_count / total

    # Combine the two sums of squared deviations, plus a correction term for
    # the offset between the two means.
    ssd_running = var * count
    ssd_batch = batch_var * batch_count
    merged_ssd = ssd_running + ssd_batch + np.square(shift) * count * batch_count / total

    return merged_mean, merged_ssd / total, total
""" super().__init__(env) self.num_envs = getattr(env, "num_envs", 1) self.is_vector_env = getattr(env, "is_vector_env", False) if self.is_vector_env: self.obs_rms = RunningMeanStd(shape=self.single_observation_space.shape) else: self.obs_rms = RunningMeanStd(shape=self.observation_space.shape) self.epsilon = epsilon def step(self, action): """Steps through the environment and normalizes the observation.""" obs, rews, terminateds, truncateds, infos = self.env.step(action) if self.is_vector_env: obs = self.normalize(obs) else: obs = self.normalize(np.array([obs]))[0] return obs, rews, terminateds, truncateds, infos def reset(self, **kwargs): """Resets the environment and normalizes the observation.""" obs, info = self.env.reset(**kwargs) if self.is_vector_env: return self.normalize(obs), info else: return self.normalize(np.array([obs]))[0], info def normalize(self, obs): """Normalises the observation using the running mean and variance of the observations.""" self.obs_rms.update(obs) return (obs - self.obs_rms.mean) / np.sqrt(self.obs_rms.var + self.epsilon) class NormalizeReward(gym.core.Wrapper): r"""This wrapper will normalize immediate rewards s.t. their exponential moving average has a fixed variance. The exponential moving average will have variance :math:`(1 - \gamma)^2`. Note: The scaling depends on past trajectories and rewards will not be scaled correctly if the wrapper was newly instantiated or the policy was changed recently. """ def __init__( self, env: gym.Env, gamma: float = 0.99, epsilon: float = 1e-8, ): """This wrapper will normalize immediate rewards s.t. their exponential moving average has a fixed variance. Args: env (env): The environment to apply the wrapper epsilon (float): A stability parameter gamma (float): The discount factor that is used in the exponential moving average. 
""" super().__init__(env) self.num_envs = getattr(env, "num_envs", 1) self.is_vector_env = getattr(env, "is_vector_env", False) self.return_rms = RunningMeanStd(shape=()) self.returns = np.zeros(self.num_envs) self.gamma = gamma self.epsilon = epsilon def step(self, action): """Steps through the environment, normalizing the rewards returned.""" obs, rews, terminateds, truncateds, infos = self.env.step(action) if not self.is_vector_env: rews = np.array([rews]) self.returns = self.returns * self.gamma + rews rews = self.normalize(rews) dones = np.logical_or(terminateds, truncateds) self.returns[dones] = 0.0 if not self.is_vector_env: rews = rews[0] return obs, rews, terminateds, truncateds, infos def normalize(self, rews): """Normalizes the rewards with the running mean rewards and their variance.""" self.return_rms.update(self.returns) return rews / np.sqrt(self.return_rms.var + self.epsilon) ================================================ FILE: gym/wrappers/order_enforcing.py ================================================ """Wrapper to enforce the proper ordering of environment operations.""" import gym from gym.error import ResetNeeded class OrderEnforcing(gym.Wrapper): """A wrapper that will produce an error if :meth:`step` is called before an initial :meth:`reset`. Example: >>> from gym.envs.classic_control import CartPoleEnv >>> env = CartPoleEnv() >>> env = OrderEnforcing(env) >>> env.step(0) ResetNeeded: Cannot call env.step() before calling env.reset() >>> env.render() ResetNeeded: Cannot call env.render() before calling env.reset() >>> env.reset() >>> env.render() >>> env.step(0) """ def __init__(self, env: gym.Env, disable_render_order_enforcing: bool = False): """A wrapper that will produce an error if :meth:`step` is called before an initial :meth:`reset`. 
Args: env: The environment to wrap disable_render_order_enforcing: If to disable render order enforcing """ super().__init__(env) self._has_reset: bool = False self._disable_render_order_enforcing: bool = disable_render_order_enforcing def step(self, action): """Steps through the environment with `kwargs`.""" if not self._has_reset: raise ResetNeeded("Cannot call env.step() before calling env.reset()") return self.env.step(action) def reset(self, **kwargs): """Resets the environment with `kwargs`.""" self._has_reset = True return self.env.reset(**kwargs) def render(self, *args, **kwargs): """Renders the environment with `kwargs`.""" if not self._disable_render_order_enforcing and not self._has_reset: raise ResetNeeded( "Cannot call `env.render()` before calling `env.reset()`, if this is a intended action, " "set `disable_render_order_enforcing=True` on the OrderEnforcer wrapper." ) return self.env.render(*args, **kwargs) @property def has_reset(self): """Returns if the environment has been reset before.""" return self._has_reset ================================================ FILE: gym/wrappers/pixel_observation.py ================================================ """Wrapper for augmenting observations by pixel values.""" import collections import copy from collections.abc import MutableMapping from typing import Any, Dict, List, Optional, Tuple import numpy as np import gym from gym import spaces STATE_KEY = "state" class PixelObservationWrapper(gym.ObservationWrapper): """Augment observations by pixel values. Observations of this wrapper will be dictionaries of images. You can also choose to add the observation of the base environment to this dictionary. In that case, if the base environment has an observation space of type :class:`Dict`, the dictionary of rendered images will be updated with the base environment's observation. 
If, however, the observation space is of type :class:`Box`, the base environment's observation (which will be an element of the :class:`Box` space) will be added to the dictionary under the key "state". Example: >>> import gym >>> env = PixelObservationWrapper(gym.make('CarRacing-v1', render_mode="rgb_array")) >>> obs = env.reset() >>> obs.keys() odict_keys(['pixels']) >>> obs['pixels'].shape (400, 600, 3) >>> env = PixelObservationWrapper(gym.make('CarRacing-v1', render_mode="rgb_array"), pixels_only=False) >>> obs = env.reset() >>> obs.keys() odict_keys(['state', 'pixels']) >>> obs['state'].shape (96, 96, 3) >>> obs['pixels'].shape (400, 600, 3) >>> env = PixelObservationWrapper(gym.make('CarRacing-v1', render_mode="rgb_array"), pixel_keys=('obs',)) >>> obs = env.reset() >>> obs.keys() odict_keys(['obs']) >>> obs['obs'].shape (400, 600, 3) """ def __init__( self, env: gym.Env, pixels_only: bool = True, render_kwargs: Optional[Dict[str, Dict[str, Any]]] = None, pixel_keys: Tuple[str, ...] = ("pixels",), ): """Initializes a new pixel Wrapper. Args: env: The environment to wrap. pixels_only (bool): If ``True`` (default), the original observation returned by the wrapped environment will be discarded, and a dictionary observation will only include pixels. If ``False``, the observation dictionary will contain both the original observations and the pixel observations. render_kwargs (dict): Optional dictionary containing that maps elements of ``pixel_keys``to keyword arguments passed to the :meth:`self.render` method. pixel_keys: Optional custom string specifying the pixel observation's key in the ``OrderedDict`` of observations. Defaults to ``(pixels,)``. Raises: AssertionError: If any of the keys in ``render_kwargs``do not show up in ``pixel_keys``. ValueError: If ``env``'s observation space is not compatible with the wrapper. Supported formats are a single array, or a dict of arrays. 
ValueError: If ``env``'s observation already contains any of the specified ``pixel_keys``. TypeError: When an unexpected pixel type is used """ super().__init__(env) # Avoid side-effects that occur when render_kwargs is manipulated render_kwargs = copy.deepcopy(render_kwargs) self.render_history = [] if render_kwargs is None: render_kwargs = {} for key in render_kwargs: assert key in pixel_keys, ( "The argument render_kwargs should map elements of " "pixel_keys to dictionaries of keyword arguments. " f"Found key '{key}' in render_kwargs but not in pixel_keys." ) default_render_kwargs = {} if not env.render_mode: raise AttributeError( "env.render_mode must be specified to use PixelObservationWrapper:" "`gym.make(env_name, render_mode='rgb_array')`." ) for key in pixel_keys: render_kwargs.setdefault(key, default_render_kwargs) wrapped_observation_space = env.observation_space if isinstance(wrapped_observation_space, spaces.Box): self._observation_is_dict = False invalid_keys = {STATE_KEY} elif isinstance(wrapped_observation_space, (spaces.Dict, MutableMapping)): self._observation_is_dict = True invalid_keys = set(wrapped_observation_space.spaces.keys()) else: raise ValueError("Unsupported observation space structure.") if not pixels_only: # Make sure that now keys in the `pixel_keys` overlap with # `observation_keys` overlapping_keys = set(pixel_keys) & set(invalid_keys) if overlapping_keys: raise ValueError( f"Duplicate or reserved pixel keys {overlapping_keys!r}." ) if pixels_only: self.observation_space = spaces.Dict() elif self._observation_is_dict: self.observation_space = copy.deepcopy(wrapped_observation_space) else: self.observation_space = spaces.Dict({STATE_KEY: wrapped_observation_space}) # Extend observation space with pixels. 
self.env.reset() pixels_spaces = {} for pixel_key in pixel_keys: pixels = self._render(**render_kwargs[pixel_key]) pixels: np.ndarray = pixels[-1] if isinstance(pixels, List) else pixels if not hasattr(pixels, "dtype") or not hasattr(pixels, "shape"): raise TypeError( f"Render method returns a {pixels.__class__.__name__}, but an array with dtype and shape is expected." "Be sure to specify the correct render_mode." ) if np.issubdtype(pixels.dtype, np.integer): low, high = (0, 255) elif np.issubdtype(pixels.dtype, np.float): low, high = (-float("inf"), float("inf")) else: raise TypeError(pixels.dtype) pixels_space = spaces.Box( shape=pixels.shape, low=low, high=high, dtype=pixels.dtype ) pixels_spaces[pixel_key] = pixels_space self.observation_space.spaces.update(pixels_spaces) self._pixels_only = pixels_only self._render_kwargs = render_kwargs self._pixel_keys = pixel_keys def observation(self, observation): """Updates the observations with the pixel observations. Args: observation: The observation to add pixel observations for Returns: The updated pixel observations """ pixel_observation = self._add_pixel_observation(observation) return pixel_observation def _add_pixel_observation(self, wrapped_observation): if self._pixels_only: observation = collections.OrderedDict() elif self._observation_is_dict: observation = type(wrapped_observation)(wrapped_observation) else: observation = collections.OrderedDict() observation[STATE_KEY] = wrapped_observation pixel_observations = { pixel_key: self._render(**self._render_kwargs[pixel_key]) for pixel_key in self._pixel_keys } observation.update(pixel_observations) return observation def render(self, *args, **kwargs): """Renders the environment.""" render = self.env.render(*args, **kwargs) if isinstance(render, list): render = self.render_history + render self.render_history = [] return render def _render(self, *args, **kwargs): render = self.env.render(*args, **kwargs) if isinstance(render, list): self.render_history += render 
def add_vector_episode_statistics(
    info: dict, episode_info: dict, num_envs: int, env_num: int
):
    """Add episode statistics.

    Add statistics coming from the vectorized environment.

    Args:
        info (dict): info dict of the environment.
        episode_info (dict): episode statistics data.
        num_envs (int): number of environments.
        env_num (int): env number of the vectorized environments.

    Returns:
        info (dict): the input info dict with the episode statistics.
    """
    # Lazily create the aggregate entries, then flag this sub-environment as
    # having just finished an episode.
    episode = info.setdefault("episode", {})
    mask = info.setdefault("_episode", np.zeros(num_envs, dtype=bool))
    mask[env_num] = True

    # Write each statistic into the per-env slot of its aggregate array.
    for key, value in episode_info.items():
        stats = episode.setdefault(key, np.zeros(num_envs))
        stats[env_num] = value

    return info
    def __init__(self, env: gym.Env, deque_size: int = 100):
        """This wrapper will keep track of cumulative rewards and episode lengths.

        Args:
            env (Env): The environment to apply the wrapper
            deque_size: The size of the buffers :attr:`return_queue` and :attr:`length_queue`
        """
        super().__init__(env)
        # A plain (non-vector) env is treated as a vector of one environment.
        self.num_envs = getattr(env, "num_envs", 1)
        # Reference time for the "t" (elapsed seconds) episode statistic.
        self.t0 = time.perf_counter()
        self.episode_count = 0
        # Per-env accumulators; (re)allocated in reset().
        self.episode_returns: Optional[np.ndarray] = None
        self.episode_lengths: Optional[np.ndarray] = None
        # Rolling buffers holding statistics of the last `deque_size` episodes.
        self.return_queue = deque(maxlen=deque_size)
        self.length_queue = deque(maxlen=deque_size)
        self.is_vector_env = getattr(env, "is_vector_env", False)
def capped_cubic_video_schedule(episode_id: int) -> bool:
    """The default episode trigger.

    This function will trigger recordings at the episode indices 0, 1, 4, 8, 27, ..., :math:`k^3`, ..., 729, 1000, 2000, 3000, ...

    Args:
        episode_id: The episode number

    Returns:
        If to apply a video schedule number
    """
    if episode_id >= 1000:
        # Past the cubic ramp-up, record every thousandth episode.
        return episode_id % 1000 == 0
    # During ramp-up, record perfect cubes: the rounded cube root must cube
    # back to the episode index exactly.
    return int(round(episode_id ** (1.0 / 3))) ** 3 == episode_id
    def __init__(
        self,
        env: gym.Env,
        video_folder: str,
        episode_trigger: Optional[Callable[[int], bool]] = None,
        step_trigger: Optional[Callable[[int], bool]] = None,
        video_length: int = 0,
        name_prefix: str = "rl-video",
    ):
        """Wrapper records videos of rollouts.

        Args:
            env: The environment that will be wrapped
            video_folder (str): The folder where the recordings will be stored
            episode_trigger: Function that accepts an integer and returns ``True`` iff a recording should be started at this episode
            step_trigger: Function that accepts an integer and returns ``True`` iff a recording should be started at this step
            video_length (int): The length of recorded episodes. If 0, entire episodes are recorded.
                Otherwise, snippets of the specified length are captured
            name_prefix (str): Will be prepended to the filename of the recordings
        """
        super().__init__(env)

        # With no trigger supplied, fall back to the capped cubic schedule.
        if episode_trigger is None and step_trigger is None:
            episode_trigger = capped_cubic_video_schedule

        # Exactly one trigger may drive the recordings.
        trigger_count = sum(x is not None for x in [episode_trigger, step_trigger])
        assert trigger_count == 1, "Must specify exactly one trigger"

        self.episode_trigger = episode_trigger
        self.step_trigger = step_trigger
        self.video_recorder: Optional[video_recorder.VideoRecorder] = None

        self.video_folder = os.path.abspath(video_folder)
        # Create output folder if needed
        if os.path.isdir(self.video_folder):
            logger.warn(
                f"Overwriting existing videos at {self.video_folder} folder "
                f"(try specifying a different `video_folder` for the `RecordVideo` wrapper if this is not desired)"
            )
        os.makedirs(self.video_folder, exist_ok=True)

        self.name_prefix = name_prefix
        self.step_id = 0
        self.video_length = video_length

        # Recording state, updated as episodes start and end.
        self.recording = False
        self.terminated = False
        self.truncated = False
        self.recorded_frames = 0
        self.is_vector_env = getattr(env, "is_vector_env", False)
        self.episode_id = 0
self.video_recorder = video_recorder.VideoRecorder( env=self.env, base_path=base_path, metadata={"step_id": self.step_id, "episode_id": self.episode_id}, ) self.video_recorder.capture_frame() self.recorded_frames = 1 self.recording = True def _video_enabled(self): if self.step_trigger: return self.step_trigger(self.step_id) else: return self.episode_trigger(self.episode_id) def step(self, action): """Steps through the environment using action, recording observations if :attr:`self.recording`.""" ( observations, rewards, terminateds, truncateds, infos, ) = self.env.step(action) if not (self.terminated or self.truncated): # increment steps and episodes self.step_id += 1 if not self.is_vector_env: if terminateds or truncateds: self.episode_id += 1 self.terminated = terminateds self.truncated = truncateds elif terminateds[0] or truncateds[0]: self.episode_id += 1 self.terminated = terminateds[0] self.truncated = truncateds[0] if self.recording: assert self.video_recorder is not None self.video_recorder.capture_frame() self.recorded_frames += 1 if self.video_length > 0: if self.recorded_frames > self.video_length: self.close_video_recorder() else: if not self.is_vector_env: if terminateds or truncateds: self.close_video_recorder() elif terminateds[0] or truncateds[0]: self.close_video_recorder() elif self._video_enabled(): self.start_video_recorder() return observations, rewards, terminateds, truncateds, infos def close_video_recorder(self): """Closes the video recorder if currently recording.""" if self.recording: assert self.video_recorder is not None self.video_recorder.close() self.recording = False self.recorded_frames = 1 def render(self, *args, **kwargs): """Compute the render frames as specified by render_mode attribute during initialization of the environment or as specified in kwargs.""" if self.video_recorder is None or not self.video_recorder.enabled: return super().render(*args, **kwargs) if len(self.video_recorder.render_history) > 0: recorded_frames = [ 
self.video_recorder.render_history.pop() for _ in range(len(self.video_recorder.render_history)) ] if self.recording: return recorded_frames else: return recorded_frames + super().render(*args, **kwargs) else: if self.recording: return self.video_recorder.last_frame else: return super().render(*args, **kwargs) def close(self): """Closes the wrapper then the video recorder.""" super().close() self.close_video_recorder() def __del__(self): """Closes the video recorder.""" self.close_video_recorder() ================================================ FILE: gym/wrappers/render_collection.py ================================================ """A wrapper that adds render collection mode to an environment.""" import gym class RenderCollection(gym.Wrapper): """Save collection of render frames.""" def __init__(self, env: gym.Env, pop_frames: bool = True, reset_clean: bool = True): """Initialize a :class:`RenderCollection` instance. Args: env: The environment that is being wrapped pop_frames (bool): If true, clear the collection frames after .render() is called. Default value is True. reset_clean (bool): If true, clear the collection frames when .reset() is called. Default value is True. 
""" super().__init__(env) assert env.render_mode is not None assert not env.render_mode.endswith("_list") self.frame_list = [] self.reset_clean = reset_clean self.pop_frames = pop_frames @property def render_mode(self): """Returns the collection render_mode name.""" return f"{self.env.render_mode}_list" def step(self, *args, **kwargs): """Perform a step in the base environment and collect a frame.""" output = self.env.step(*args, **kwargs) self.frame_list.append(self.env.render()) return output def reset(self, *args, **kwargs): """Reset the base environment, eventually clear the frame_list, and collect a frame.""" result = self.env.reset(*args, **kwargs) if self.reset_clean: self.frame_list = [] self.frame_list.append(self.env.render()) return result def render(self): """Returns the collection of frames and, if pop_frames = True, clears it.""" frames = self.frame_list if self.pop_frames: self.frame_list = [] return frames ================================================ FILE: gym/wrappers/rescale_action.py ================================================ """Wrapper for rescaling actions to within a max and min action.""" from typing import Union import numpy as np import gym from gym import spaces class RescaleAction(gym.ActionWrapper): """Affinely rescales the continuous action space of the environment to the range [min_action, max_action]. The base environment :attr:`env` must have an action space of type :class:`spaces.Box`. If :attr:`min_action` or :attr:`max_action` are numpy arrays, the shape must match the shape of the environment's action space. Example: >>> import gym >>> env = gym.make('BipedalWalker-v3') >>> env.action_space Box(-1.0, 1.0, (4,), float32) >>> min_action = -0.5 >>> max_action = np.array([0.0, 0.5, 1.0, 0.75]) >>> env = RescaleAction(env, min_action=min_action, max_action=max_action) >>> env.action_space Box(-0.5, [0. 0.5 1. 
0.75], (4,), float32) >>> RescaleAction(env, min_action, max_action).action_space == gym.spaces.Box(min_action, max_action) True """ def __init__( self, env: gym.Env, min_action: Union[float, int, np.ndarray], max_action: Union[float, int, np.ndarray], ): """Initializes the :class:`RescaleAction` wrapper. Args: env (Env): The environment to apply the wrapper min_action (float, int or np.ndarray): The min values for each action. This may be a numpy array or a scalar. max_action (float, int or np.ndarray): The max values for each action. This may be a numpy array or a scalar. """ assert isinstance( env.action_space, spaces.Box ), f"expected Box action space, got {type(env.action_space)}" assert np.less_equal(min_action, max_action).all(), (min_action, max_action) super().__init__(env) self.min_action = ( np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + min_action ) self.max_action = ( np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + max_action ) self.action_space = spaces.Box( low=min_action, high=max_action, shape=env.action_space.shape, dtype=env.action_space.dtype, ) def action(self, action): """Rescales the action affinely from [:attr:`min_action`, :attr:`max_action`] to the action space of the base environment, :attr:`env`. 
Args: action: The action to rescale Returns: The rescaled action """ assert np.all(np.greater_equal(action, self.min_action)), ( action, self.min_action, ) assert np.all(np.less_equal(action, self.max_action)), (action, self.max_action) low = self.env.action_space.low high = self.env.action_space.high action = low + (high - low) * ( (action - self.min_action) / (self.max_action - self.min_action) ) action = np.clip(action, low, high) return action ================================================ FILE: gym/wrappers/resize_observation.py ================================================ """Wrapper for resizing observations.""" from typing import Union import numpy as np import gym from gym.error import DependencyNotInstalled from gym.spaces import Box class ResizeObservation(gym.ObservationWrapper): """Resize the image observation. This wrapper works on environments with image observations (or more generally observations of shape AxBxC) and resizes the observation to the shape given by the 2-tuple :attr:`shape`. The argument :attr:`shape` may also be an integer. In that case, the observation is scaled to a square of side-length :attr:`shape`. Example: >>> import gym >>> env = gym.make('CarRacing-v1') >>> env.observation_space.shape (96, 96, 3) >>> env = ResizeObservation(env, 64) >>> env.observation_space.shape (64, 64, 3) """ def __init__(self, env: gym.Env, shape: Union[tuple, int]): """Resizes image observations to shape given by :attr:`shape`. 
Args: env: The environment to apply the wrapper shape: The shape of the resized observations """ super().__init__(env) if isinstance(shape, int): shape = (shape, shape) assert all(x > 0 for x in shape), shape self.shape = tuple(shape) assert isinstance( env.observation_space, Box ), f"Expected the observation space to be Box, actual type: {type(env.observation_space)}" obs_shape = self.shape + env.observation_space.shape[2:] self.observation_space = Box(low=0, high=255, shape=obs_shape, dtype=np.uint8) def observation(self, observation): """Updates the observations by resizing the observation to shape given by :attr:`shape`. Args: observation: The observation to reshape Returns: The reshaped observations Raises: DependencyNotInstalled: opencv-python is not installed """ try: import cv2 except ImportError: raise DependencyNotInstalled( "opencv is not install, run `pip install gym[other]`" ) observation = cv2.resize( observation, self.shape[::-1], interpolation=cv2.INTER_AREA ) if observation.ndim == 2: observation = np.expand_dims(observation, -1) return observation ================================================ FILE: gym/wrappers/step_api_compatibility.py ================================================ """Implementation of StepAPICompatibility wrapper class for transforming envs between new and old step API.""" import gym from gym.logger import deprecation from gym.utils.step_api_compatibility import ( convert_to_done_step_api, convert_to_terminated_truncated_step_api, ) class StepAPICompatibility(gym.Wrapper): r"""A wrapper which can transform an environment from new step API to old and vice-versa. Old step API refers to step() method returning (observation, reward, done, info) New step API refers to step() method returning (observation, reward, terminated, truncated, info) (Refer to docs for details on the API change) Args: env (gym.Env): the env to wrap. 
Can be in old or new API
        output_truncation_bool (bool): Whether the wrapper outputs the new step API with two booleans
            (``True`` by default; pass ``False`` for the old single-``done`` API)

    Examples:
        >>> env = gym.make("CartPole-v1")
        >>> env # wrapper not applied by default, set to new API
        >>>>
        >>> env = gym.make("CartPole-v1", apply_api_compatibility=True) # set to old API
        >>>>>
        >>> env = StepAPICompatibility(CustomEnv(), apply_step_compatibility=False) # manually using wrapper on unregistered envs
    """

    def __init__(self, env: gym.Env, output_truncation_bool: bool = True):
        """A wrapper which can transform an environment from new step API to old and vice-versa.

        Args:
            env (gym.Env): the env to wrap. Can be in old or new API
            output_truncation_bool (bool): Whether the wrapper's step method outputs two booleans (new API) or one boolean (old API)
        """
        super().__init__(env)
        self.output_truncation_bool = output_truncation_bool
        if not self.output_truncation_bool:
            deprecation(
                "Initializing environment in old step API which returns one bool instead of two."
            )

    def step(self, action):
        """Steps through the environment, returning 5 or 4 items depending on `output_truncation_bool`.

        Args:
            action: action to step through the environment with

        Returns:
            (observation, reward, terminated, truncated, info) or (observation, reward, done, info)
        """
        step_returns = self.env.step(action)
        if self.output_truncation_bool:
            # New API: (obs, reward, terminated, truncated, info)
            return convert_to_terminated_truncated_step_api(step_returns)
        else:
            # Old API: (obs, reward, done, info)
            return convert_to_done_step_api(step_returns)


================================================
FILE: gym/wrappers/time_aware_observation.py
================================================
"""Wrapper for adding time aware observations to environment observation."""
import numpy as np

import gym
from gym.spaces import Box


class TimeAwareObservation(gym.ObservationWrapper):
    """Augment the observation with the current time step in the episode.

    The observation space of the wrapped environment is assumed to be a flat :class:`Box`.
    In particular, pixel observations are not supported. This wrapper will append the current
    timestep within the current episode to the observation.

    Example:
        >>> import gym
        >>> env = gym.make('CartPole-v1')
        >>> env = TimeAwareObservation(env)
        >>> env.reset()
        array([ 0.03810719,  0.03522411,  0.02231044, -0.01088205,  0.        ])
        >>> env.step(env.action_space.sample())[0]
        array([ 0.03881167, -0.16021058,  0.0220928 ,  0.28875574,  1.        ])
    """

    def __init__(self, env: gym.Env):
        """Initialize :class:`TimeAwareObservation` that requires an environment with a flat :class:`Box` observation space.

        Args:
            env: The environment to apply the wrapper
        """
        super().__init__(env)
        assert isinstance(env.observation_space, Box)
        assert env.observation_space.dtype == np.float32
        # Extend the observation space by one unbounded dimension for the time step.
        low = np.append(self.observation_space.low, 0.0)
        high = np.append(self.observation_space.high, np.inf)
        self.observation_space = Box(low, high, dtype=np.float32)
        self.is_vector_env = getattr(env, "is_vector_env", False)

    def observation(self, observation):
        """Adds to the observation with the current time step.

        Args:
            observation: The observation to add the time step to

        Returns:
            The observation with the time step appended to
        """
        return np.append(observation, self.t)

    def step(self, action):
        """Steps through the environment, incrementing the time step.

        Args:
            action: The action to take

        Returns:
            The environment's step using the action.
        """
        self.t += 1
        return super().step(action)

    def reset(self, **kwargs):
        """Reset the environment setting the time to zero.

        Args:
            **kwargs: Kwargs to apply to env.reset()

        Returns:
            The reset environment
        """
        self.t = 0
        return super().reset(**kwargs)


================================================
FILE: gym/wrappers/time_limit.py
================================================
"""Wrapper for limiting the time steps of an environment."""
from typing import Optional

import gym


class TimeLimit(gym.Wrapper):
    """This wrapper will issue a `truncated` signal if a maximum number of timesteps is exceeded.
If a truncation is not defined inside the environment itself, this is the only place that the truncation signal
    is issued. Critically, this is different from the `terminated` signal that originates from the underlying
    environment as part of the MDP.

    Example:
        >>> from gym.envs.classic_control import CartPoleEnv
        >>> from gym.wrappers import TimeLimit
        >>> env = CartPoleEnv()
        >>> env = TimeLimit(env, max_episode_steps=1000)
    """

    def __init__(
        self,
        env: gym.Env,
        max_episode_steps: Optional[int] = None,
    ):
        """Initializes the :class:`TimeLimit` wrapper with an environment and the number of steps after which truncation will occur.

        Args:
            env: The environment to apply the wrapper
            max_episode_steps: An optional max episode steps (if ``None``, ``env.spec.max_episode_steps`` is used)
        """
        super().__init__(env)
        if max_episode_steps is None and self.env.spec is not None:
            max_episode_steps = env.spec.max_episode_steps
        if self.env.spec is not None:
            # Keep the spec in sync so `env.spec.max_episode_steps` reflects this wrapper's limit.
            self.env.spec.max_episode_steps = max_episode_steps
        self._max_episode_steps = max_episode_steps
        # Set on reset(); stepping before reset will fail on the increment below.
        self._elapsed_steps = None

    def step(self, action):
        """Steps through the environment and if the number of steps elapsed exceeds ``max_episode_steps`` then truncate.

        Args:
            action: The environment step action

        Returns:
            The environment step ``(observation, reward, terminated, truncated, info)`` with `truncated=True`
            if the number of steps elapsed >= max episode steps
        """
        observation, reward, terminated, truncated, info = self.env.step(action)
        self._elapsed_steps += 1

        if self._elapsed_steps >= self._max_episode_steps:
            truncated = True

        return observation, reward, terminated, truncated, info

    def reset(self, **kwargs):
        """Resets the environment with :param:`**kwargs` and sets the number of steps elapsed to zero.
Args:
            **kwargs: The kwargs to reset the environment with

        Returns:
            The reset environment
        """
        self._elapsed_steps = 0
        return self.env.reset(**kwargs)


================================================
FILE: gym/wrappers/transform_observation.py
================================================
"""Wrapper for transforming observations."""
from typing import Any, Callable

import gym


class TransformObservation(gym.ObservationWrapper):
    """Transform the observation via an arbitrary function :attr:`f`.

    The function :attr:`f` should be defined on the observation space of the base environment, ``env``, and should,
    ideally, return values in the same space.

    If the transformation you wish to apply to observations returns values in a *different* space, you should
    subclass :class:`ObservationWrapper`, implement the transformation, and set the new observation space accordingly.
    If you were to use this wrapper instead, the observation space would be set incorrectly.

    Example:
        >>> import gym
        >>> import numpy as np
        >>> env = gym.make('CartPole-v1')
        >>> env = TransformObservation(env, lambda obs: obs + 0.1*np.random.randn(*obs.shape))
        >>> env.reset()
        array([-0.08319338,  0.04635121, -0.07394746,  0.20877492])
    """

    def __init__(self, env: gym.Env, f: Callable[[Any], Any]):
        """Initialize the :class:`TransformObservation` wrapper with an environment and a transform function :param:`f`.

        Args:
            env: The environment to apply the wrapper
            f: A function that transforms the observation
        """
        super().__init__(env)
        assert callable(f)
        self.f = f

    def observation(self, observation):
        """Transforms the observations with callable :attr:`f`.

        Args:
            observation: The observation to transform

        Returns:
            The transformed observation
        """
        return self.f(observation)


================================================
FILE: gym/wrappers/transform_reward.py
================================================
"""Wrapper for transforming the reward."""
from typing import Callable

import gym
from gym import RewardWrapper


class TransformReward(RewardWrapper):
    """Transform the reward via an arbitrary function.

    Warning:
        If the base environment specifies a reward range which is not invariant under :attr:`f`,
        the :attr:`reward_range` of the wrapped environment will be incorrect.

    Example:
        >>> import gym
        >>> env = gym.make('CartPole-v1')
        >>> env = TransformReward(env, lambda r: 0.01*r)
        >>> env.reset()
        >>> observation, reward, terminated, truncated, info = env.step(env.action_space.sample())
        >>> reward
        0.01
    """

    def __init__(self, env: gym.Env, f: Callable[[float], float]):
        """Initialize the :class:`TransformReward` wrapper with an environment and reward transform function :param:`f`.

        Args:
            env: The environment to apply the wrapper
            f: A function that transforms the reward
        """
        super().__init__(env)
        assert callable(f)
        self.f = f

    def reward(self, reward):
        """Transforms the reward using callable :attr:`f`.

        Args:
            reward: The reward to transform

        Returns:
            The transformed reward
        """
        return self.f(reward)


================================================
FILE: gym/wrappers/vector_list_info.py
================================================
"""Wrapper that converts the info format for vec envs into the list format."""
from typing import List

import gym


class VectorListInfo(gym.Wrapper):
    """Converts infos of vectorized environments from dict to List[dict].

    This wrapper converts the info format of a vector environment from a dictionary to a list of dictionaries.
    This wrapper is intended to be used around vectorized environments.
    If using other wrappers that perform operation on info like `RecordEpisodeStatistics`
    this need to be the outermost wrapper.

    i.e. VectorListInfo(RecordEpisodeStatistics(envs))

    Example::

        >>> # actual
        >>> {
        ...     "k": np.array([0., 0., 0.5, 0.3]),
        ...     "_k": np.array([False, False, True, True])
        ... }
        >>> # classic
        >>> [{}, {}, {k: 0.5}, {k: 0.3}]

    """

    def __init__(self, env):
        """This wrapper will convert the info into the list format.

        Args:
            env (Env): The environment to apply the wrapper
        """
        assert getattr(
            env, "is_vector_env", False
        ), "This wrapper can only be used in vectorized environments."
        super().__init__(env)

    def step(self, action):
        """Steps through the environment, convert dict info to list."""
        observation, reward, terminated, truncated, infos = self.env.step(action)
        list_info = self._convert_info_to_list(infos)
        return observation, reward, terminated, truncated, list_info

    def reset(self, **kwargs):
        """Resets the environment using kwargs."""
        obs, infos = self.env.reset(**kwargs)
        list_info = self._convert_info_to_list(infos)
        return obs, list_info

    def _convert_info_to_list(self, infos: dict) -> List[dict]:
        """Convert the dict info to list.

        Convert the dict info of the vectorized environment
        into a list of dictionaries where the i-th dictionary
        has the info of the i-th environment.

        Args:
            infos (dict): info dict coming from the env.

        Returns:
            list_info (list): converted info.
        """
        list_info = [{} for _ in range(self.num_envs)]
        list_info = self._process_episode_statistics(infos, list_info)
        for k in infos:
            # Keys starting with "_" are per-env presence masks, not data.
            if k.startswith("_"):
                continue
            for i, has_info in enumerate(infos[f"_{k}"]):
                if has_info:
                    list_info[i][k] = infos[k][i]
        return list_info

    def _process_episode_statistics(self, infos: dict, list_info: list) -> List[dict]:
        """Process episode statistics.

        `RecordEpisodeStatistics` wrapper add extra
        information to the info. This information are in
        the form of a dict of dict. This method process these
        information and add them to the info.
        `RecordEpisodeStatistics` info contains the keys
        "r", "l", "t" which represents
        "cumulative reward", "episode length", "elapsed time since instantiation of wrapper".

        Args:
            infos (dict): infos coming from `RecordEpisodeStatistics`.
            list_info (list): info of the current vectorized environment.

        Returns:
            list_info (list): updated info.
        """
        # NOTE: pop mutates the incoming infos dict — the "episode"/"_episode"
        # keys are consumed here so the generic loop above does not see them.
        episode_statistics = infos.pop("episode", False)
        if not episode_statistics:
            return list_info

        episode_statistics_mask = infos.pop("_episode")
        for i, has_info in enumerate(episode_statistics_mask):
            if has_info:
                list_info[i]["episode"] = {}
                list_info[i]["episode"]["r"] = episode_statistics["r"][i]
                list_info[i]["episode"]["l"] = episode_statistics["l"][i]
                list_info[i]["episode"]["t"] = episode_statistics["t"][i]

        return list_info


================================================
FILE: py.Dockerfile
================================================
# A Dockerfile that sets up a full Gym install with test dependencies
ARG PYTHON_VERSION
FROM python:$PYTHON_VERSION

SHELL ["/bin/bash", "-o", "pipefail", "-c"]

RUN apt-get -y update \
    && apt-get install --no-install-recommends -y \
    unzip \
    libglu1-mesa-dev \
    libgl1-mesa-dev \
    libosmesa6-dev \
    xvfb \
    patchelf \
    ffmpeg cmake \
    && apt-get autoremove -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    # Download mujoco
    && mkdir /root/.mujoco \
    && cd /root/.mujoco \
    && wget -qO- 'https://github.com/deepmind/mujoco/releases/download/2.1.0/mujoco210-linux-x86_64.tar.gz' | tar -xzvf -

ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/root/.mujoco/mujoco210/bin"

COPY .
/usr/local/gym/ WORKDIR /usr/local/gym/ RUN if [ "python:${PYTHON_VERSION}" = "python:3.6.15" ] ; then pip install .[box2d,classic_control,toy_text,other] pytest=="7.0.1" --no-cache-dir; else pip install .[testing] --no-cache-dir; fi ENTRYPOINT ["/usr/local/gym/bin/docker_entrypoint"] ================================================ FILE: pyproject.toml ================================================ [tool.pyright] include = [ "gym/**", "tests/**" ] exclude = [ "**/node_modules", "**/__pycache__", ] strict = [ ] typeCheckingMode = "basic" pythonVersion = "3.6" pythonPlatform = "All" typeshedPath = "typeshed" enableTypeIgnoreComments = true # This is required as the CI pre-commit does not download the module (i.e. numpy, pygame, box2d) # Therefore, we have to ignore missing imports reportMissingImports = "none" # Some modules are missing type stubs, which is an issue when running pyright locally reportMissingTypeStubs = false # For warning and error, will raise an error when reportInvalidTypeVarUse = "none" # reportUnknownMemberType = "warning" # -> raises 6035 warnings # reportUnknownParameterType = "warning" # -> raises 1327 warnings # reportUnknownVariableType = "warning" # -> raises 2585 warnings # reportUnknownArgumentType = "warning" # -> raises 2104 warnings reportGeneralTypeIssues = "none" # -> commented out raises 489 errors reportUntypedFunctionDecorator = "none" # -> pytest.mark.parameterize issues reportPrivateUsage = "warning" reportUnboundVariable = "warning" [tool.pytest.ini_options] filterwarnings = ['ignore:.*step API.*:DeprecationWarning'] # TODO: to be removed when old step API is removed ================================================ FILE: requirements.txt ================================================ numpy>=1.18.0 cloudpickle>=1.2.0 importlib_metadata>=4.8.0; python_version < '3.10' gym_notices>=0.0.4 dataclasses==0.8; python_version == '3.6' typing_extensions==4.3.0; python_version == '3.7' opencv-python>=3.0 lz4>=3.1.0 matplotlib>=3.0 
box2d-py==2.3.5 pygame==2.1.0 ale-py~=0.8.0 mujoco==2.2.0 mujoco_py<2.2,>=2.1 imageio>=2.14.1 ================================================ FILE: setup.py ================================================ """Setups the project.""" import itertools import re from setuptools import find_packages, setup with open("gym/version.py") as file: full_version = file.read() assert ( re.match(r'VERSION = "\d\.\d+\.\d+"\n', full_version).group(0) == full_version ), f"Unexpected version: {full_version}" VERSION = re.search(r"\d\.\d+\.\d+", full_version).group(0) # Environment-specific dependencies. extras = { "atari": ["ale-py~=0.8.0"], "accept-rom-license": ["autorom[accept-rom-license]~=0.4.2"], "box2d": ["box2d-py==2.3.5", "pygame==2.1.0", "swig==4.*"], "classic_control": ["pygame==2.1.0"], "mujoco_py": ["mujoco_py<2.2,>=2.1"], "mujoco": ["mujoco==2.2", "imageio>=2.14.1"], "toy_text": ["pygame==2.1.0"], "other": ["lz4>=3.1.0", "opencv-python>=3.0", "matplotlib>=3.0", "moviepy>=1.0.0"], } # Testing dependency groups. 
testing_group = set(extras.keys()) - {"accept-rom-license", "atari"} extras["testing"] = list( set(itertools.chain.from_iterable(map(lambda group: extras[group], testing_group))) ) + ["pytest==7.0.1"] # All dependency groups - accept rom license as requires user to run all_groups = set(extras.keys()) - {"accept-rom-license"} extras["all"] = list( set(itertools.chain.from_iterable(map(lambda group: extras[group], all_groups))) ) # Uses the readme as the description on PyPI with open("README.md") as fh: long_description = "" header_count = 0 for line in fh: if line.startswith("##"): header_count += 1 if header_count < 2: long_description += line else: break setup( author="Gym Community", author_email="jkterry@umd.edu", classifiers=[ # Python 3.6 is minimally supported (only with basic gym environments and API) "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", ], description="Gym: A universal API for reinforcement learning environments", extras_require=extras, install_requires=[ "numpy >= 1.18.0", "cloudpickle >= 1.2.0", "importlib_metadata >= 4.8.0; python_version < '3.10'", "gym_notices >= 0.0.4", "dataclasses == 0.8; python_version == '3.6'", ], license="MIT", long_description=long_description, long_description_content_type="text/markdown", name="gym", packages=[package for package in find_packages() if package.startswith("gym")], package_data={ "gym": [ "envs/mujoco/assets/*.xml", "envs/classic_control/assets/*.png", "envs/toy_text/font/*.ttf", "envs/toy_text/img/*.png", "py.typed", ] }, python_requires=">=3.6", tests_require=extras["testing"], url="https://www.gymlibrary.dev/", version=VERSION, zip_safe=False, ) ================================================ FILE: test_requirements.txt ================================================ box2d-py==2.3.5 lz4>=3.1.0 
opencv-python>=3.0 mujoco==2.2.0 matplotlib>=3.0 imageio>=2.14.1 pygame==2.1.0 mujoco_py<2.2,>=2.1 pytest==7.0.1 ================================================ FILE: tests/__init__.py ================================================ ================================================ FILE: tests/envs/__init__.py ================================================ ================================================ FILE: tests/envs/test_action_dim_check.py ================================================ import numpy as np import pytest import gym from gym import spaces from gym.envs.registration import EnvSpec from tests.envs.utils import all_testing_initialised_envs, mujoco_testing_env_specs @pytest.mark.parametrize( "env_spec", mujoco_testing_env_specs, ids=[env_spec.id for env_spec in mujoco_testing_env_specs], ) def test_mujoco_action_dimensions(env_spec: EnvSpec): """Test that for all mujoco environment, mis-dimensioned actions, an error is raised. Types of mis-dimensioned actions: * Too few actions * Too many actions * Too few dimensions * Too many dimensions * Incorrect shape """ env = env_spec.make(disable_env_checker=True) env.reset() # Too few actions with pytest.raises(ValueError, match="Action dimension mismatch"): env.step(env.action_space.sample()[1:]) # Too many actions with pytest.raises(ValueError, match="Action dimension mismatch"): env.step(np.append(env.action_space.sample(), 0)) # Too few dimensions with pytest.raises(ValueError, match="Action dimension mismatch"): env.step(0.1) # Too many dimensions with pytest.raises(ValueError, match="Action dimension mismatch"): env.step(np.expand_dims(env.action_space.sample(), 0)) # Incorrect shape with pytest.raises(ValueError, match="Action dimension mismatch"): env.step(np.expand_dims(env.action_space.sample(), 1)) env.close() DISCRETE_ENVS = list( filter( lambda env: isinstance(env.action_space, spaces.Discrete), all_testing_initialised_envs, ) ) @pytest.mark.parametrize( "env", DISCRETE_ENVS, 
ids=[env.spec.id for env in DISCRETE_ENVS] ) def test_discrete_actions_out_of_bound(env: gym.Env): """Test out of bound actions in Discrete action_space. In discrete action_space environments, `out-of-bound` actions are not allowed and should raise an exception. Args: env (gym.Env): the gym environment """ assert isinstance(env.action_space, spaces.Discrete) upper_bound = env.action_space.start + env.action_space.n - 1 env.reset() with pytest.raises(Exception): env.step(upper_bound + 1) env.close() BOX_ENVS = list( filter( lambda env: isinstance(env.action_space, spaces.Box), all_testing_initialised_envs, ) ) OOB_VALUE = 100 @pytest.mark.parametrize("env", BOX_ENVS, ids=[env.spec.id for env in BOX_ENVS]) def test_box_actions_out_of_bound(env: gym.Env): """Test out of bound actions in Box action_space. Environments with Box actions spaces perform clipping inside `step`. The expected behaviour is that an action `out-of-bound` has the same effect of an action with value exactly at the upper (or lower) bound. 
Args: env (gym.Env): the gym environment """ env.reset(seed=42) oob_env = gym.make(env.spec.id, disable_env_checker=True) oob_env.reset(seed=42) assert isinstance(env.action_space, spaces.Box) dtype = env.action_space.dtype upper_bounds = env.action_space.high lower_bounds = env.action_space.low for i, (is_upper_bound, is_lower_bound) in enumerate( zip(env.action_space.bounded_above, env.action_space.bounded_below) ): if is_upper_bound: obs, _, _, _, _ = env.step(upper_bounds) oob_action = upper_bounds.copy() oob_action[i] += np.cast[dtype](OOB_VALUE) assert oob_action[i] > upper_bounds[i] oob_obs, _, _, _, _ = oob_env.step(oob_action) assert np.alltrue(obs == oob_obs) if is_lower_bound: obs, _, _, _, _ = env.step( lower_bounds ) # `env` is unwrapped, and in new step API oob_action = lower_bounds.copy() oob_action[i] -= np.cast[dtype](OOB_VALUE) assert oob_action[i] < lower_bounds[i] oob_obs, _, _, _, _ = oob_env.step(oob_action) assert np.alltrue(obs == oob_obs) env.close() ================================================ FILE: tests/envs/test_compatibility.py ================================================ import sys from typing import Any, Dict, Optional, Tuple import numpy as np import gym from gym.spaces import Discrete from gym.wrappers.compatibility import EnvCompatibility, LegacyEnv class LegacyEnvExplicit(LegacyEnv, gym.Env): """Legacy env that explicitly implements the old API.""" observation_space = Discrete(1) action_space = Discrete(1) metadata = {"render.modes": ["human", "rgb_array"]} def __init__(self): pass def reset(self): return 0 def step(self, action): return 0, 0, False, {} def render(self, mode="human"): if mode == "human": return elif mode == "rgb_array": return np.zeros((1, 1, 3), dtype=np.uint8) def close(self): pass def seed(self, seed=None): pass class LegacyEnvImplicit(gym.Env): """Legacy env that implicitly implements the old API as a protocol.""" observation_space = Discrete(1) action_space = Discrete(1) metadata = {"render.modes": 
["human", "rgb_array"]} def __init__(self): pass def reset(self): # type: ignore return 0 # type: ignore def step(self, action: Any) -> Tuple[int, float, bool, Dict]: return 0, 0.0, False, {} def render(self, mode: Optional[str] = "human") -> Any: if mode == "human": return elif mode == "rgb_array": return np.zeros((1, 1, 3), dtype=np.uint8) def close(self): pass def seed(self, seed: Optional[int] = None): pass def test_explicit(): old_env = LegacyEnvExplicit() assert isinstance(old_env, LegacyEnv) env = EnvCompatibility(old_env, render_mode="rgb_array") assert env.observation_space == Discrete(1) assert env.action_space == Discrete(1) assert env.reset() == (0, {}) assert env.reset(seed=0, options={"some": "option"}) == (0, {}) assert env.step(0) == (0, 0, False, False, {}) assert env.render().shape == (1, 1, 3) env.close() def test_implicit(): old_env = LegacyEnvImplicit() if sys.version_info >= (3, 7): # We need to give up on typing in Python 3.6 assert isinstance(old_env, LegacyEnv) env = EnvCompatibility(old_env, render_mode="rgb_array") assert env.observation_space == Discrete(1) assert env.action_space == Discrete(1) assert env.reset() == (0, {}) assert env.reset(seed=0, options={"some": "option"}) == (0, {}) assert env.step(0) == (0, 0, False, False, {}) assert env.render().shape == (1, 1, 3) env.close() def test_make_compatibility_in_spec(): gym.register( id="LegacyTestEnv-v0", entry_point=LegacyEnvExplicit, apply_api_compatibility=True, ) env = gym.make("LegacyTestEnv-v0", render_mode="rgb_array") assert env.observation_space == Discrete(1) assert env.action_space == Discrete(1) assert env.reset() == (0, {}) assert env.reset(seed=0, options={"some": "option"}) == (0, {}) assert env.step(0) == (0, 0, False, False, {}) img = env.render() assert isinstance(img, np.ndarray) assert img.shape == (1, 1, 3) # type: ignore env.close() del gym.envs.registration.registry["LegacyTestEnv-v0"] def test_make_compatibility_in_make(): gym.register(id="LegacyTestEnv-v0", 
entry_point=LegacyEnvExplicit) env = gym.make( "LegacyTestEnv-v0", apply_api_compatibility=True, render_mode="rgb_array" ) assert env.observation_space == Discrete(1) assert env.action_space == Discrete(1) assert env.reset() == (0, {}) assert env.reset(seed=0, options={"some": "option"}) == (0, {}) assert env.step(0) == (0, 0, False, False, {}) img = env.render() assert isinstance(img, np.ndarray) assert img.shape == (1, 1, 3) # type: ignore env.close() del gym.envs.registration.registry["LegacyTestEnv-v0"] ================================================ FILE: tests/envs/test_env_implementation.py ================================================ from typing import Optional import numpy as np import pytest import gym from gym.envs.box2d import BipedalWalker from gym.envs.box2d.lunar_lander import demo_heuristic_lander from gym.envs.toy_text import TaxiEnv from gym.envs.toy_text.frozen_lake import generate_random_map def test_lunar_lander_heuristics(): """Tests the LunarLander environment by checking if the heuristic lander works.""" lunar_lander = gym.make("LunarLander-v2", disable_env_checker=True) total_reward = demo_heuristic_lander(lunar_lander, seed=1) assert total_reward > 100 def test_carracing_domain_randomize(): """Tests the CarRacing Environment domain randomization. CarRacing DomainRandomize should have different colours at every reset. However, it should have same colours when `options={"randomize": False}` is given to reset. """ env = gym.make("CarRacing-v2", domain_randomize=True) road_color = env.road_color bg_color = env.bg_color grass_color = env.grass_color env.reset(options={"randomize": False}) assert ( road_color == env.road_color ).all(), f"Have different road color after reset with randomize turned off. Before: {road_color}, after: {env.road_color}." assert ( bg_color == env.bg_color ).all(), f"Have different bg color after reset with randomize turned off. Before: {bg_color}, after: {env.bg_color}." 
assert ( grass_color == env.grass_color ).all(), f"Have different grass color after reset with randomize turned off. Before: {grass_color}, after: {env.grass_color}." env.reset() assert ( road_color != env.road_color ).all(), f"Have same road color after reset. Before: {road_color}, after: {env.road_color}." assert ( bg_color != env.bg_color ).all(), ( f"Have same bg color after reset. Before: {bg_color}, after: {env.bg_color}." ) assert ( grass_color != env.grass_color ).all(), f"Have same grass color after reset. Before: {grass_color}, after: {env.grass_color}." @pytest.mark.parametrize("seed", range(5)) def test_bipedal_walker_hardcore_creation(seed: int): """Test BipedalWalker hardcore creation. BipedalWalker with `hardcore=True` should have ladders stumps and pitfalls. A convenient way to identify if ladders, stumps and pitfall are created is checking whether the terrain has that particular terrain color. Args: seed (int): environment seed """ HC_TERRAINS_COLOR1 = (255, 255, 255) HC_TERRAINS_COLOR2 = (153, 153, 153) env = gym.make("BipedalWalker-v3", disable_env_checker=True).unwrapped hc_env = gym.make("BipedalWalkerHardcore-v3", disable_env_checker=True).unwrapped assert isinstance(env, BipedalWalker) and isinstance(hc_env, BipedalWalker) assert env.hardcore is False and hc_env.hardcore is True env.reset(seed=seed) hc_env.reset(seed=seed) for terrain in env.terrain: assert terrain.color1 != HC_TERRAINS_COLOR1 assert terrain.color2 != HC_TERRAINS_COLOR2 hc_terrains_color1_count = 0 hc_terrains_color2_count = 0 for terrain in hc_env.terrain: if terrain.color1 == HC_TERRAINS_COLOR1: hc_terrains_color1_count += 1 if terrain.color2 == HC_TERRAINS_COLOR2: hc_terrains_color2_count += 1 assert hc_terrains_color1_count > 0 assert hc_terrains_color2_count > 0 @pytest.mark.parametrize("map_size", [5, 10, 16]) def test_frozenlake_dfs_map_generation(map_size: int): """Frozenlake has the ability to generate random maps. 
This function checks that the random maps will always be possible to solve for sizes 5, 10, 16, currently only 8x8 maps can be generated. """ new_frozenlake = generate_random_map(map_size) assert len(new_frozenlake) == map_size assert len(new_frozenlake[0]) == map_size # Runs a depth first search through the map to find the path. directions = [(1, 0), (0, 1), (-1, 0), (0, -1)] frontier, discovered = [], set() frontier.append((0, 0)) while frontier: row, col = frontier.pop() if (row, col) not in discovered: discovered.add((row, col)) for row_direction, col_direction in directions: new_row = row + row_direction new_col = col + col_direction if 0 <= new_row < map_size and 0 <= new_col < map_size: if new_frozenlake[new_row][new_col] == "G": return # Successful, a route through the map was found if new_frozenlake[new_row][new_col] not in "#H": frontier.append((new_row, new_col)) raise AssertionError("No path through the frozenlake was found.") def test_taxi_action_mask(): env = TaxiEnv() for state in env.P: mask = env.action_mask(state) for action, possible in enumerate(mask): _, next_state, _, _ = env.P[state][action][0] assert state != next_state if possible else state == next_state def test_taxi_encode_decode(): env = TaxiEnv() state, info = env.reset() for _ in range(100): assert ( env.encode(*env.decode(state)) == state ), f"state={state}, encode(decode(state))={env.encode(*env.decode(state))}" state, _, _, _, _ = env.step(env.action_space.sample()) @pytest.mark.parametrize( "env_name", ["Acrobot-v1", "CartPole-v1", "MountainCar-v0", "MountainCarContinuous-v0"], ) @pytest.mark.parametrize( "low_high", [None, (-0.4, 0.4), (np.array(-0.4), np.array(0.4))] ) def test_customizable_resets(env_name: str, low_high: Optional[list]): env = gym.make(env_name) env.action_space.seed(0) # First ensure we can do a reset. 
if low_high is None: env.reset() else: low, high = low_high env.reset(options={"low": low, "high": high}) assert np.all((env.state >= low) & (env.state <= high)) # Make sure we can take a step. env.step(env.action_space.sample()) # We test Pendulum separately, as the parameters are handled differently. @pytest.mark.parametrize( "low_high", [ None, (1.2, 1.0), (np.array(1.2), np.array(1.0)), ], ) def test_customizable_pendulum_resets(low_high: Optional[list]): env = gym.make("Pendulum-v1") env.action_space.seed(0) # First ensure we can do a reset and the values are within expected ranges. if low_high is None: env.reset() else: low, high = low_high # Pendulum is initialized a little differently than the other # environments, where we specify the x and y values for the upper # limit (and lower limit is just the negative of it). env.reset(options={"x_init": low, "y_init": high}) # Make sure we can take a step. env.step(env.action_space.sample()) @pytest.mark.parametrize( "env_name", ["Acrobot-v1", "CartPole-v1", "MountainCar-v0", "MountainCarContinuous-v0"], ) @pytest.mark.parametrize( "low_high", [ ("x", "y"), (10.0, 8.0), ([-1.0, -1.0], [1.0, 1.0]), (np.array([-1.0, -1.0]), np.array([1.0, 1.0])), ], ) def test_invalid_customizable_resets(env_name: str, low_high: list): env = gym.make(env_name) low, high = low_high with pytest.raises(ValueError): # match=re.escape(f"Lower bound ({low}) must be lower than higher bound ({high}).") # match=f"An option ({x}) could not be converted to a float." 
env.reset(options={"low": low, "high": high}) ================================================ FILE: tests/envs/test_envs.py ================================================ import pickle import warnings import numpy as np import pytest import gym from gym.envs.registration import EnvSpec from gym.logger import warn from gym.utils.env_checker import check_env, data_equivalence from tests.envs.utils import ( all_testing_env_specs, all_testing_initialised_envs, assert_equals, ) # This runs a smoketest on each official registered env. We may want # to try also running environments which are not officially registered envs. PASSIVE_CHECK_IGNORE_WARNING = [ f"\x1b[33mWARN: {message}\x1b[0m" for message in [ "This version of the mujoco environments depends on the mujoco-py bindings, which are no longer maintained and may stop working. Please upgrade to the v4 versions of the environments (which depend on the mujoco python bindings instead), unless you are trying to precisely replicate previous works).", "Initializing environment in done (old) step API which returns one bool instead of two.", ] ] CHECK_ENV_IGNORE_WARNINGS = [ f"\x1b[33mWARN: {message}\x1b[0m" for message in [ "This version of the mujoco environments depends on the mujoco-py bindings, which are no longer maintained and may stop working. Please upgrade to the v4 versions of the environments (which depend on the mujoco python bindings instead), unless you are trying to precisely replicate previous works).", "A Box observation space minimum value is -infinity. This is probably too low.", "A Box observation space maximum value is -infinity. This is probably too high.", "For Box action spaces, we recommend using a symmetric and normalized space (range=[-1, 1] or [0, 1]). 
See https://stable-baselines3.readthedocs.io/en/master/guide/rl_tips.html for more information.", ] ] @pytest.mark.parametrize( "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs] ) def test_envs_pass_env_checker(spec): """Check that all environments pass the environment checker with no warnings other than the expected.""" with warnings.catch_warnings(record=True) as caught_warnings: env = spec.make(disable_env_checker=True).unwrapped check_env(env) env.close() for warning in caught_warnings: if warning.message.args[0] not in CHECK_ENV_IGNORE_WARNINGS: raise gym.error.Error(f"Unexpected warning: {warning.message}") # Note that this precludes running this test in multiple threads. # However, we probably already can't do multithreading due to some environments. SEED = 0 NUM_STEPS = 50 @pytest.mark.parametrize( "env_spec", all_testing_env_specs, ids=[env.id for env in all_testing_env_specs] ) def test_env_determinism_rollout(env_spec: EnvSpec): """Run a rollout with two environments and assert equality. This test run a rollout of NUM_STEPS steps with two environments initialized with the same seed and assert that: - observation after first reset are the same - same actions are sampled by the two envs - observations are contained in the observation space - obs, rew, done and info are equals between the two envs """ # Don't check rollout equality if it's a nondeterministic environment. 
if env_spec.nondeterministic is True: return env_1 = env_spec.make(disable_env_checker=True) env_2 = env_spec.make(disable_env_checker=True) initial_obs_1, initial_info_1 = env_1.reset(seed=SEED) initial_obs_2, initial_info_2 = env_2.reset(seed=SEED) assert_equals(initial_obs_1, initial_obs_2) env_1.action_space.seed(SEED) for time_step in range(NUM_STEPS): # We don't evaluate the determinism of actions action = env_1.action_space.sample() obs_1, rew_1, terminated_1, truncated_1, info_1 = env_1.step(action) obs_2, rew_2, terminated_2, truncated_2, info_2 = env_2.step(action) assert_equals(obs_1, obs_2, f"[{time_step}] ") assert env_1.observation_space.contains( obs_1 ) # obs_2 verified by previous assertion assert rew_1 == rew_2, f"[{time_step}] reward 1={rew_1}, reward 2={rew_2}" assert ( terminated_1 == terminated_2 ), f"[{time_step}] done 1={terminated_1}, done 2={terminated_2}" assert ( truncated_1 == truncated_2 ), f"[{time_step}] done 1={truncated_1}, done 2={truncated_2}" assert_equals(info_1, info_2, f"[{time_step}] ") if ( terminated_1 or truncated_1 ): # terminated_2, truncated_2 verified by previous assertion env_1.reset(seed=SEED) env_2.reset(seed=SEED) env_1.close() env_2.close() def check_rendered(rendered_frame, mode: str): """Check that the rendered frame is as expected.""" if mode == "rgb_array_list": assert isinstance(rendered_frame, list) for frame in rendered_frame: check_rendered(frame, "rgb_array") elif mode == "rgb_array": assert isinstance(rendered_frame, np.ndarray) assert len(rendered_frame.shape) == 3 assert rendered_frame.shape[2] == 3 assert np.all(rendered_frame >= 0) and np.all(rendered_frame <= 255) elif mode == "ansi": assert isinstance(rendered_frame, str) assert len(rendered_frame) > 0 elif mode == "state_pixels_list": assert isinstance(rendered_frame, list) for frame in rendered_frame: check_rendered(frame, "rgb_array") elif mode == "state_pixels": check_rendered(rendered_frame, "rgb_array") elif mode == "depth_array_list": 
assert isinstance(rendered_frame, list) for frame in rendered_frame: check_rendered(frame, "depth_array") elif mode == "depth_array": assert isinstance(rendered_frame, np.ndarray) assert len(rendered_frame.shape) == 2 else: warn( f"Unknown render mode: {mode}, cannot check that the rendered data is correct. Add case to `check_rendered`" ) non_mujoco_py_env_specs = [ spec for spec in all_testing_env_specs if "mujoco" not in spec.entry_point or "v4" in spec.id ] @pytest.mark.parametrize( "spec", non_mujoco_py_env_specs, ids=[spec.id for spec in non_mujoco_py_env_specs] ) def test_render_modes(spec): """There is a known issue where rendering a mujoco environment then mujoco-py will cause an error on non-mac based systems. Therefore, we are only testing with mujoco environments. """ env = spec.make() assert "rgb_array" in env.metadata["render_modes"] assert "human" in env.metadata["render_modes"] for mode in env.metadata["render_modes"]: if mode != "human": new_env = spec.make(render_mode=mode) new_env.reset() rendered = new_env.render() check_rendered(rendered, mode) new_env.step(new_env.action_space.sample()) rendered = new_env.render() check_rendered(rendered, mode) new_env.close() env.close() @pytest.mark.parametrize( "env", all_testing_initialised_envs, ids=[env.spec.id for env in all_testing_initialised_envs], ) def test_pickle_env(env: gym.Env): pickled_env = pickle.loads(pickle.dumps(env)) data_equivalence(env.reset(), pickled_env.reset()) action = env.action_space.sample() data_equivalence(env.step(action), pickled_env.step(action)) env.close() pickled_env.close() ================================================ FILE: tests/envs/test_make.py ================================================ """Tests that gym.make works as expected.""" import re import warnings from copy import deepcopy import numpy as np import pytest import gym from gym.envs.classic_control import cartpole from gym.wrappers import AutoResetWrapper, HumanRendering, OrderEnforcing, TimeLimit 
from gym.wrappers.env_checker import PassiveEnvChecker from tests.envs.test_envs import PASSIVE_CHECK_IGNORE_WARNING from tests.envs.utils import all_testing_env_specs from tests.envs.utils_envs import ArgumentEnv, RegisterDuringMakeEnv from tests.testing_env import GenericTestEnv, old_step_fn from tests.wrappers.utils import has_wrapper gym.register( "RegisterDuringMakeEnv-v0", entry_point="tests.envs.utils_envs:RegisterDuringMakeEnv", ) gym.register( id="test.ArgumentEnv-v0", entry_point="tests.envs.utils_envs:ArgumentEnv", kwargs={ "arg1": "arg1", "arg2": "arg2", }, ) gym.register( id="test/NoHuman-v0", entry_point="tests.envs.utils_envs:NoHuman", ) gym.register( id="test/NoHumanOldAPI-v0", entry_point="tests.envs.utils_envs:NoHumanOldAPI", ) gym.register( id="test/NoHumanNoRGB-v0", entry_point="tests.envs.utils_envs:NoHumanNoRGB", ) def test_make(): env = gym.make("CartPole-v1", disable_env_checker=True) assert env.spec.id == "CartPole-v1" assert isinstance(env.unwrapped, cartpole.CartPoleEnv) env.close() def test_make_deprecated(): with warnings.catch_warnings(record=True): with pytest.raises( gym.error.Error, match=re.escape( "Environment version v0 for `Humanoid` is deprecated. Please use `Humanoid-v4` instead." 
), ): gym.make("Humanoid-v0", disable_env_checker=True) def test_make_max_episode_steps(): # Default, uses the spec's env = gym.make("CartPole-v1", disable_env_checker=True) assert has_wrapper(env, TimeLimit) assert ( env.spec.max_episode_steps == gym.envs.registry["CartPole-v1"].max_episode_steps ) env.close() # Custom max episode steps env = gym.make("CartPole-v1", max_episode_steps=100, disable_env_checker=True) assert has_wrapper(env, TimeLimit) assert env.spec.max_episode_steps == 100 env.close() # Env spec has no max episode steps assert gym.spec("test.ArgumentEnv-v0").max_episode_steps is None env = gym.make( "test.ArgumentEnv-v0", arg1=None, arg2=None, arg3=None, disable_env_checker=True ) assert has_wrapper(env, TimeLimit) is False env.close() def test_gym_make_autoreset(): """Tests that `gym.make` autoreset wrapper is applied only when `gym.make(..., autoreset=True)`.""" env = gym.make("CartPole-v1", disable_env_checker=True) assert has_wrapper(env, AutoResetWrapper) is False env.close() env = gym.make("CartPole-v1", autoreset=False, disable_env_checker=True) assert has_wrapper(env, AutoResetWrapper) is False env.close() env = gym.make("CartPole-v1", autoreset=True) assert has_wrapper(env, AutoResetWrapper) env.close() def test_make_disable_env_checker(): """Tests that `gym.make` disable env checker is applied only when `gym.make(..., disable_env_checker=False)`.""" spec = deepcopy(gym.spec("CartPole-v1")) # Test with spec disable env checker spec.disable_env_checker = False env = gym.make(spec) assert has_wrapper(env, PassiveEnvChecker) env.close() # Test with overwritten spec using make disable env checker assert spec.disable_env_checker is False env = gym.make(spec, disable_env_checker=True) assert has_wrapper(env, PassiveEnvChecker) is False env.close() # Test with spec enabled disable env checker spec.disable_env_checker = True env = gym.make(spec) assert has_wrapper(env, PassiveEnvChecker) is False env.close() # Test with overwritten spec using make 
disable env checker assert spec.disable_env_checker is True env = gym.make(spec, disable_env_checker=False) assert has_wrapper(env, PassiveEnvChecker) env.close() def test_apply_api_compatibility(): gym.register( "testing-old-env", lambda: GenericTestEnv(step_fn=old_step_fn), apply_api_compatibility=True, max_episode_steps=3, ) env = gym.make("testing-old-env") env.reset() assert len(env.step(env.action_space.sample())) == 5 env.step(env.action_space.sample()) _, _, termination, truncation, _ = env.step(env.action_space.sample()) assert termination is False and truncation is True gym.spec("testing-old-env").apply_api_compatibility = False env = gym.make("testing-old-env") # Cannot run reset and step as will not work env = gym.make("testing-old-env", apply_api_compatibility=True) env.reset() assert len(env.step(env.action_space.sample())) == 5 env.step(env.action_space.sample()) _, _, termination, truncation, _ = env.step(env.action_space.sample()) assert termination is False and truncation is True gym.envs.registry.pop("testing-old-env") @pytest.mark.parametrize( "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs] ) def test_passive_checker_wrapper_warnings(spec): with warnings.catch_warnings(record=True) as caught_warnings: env = gym.make(spec) # disable_env_checker=False env.reset() env.step(env.action_space.sample()) # todo, add check for render, bugged due to mujoco v2/3 and v4 envs env.close() for warning in caught_warnings: if warning.message.args[0] not in PASSIVE_CHECK_IGNORE_WARNING: raise gym.error.Error(f"Unexpected warning: {warning.message}") def test_make_order_enforcing(): """Checks that gym.make wrappers the environment with the OrderEnforcing wrapper.""" assert all(spec.order_enforce is True for spec in all_testing_env_specs) env = gym.make("CartPole-v1", disable_env_checker=True) assert has_wrapper(env, OrderEnforcing) # We can assume that there all other specs will also have the order enforcing env.close() gym.register( 
id="test.OrderlessArgumentEnv-v0", entry_point="tests.envs.utils_envs:ArgumentEnv", order_enforce=False, kwargs={"arg1": None, "arg2": None, "arg3": None}, ) env = gym.make("test.OrderlessArgumentEnv-v0", disable_env_checker=True) assert has_wrapper(env, OrderEnforcing) is False env.close() def test_make_render_mode(): env = gym.make("CartPole-v1", disable_env_checker=True) assert env.render_mode is None env.close() # Make sure that render_mode is applied correctly env = gym.make( "CartPole-v1", render_mode="rgb_array_list", disable_env_checker=True ) assert env.render_mode == "rgb_array_list" env.reset() renders = env.render() assert isinstance( renders, list ) # Make sure that the `render` method does what is supposed to assert isinstance(renders[0], np.ndarray) env.close() env = gym.make("CartPole-v1", render_mode=None, disable_env_checker=True) assert env.render_mode is None valid_render_modes = env.metadata["render_modes"] env.close() assert len(valid_render_modes) > 0 with warnings.catch_warnings(record=True) as caught_warnings: env = gym.make( "CartPole-v1", render_mode=valid_render_modes[0], disable_env_checker=True ) assert env.render_mode == valid_render_modes[0] env.close() for warning in caught_warnings: raise gym.error.Error(f"Unexpected warning: {warning.message}") # Make sure that native rendering is used when possible env = gym.make("CartPole-v1", render_mode="human", disable_env_checker=True) assert not has_wrapper(env, HumanRendering) # Should use native human-rendering assert env.render_mode == "human" env.close() with pytest.warns( UserWarning, match=re.escape( "You are trying to use 'human' rendering for an environment that doesn't natively support it. The HumanRendering wrapper is being applied to your environment." 
), ): # Make sure that `HumanRendering` is applied here env = gym.make( "test/NoHuman-v0", render_mode="human", disable_env_checker=True ) # This environment doesn't use native rendering assert has_wrapper(env, HumanRendering) assert env.render_mode == "human" env.close() with pytest.raises( TypeError, match=re.escape("got an unexpected keyword argument 'render_mode'") ): gym.make( "test/NoHumanOldAPI-v0", render_mode="rgb_array_list", disable_env_checker=True, ) # Make sure that an additional error is thrown a user tries to use the wrapper on an environment with old API with warnings.catch_warnings(record=True): with pytest.raises( gym.error.Error, match=re.escape( "You passed render_mode='human' although test/NoHumanOldAPI-v0 doesn't implement human-rendering natively." ), ): gym.make( "test/NoHumanOldAPI-v0", render_mode="human", disable_env_checker=True ) # This test ensures that the additional exception "Gym tried to apply the HumanRendering wrapper but it looks like # your environment is using the old rendering API" is *not* triggered by a TypeError that originate from # a keyword that is not `render_mode` with pytest.raises( TypeError, match=re.escape("got an unexpected keyword argument 'render'"), ): gym.make("CarRacing-v2", render="human") def test_make_kwargs(): env = gym.make( "test.ArgumentEnv-v0", arg2="override_arg2", arg3="override_arg3", disable_env_checker=True, ) assert env.spec.id == "test.ArgumentEnv-v0" assert isinstance(env.unwrapped, ArgumentEnv) assert env.arg1 == "arg1" assert env.arg2 == "override_arg2" assert env.arg3 == "override_arg3" env.close() def test_import_module_during_make(): # Test custom environment which is registered at make env = gym.make( "tests.envs.utils:RegisterDuringMakeEnv-v0", disable_env_checker=True, ) assert isinstance(env.unwrapped, RegisterDuringMakeEnv) env.close() ================================================ FILE: tests/envs/test_mujoco.py ================================================ import numpy as np 
import pytest

import gym
from gym import envs
from gym.envs.registration import EnvSpec
from tests.envs.utils import mujoco_testing_env_specs

EPS = 1e-6


def verify_environments_match(
    old_env_id: str, new_env_id: str, seed: int = 1, num_actions: int = 1000
):
    """Verifies with two environment ids (old and new) are identical in obs, reward and done
    (except info where all old info must be contained in new info)."""
    old_env = envs.make(old_env_id, disable_env_checker=True)
    new_env = envs.make(new_env_id, disable_env_checker=True)

    old_reset_obs, old_info = old_env.reset(seed=seed)
    new_reset_obs, new_info = new_env.reset(seed=seed)

    np.testing.assert_allclose(old_reset_obs, new_reset_obs)

    for _ in range(num_actions):
        # Both envs step with the same sampled action; only the old env's
        # action space seeding matters for the sequence.
        action = old_env.action_space.sample()
        old_obs, old_reward, old_terminated, old_truncated, old_info = old_env.step(
            action
        )
        new_obs, new_reward, new_terminated, new_truncated, new_info = new_env.step(
            action
        )

        np.testing.assert_allclose(old_obs, new_obs, atol=EPS)
        np.testing.assert_allclose(old_reward, new_reward, atol=EPS)
        np.testing.assert_equal(old_terminated, new_terminated)
        np.testing.assert_equal(old_truncated, new_truncated)

        # Every old info key must exist in the new info with a matching value;
        # a missing key raises KeyError (relied upon by the v3-to-v2 test).
        for key in old_info:
            np.testing.assert_allclose(old_info[key], new_info[key], atol=EPS)

        if old_terminated or old_truncated:
            break


EXCLUDE_POS_FROM_OBS = [
    "Ant",
    "HalfCheetah",
    "Hopper",
    "Humanoid",
    "Swimmer",
    "Walker2d",
]


@pytest.mark.parametrize(
    "env_spec",
    mujoco_testing_env_specs,
    ids=[env_spec.id for env_spec in mujoco_testing_env_specs],
)
def test_obs_space_mujoco_environments(env_spec: EnvSpec):
    """Check that the returned observations are contained in the observation space of the environment"""
    env = env_spec.make(disable_env_checker=True)
    reset_obs, info = env.reset()
    # "Obseravtion" typo fixed to "Observation" in the assertion messages below.
    assert env.observation_space.contains(
        reset_obs
    ), f"Observation returned by reset() of {env_spec.id} is not contained in the default observation space {env.observation_space}."

    action = env.action_space.sample()
    step_obs, _, _, _, _ = env.step(action)
    assert env.observation_space.contains(
        step_obs
    ), f"Observation returned by step(action) of {env_spec.id} is not contained in the default observation space {env.observation_space}."

    if env_spec.name in EXCLUDE_POS_FROM_OBS and (
        env_spec.version == 4 or env_spec.version == 3
    ):
        env = env_spec.make(
            disable_env_checker=True, exclude_current_positions_from_observation=False
        )
        reset_obs, info = env.reset()
        assert env.observation_space.contains(
            reset_obs
        ), f"Observation of {env_spec.id} is not contained in the default observation space {env.observation_space} when excluding current position from observation."

        step_obs, _, _, _, _ = env.step(action)
        assert env.observation_space.contains(
            step_obs
        ), f"Observation returned by step(action) of {env_spec.id} is not contained in the default observation space {env.observation_space} when excluding current position from observation."

    # Ant-v4 has the option of including contact forces in the observation space with the use_contact_forces argument
    if env_spec.name == "Ant" and env_spec.version == 4:
        env = env_spec.make(disable_env_checker=True, use_contact_forces=True)
        reset_obs, info = env.reset()
        assert env.observation_space.contains(
            reset_obs
        ), f"Observation of {env_spec.id} is not contained in the default observation space {env.observation_space} when using contact forces."

        step_obs, _, _, _, _ = env.step(action)
        assert env.observation_space.contains(
            step_obs
        ), f"Observation returned by step(action) of {env_spec.id} is not contained in the default observation space {env.observation_space} when using contact forces."
MUJOCO_V2_V3_ENVS = [ spec.name for spec in mujoco_testing_env_specs if spec.version == 2 and f"{spec.name}-v3" in gym.envs.registry ] @pytest.mark.parametrize("env_name", MUJOCO_V2_V3_ENVS) def test_mujoco_v2_to_v3_conversion(env_name: str): """Checks that all v2 mujoco environments are the same as v3 environments.""" verify_environments_match(f"{env_name}-v2", f"{env_name}-v3") @pytest.mark.parametrize("env_name", MUJOCO_V2_V3_ENVS) def test_mujoco_incompatible_v3_to_v2(env_name: str): """Checks that the v3 environment are slightly different from v2, (v3 has additional info keys that v2 does not).""" with pytest.raises(KeyError): verify_environments_match(f"{env_name}-v3", f"{env_name}-v2") ================================================ FILE: tests/envs/test_register.py ================================================ """Tests that `gym.register` works as expected.""" import re from typing import Optional import pytest import gym @pytest.fixture(scope="function") def register_testing_envs(): """Registers testing environments.""" namespace = "MyAwesomeNamespace" versioned_name = "MyAwesomeVersionedEnv" unversioned_name = "MyAwesomeUnversionedEnv" versions = [1, 3, 5] for version in versions: env_id = f"{namespace}/{versioned_name}-v{version}" gym.register( id=env_id, entry_point="tests.envs.utils_envs:ArgumentEnv", kwargs={ "arg1": "arg1", "arg2": "arg2", "arg3": "arg3", }, ) gym.register( id=f"{namespace}/{unversioned_name}", entry_point="tests.env.utils_envs:ArgumentEnv", kwargs={ "arg1": "arg1", "arg2": "arg2", "arg3": "arg3", }, ) yield for version in versions: env_id = f"{namespace}/{versioned_name}-v{version}" del gym.envs.registry[env_id] del gym.envs.registry[f"{namespace}/{unversioned_name}"] @pytest.mark.parametrize( "env_id, namespace, name, version", [ ( "MyAwesomeNamespace/MyAwesomeEnv-v0", "MyAwesomeNamespace", "MyAwesomeEnv", 0, ), ("MyAwesomeEnv-v0", None, "MyAwesomeEnv", 0), ("MyAwesomeEnv", None, "MyAwesomeEnv", None), 
("MyAwesomeEnv-vfinal-v0", None, "MyAwesomeEnv-vfinal", 0), ("MyAwesomeEnv-vfinal", None, "MyAwesomeEnv-vfinal", None), ("MyAwesomeEnv--", None, "MyAwesomeEnv--", None), ("MyAwesomeEnv-v", None, "MyAwesomeEnv-v", None), ], ) def test_register( env_id: str, namespace: Optional[str], name: str, version: Optional[int] ): gym.register(env_id, "no-entry-point") assert gym.spec(env_id).id == env_id full_name = f"{name}" if namespace: full_name = f"{namespace}/{full_name}" if version is not None: full_name = f"{full_name}-v{version}" assert full_name in gym.envs.registry.keys() del gym.envs.registry[env_id] @pytest.mark.parametrize( "env_id", [ "“Breakout-v0”", "MyNotSoAwesomeEnv-vNone\n", "MyNamespace///MyNotSoAwesomeEnv-vNone", ], ) def test_register_error(env_id): with pytest.raises(gym.error.Error, match=f"^Malformed environment ID: {env_id}"): gym.register(env_id, "no-entry-point") @pytest.mark.parametrize( "env_id_input, env_id_suggested", [ ("cartpole-v1", "CartPole"), ("blackjack-v1", "Blackjack"), ("Blackjock-v1", "Blackjack"), ("mountaincarcontinuous-v0", "MountainCarContinuous"), ("taxi-v3", "Taxi"), ("taxi-v30", "Taxi"), ("MyAwesomeNamspce/MyAwesomeVersionedEnv-v1", "MyAwesomeNamespace"), ("MyAwesomeNamspce/MyAwesomeUnversionedEnv", "MyAwesomeNamespace"), ("MyAwesomeNamespace/MyAwesomeUnversioneEnv", "MyAwesomeUnversionedEnv"), ("MyAwesomeNamespace/MyAwesomeVersioneEnv", "MyAwesomeVersionedEnv"), ], ) def test_env_suggestions(register_testing_envs, env_id_input, env_id_suggested): with pytest.raises( gym.error.UnregisteredEnv, match=f"Did you mean: `{env_id_suggested}`?" 
): gym.make(env_id_input, disable_env_checker=True) @pytest.mark.parametrize( "env_id_input, suggested_versions, default_version", [ ("CartPole-v12", "`v0`, `v1`", False), ("Blackjack-v10", "`v1`", False), ("MountainCarContinuous-v100", "`v0`", False), ("Taxi-v30", "`v3`", False), ("MyAwesomeNamespace/MyAwesomeVersionedEnv-v6", "`v1`, `v3`, `v5`", False), ("MyAwesomeNamespace/MyAwesomeUnversionedEnv-v6", "", True), ], ) def test_env_version_suggestions( register_testing_envs, env_id_input, suggested_versions, default_version ): if default_version: with pytest.raises( gym.error.DeprecatedEnv, match="It provides the default version", # env name, ): gym.make(env_id_input, disable_env_checker=True) else: with pytest.raises( gym.error.UnregisteredEnv, match=f"It provides versioned environments: \\[ {suggested_versions} \\]", ): gym.make(env_id_input, disable_env_checker=True) def test_register_versioned_unversioned(): # Register versioned then unversioned versioned_env = "Test/MyEnv-v0" gym.register(versioned_env, "no-entry-point") assert gym.envs.spec(versioned_env).id == versioned_env unversioned_env = "Test/MyEnv" with pytest.raises( gym.error.RegistrationError, match=re.escape( "Can't register the unversioned environment `Test/MyEnv` when the versioned environment `Test/MyEnv-v0` of the same name already exists" ), ): gym.register(unversioned_env, "no-entry-point") # Clean everything del gym.envs.registry[versioned_env] # Register unversioned then versioned gym.register(unversioned_env, "no-entry-point") assert gym.envs.spec(unversioned_env).id == unversioned_env with pytest.raises( gym.error.RegistrationError, match=re.escape( "Can't register the versioned environment `Test/MyEnv-v0` when the unversioned environment `Test/MyEnv` of the same name already exists." 
), ): gym.register(versioned_env, "no-entry-point") # Clean everything del gym.envs.registry[unversioned_env] def test_make_latest_versioned_env(register_testing_envs): with pytest.warns( UserWarning, match=re.escape( "Using the latest versioned environment `MyAwesomeNamespace/MyAwesomeVersionedEnv-v5` instead of the unversioned environment `MyAwesomeNamespace/MyAwesomeVersionedEnv`." ), ): env = gym.make( "MyAwesomeNamespace/MyAwesomeVersionedEnv", disable_env_checker=True ) assert env.spec.id == "MyAwesomeNamespace/MyAwesomeVersionedEnv-v5" def test_namespace(): # Check if the namespace context manager works with gym.envs.registration.namespace("MyDefaultNamespace"): gym.register("MyDefaultEnvironment-v0", "no-entry-point") gym.register("MyDefaultEnvironment-v1", "no-entry-point") assert "MyDefaultNamespace/MyDefaultEnvironment-v0" in gym.envs.registry assert "MyDefaultEnvironment-v1" in gym.envs.registry del gym.envs.registry["MyDefaultNamespace/MyDefaultEnvironment-v0"] del gym.envs.registry["MyDefaultEnvironment-v1"] ================================================ FILE: tests/envs/test_spec.py ================================================ """Tests that gym.spec works as expected.""" import re import pytest import gym def test_spec(): spec = gym.spec("CartPole-v1") assert spec.id == "CartPole-v1" assert spec is gym.envs.registry["CartPole-v1"] def test_spec_kwargs(): map_name_value = "8x8" env = gym.make("FrozenLake-v1", map_name=map_name_value) assert env.spec.kwargs["map_name"] == map_name_value def test_spec_missing_lookup(): gym.register(id="Test1-v0", entry_point="no-entry-point") gym.register(id="Test1-v15", entry_point="no-entry-point") gym.register(id="Test1-v9", entry_point="no-entry-point") gym.register(id="Other1-v100", entry_point="no-entry-point") with pytest.raises( gym.error.DeprecatedEnv, match=re.escape( "Environment version v1 for `Test1` is deprecated. Please use `Test1-v15` instead." 
), ): gym.spec("Test1-v1") with pytest.raises( gym.error.UnregisteredEnv, match=re.escape( "Environment version `v1000` for environment `Test1` doesn't exist. It provides versioned environments: [ `v0`, `v9`, `v15` ]." ), ): gym.spec("Test1-v1000") with pytest.raises( gym.error.UnregisteredEnv, match=re.escape("Environment Unknown1 doesn't exist. "), ): gym.spec("Unknown1-v1") def test_spec_malformed_lookup(): with pytest.raises( gym.error.Error, match=f'^{re.escape("Malformed environment ID: “Breakout-v0”.(Currently all IDs must be of the form [namespace/](env-name)-v(version). (namespace is optional))")}$', ): gym.spec("“Breakout-v0”") def test_spec_versioned_lookups(): gym.register("test/Test2-v5", "no-entry-point") with pytest.raises( gym.error.VersionNotFound, match=re.escape( "Environment version `v9` for environment `test/Test2` doesn't exist. It provides versioned environments: [ `v5` ]." ), ): gym.spec("test/Test2-v9") with pytest.raises( gym.error.DeprecatedEnv, match=re.escape( "Environment version v4 for `test/Test2` is deprecated. Please use `test/Test2-v5` instead." ), ): gym.spec("test/Test2-v4") assert gym.spec("test/Test2-v5") is not None def test_spec_default_lookups(): gym.register("test/Test3", "no-entry-point") with pytest.raises( gym.error.DeprecatedEnv, match=re.escape( "Environment version `v0` for environment `test/Test3` doesn't exist. It provides the default version test/Test3`." ), ): gym.spec("test/Test3-v0") assert gym.spec("test/Test3") is not None ================================================ FILE: tests/envs/utils.py ================================================ """Finds all the specs that we can test with""" from typing import List, Optional import numpy as np import gym from gym import error, logger from gym.envs.registration import EnvSpec def try_make_env(env_spec: EnvSpec) -> Optional[gym.Env]: """Tries to make the environment showing if it is possible. 
Warning the environments have no wrappers, including time limit and order enforcing. """ # To avoid issues with registered environments during testing, we check that the spec entry points are from gym.envs. if "gym.envs." in env_spec.entry_point: try: return env_spec.make(disable_env_checker=True).unwrapped except (ImportError, error.DependencyNotInstalled) as e: logger.warn(f"Not testing {env_spec.id} due to error: {e}") return None # Tries to make all environment to test with all_testing_initialised_envs: List[Optional[gym.Env]] = [ try_make_env(env_spec) for env_spec in gym.envs.registry.values() ] all_testing_initialised_envs: List[gym.Env] = [ env for env in all_testing_initialised_envs if env is not None ] # All testing, mujoco and gym environment specs all_testing_env_specs: List[EnvSpec] = [ env.spec for env in all_testing_initialised_envs ] mujoco_testing_env_specs: List[EnvSpec] = [ env_spec for env_spec in all_testing_env_specs if "gym.envs.mujoco" in env_spec.entry_point ] gym_testing_env_specs: List[EnvSpec] = [ env_spec for env_spec in all_testing_env_specs if any( f"gym.envs.{ep}" in env_spec.entry_point for ep in ["box2d", "classic_control", "toy_text"] ) ] # TODO, add minimum testing env spec in testing minimum_testing_env_specs = [ env_spec for env_spec in [ "CartPole-v1", "MountainCarContinuous-v0", "LunarLander-v2", "LunarLanderContinuous-v2", "CarRacing-v2", "Blackjack-v1", "Reacher-v4", ] if env_spec in all_testing_env_specs ] def assert_equals(a, b, prefix=None): """Assert equality of data structures `a` and `b`. 
Args: a: first data structure b: second data structure prefix: prefix for failed assertion message for types and dicts """ assert type(a) == type(b), f"{prefix}Differing types: {a} and {b}" if isinstance(a, dict): assert list(a.keys()) == list(b.keys()), f"{prefix}Key sets differ: {a} and {b}" for k in a.keys(): v_a = a[k] v_b = b[k] assert_equals(v_a, v_b) elif isinstance(a, np.ndarray): np.testing.assert_array_equal(a, b) elif isinstance(a, tuple): for elem_from_a, elem_from_b in zip(a, b): assert_equals(elem_from_a, elem_from_b) else: assert a == b ================================================ FILE: tests/envs/utils_envs.py ================================================ import gym class RegisterDuringMakeEnv(gym.Env): """Used in `test_registration.py` to check if `env.make` can import and register an env""" def __init__(self): self.action_space = gym.spaces.Discrete(1) self.observation_space = gym.spaces.Discrete(1) class ArgumentEnv(gym.Env): observation_space = gym.spaces.Box(low=-1, high=1, shape=(1,)) action_space = gym.spaces.Box(low=-1, high=1, shape=(1,)) def __init__(self, arg1, arg2, arg3): self.arg1 = arg1 self.arg2 = arg2 self.arg3 = arg3 # Environments to test render_mode class NoHuman(gym.Env): """Environment that does not have human-rendering.""" metadata = {"render_modes": ["rgb_array_list"], "render_fps": 4} def __init__(self, render_mode=None): assert render_mode in self.metadata["render_modes"] self.render_mode = render_mode class NoHumanOldAPI(gym.Env): """Environment that does not have human-rendering.""" metadata = {"render_modes": ["rgb_array_list"], "render_fps": 4} def __init__(self): pass class NoHumanNoRGB(gym.Env): """Environment that has neither human- nor rgb-rendering""" metadata = {"render_modes": ["ascii"], "render_fps": 4} def __init__(self, render_mode=None): assert render_mode in self.metadata["render_modes"] self.render_mode = render_mode ================================================ FILE: tests/spaces/__init__.py 
================================================ ================================================ FILE: tests/spaces/test_box.py ================================================ import re import warnings import numpy as np import pytest import gym.error from gym.spaces import Box from gym.spaces.box import get_inf @pytest.mark.parametrize( "box,expected_shape", [ ( # Test with same 1-dim low and high shape Box(low=np.zeros(2), high=np.ones(2), dtype=np.int32), (2,), ), ( # Test with same multi-dim low and high shape Box(low=np.zeros((2, 1)), high=np.ones((2, 1)), dtype=np.int32), (2, 1), ), ( # Test with scalar low high and different shape Box(low=0, high=1, shape=(5, 2)), (5, 2), ), (Box(low=0, high=1), (1,)), # Test with int and int (Box(low=0.0, high=1.0), (1,)), # Test with float and float (Box(low=np.zeros(1)[0], high=np.ones(1)[0]), (1,)), (Box(low=0.0, high=1), (1,)), # Test with float and int (Box(low=0, high=np.int32(1)), (1,)), # Test with python int and numpy int32 (Box(low=0, high=np.ones(3)), (3,)), # Test with array and scalar (Box(low=np.zeros(3), high=1.0), (3,)), # Test with array and scalar ], ) def test_shape_inference(box, expected_shape): """Test that the shape inference is as expected.""" assert box.shape == expected_shape assert box.sample().shape == expected_shape @pytest.mark.parametrize( "value,valid", [ (1, True), (1.0, True), (np.int32(1), True), (np.float32(1.0), True), (np.zeros(2, dtype=np.float32), True), (np.zeros((2, 2), dtype=np.float32), True), (np.inf, True), (np.nan, True), # This is a weird case that we allow (True, False), (np.bool8(True), False), (1 + 1j, False), (np.complex128(1 + 1j), False), ("string", False), ], ) def test_low_high_values(value, valid: bool): """Test what `low` and `high` values are valid for `Box` space.""" if valid: with warnings.catch_warnings(record=True) as caught_warnings: Box(low=value, high=value) assert len(caught_warnings) == 0, tuple( warning.message for warning in caught_warnings ) else: with 
pytest.raises( ValueError, match=re.escape( "expect their types to be np.ndarray, an integer or a float" ), ): Box(low=value, high=value) @pytest.mark.parametrize( "low,high,kwargs,error,message", [ ( 0, 1, {"dtype": None}, AssertionError, "Box dtype must be explicitly provided, cannot be None.", ), ( 0, 1, {"shape": (None,)}, AssertionError, "Expect all shape elements to be an integer, actual type: (,)", ), ( 0, 1, { "shape": ( 1, None, ) }, AssertionError, "Expect all shape elements to be an integer, actual type: (, )", ), ( 0, 1, { "shape": ( np.int64(1), None, ) }, AssertionError, "Expect all shape elements to be an integer, actual type: (, )", ), ( None, None, {}, ValueError, "Box shape is inferred from low and high, expect their types to be np.ndarray, an integer or a float, actual type low: , high: ", ), ( 0, None, {}, ValueError, "Box shape is inferred from low and high, expect their types to be np.ndarray, an integer or a float, actual type low: , high: ", ), ( np.zeros(3), np.ones(2), {}, AssertionError, "high.shape doesn't match provided shape, high.shape: (2,), shape: (3,)", ), ], ) def test_init_errors(low, high, kwargs, error, message): """Test all constructor errors.""" with pytest.raises(error, match=f"^{re.escape(message)}$"): Box(low=low, high=high, **kwargs) def test_dtype_check(): """Tests the Box contains function with different dtypes.""" # Related Issues: # https://github.com/openai/gym/issues/2357 # https://github.com/openai/gym/issues/2298 space = Box(0, 1, (), dtype=np.float32) # casting will match the correct type assert np.array(0.5, dtype=np.float32) in space # float16 is in float32 space assert np.array(0.5, dtype=np.float16) in space # float64 is not in float32 space assert np.array(0.5, dtype=np.float64) not in space @pytest.mark.parametrize( "space", [ Box(low=0, high=np.inf, shape=(2,), dtype=np.int32), Box(low=0, high=np.inf, shape=(2,), dtype=np.float32), Box(low=0, high=np.inf, shape=(2,), dtype=np.int64), Box(low=0, 
high=np.inf, shape=(2,), dtype=np.float64), Box(low=-np.inf, high=0, shape=(2,), dtype=np.int32), Box(low=-np.inf, high=0, shape=(2,), dtype=np.float32), Box(low=-np.inf, high=0, shape=(2,), dtype=np.int64), Box(low=-np.inf, high=0, shape=(2,), dtype=np.float64), Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.int32), Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float32), Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.int64), Box(low=-np.inf, high=np.inf, shape=(2,), dtype=np.float64), Box(low=0, high=np.inf, shape=(2, 3), dtype=np.int32), Box(low=0, high=np.inf, shape=(2, 3), dtype=np.float32), Box(low=0, high=np.inf, shape=(2, 3), dtype=np.int64), Box(low=0, high=np.inf, shape=(2, 3), dtype=np.float64), Box(low=-np.inf, high=0, shape=(2, 3), dtype=np.int32), Box(low=-np.inf, high=0, shape=(2, 3), dtype=np.float32), Box(low=-np.inf, high=0, shape=(2, 3), dtype=np.int64), Box(low=-np.inf, high=0, shape=(2, 3), dtype=np.float64), Box(low=-np.inf, high=np.inf, shape=(2, 3), dtype=np.int32), Box(low=-np.inf, high=np.inf, shape=(2, 3), dtype=np.float32), Box(low=-np.inf, high=np.inf, shape=(2, 3), dtype=np.int64), Box(low=-np.inf, high=np.inf, shape=(2, 3), dtype=np.float64), Box(low=np.array([-np.inf, 0]), high=np.array([0.0, np.inf]), dtype=np.int32), Box(low=np.array([-np.inf, 0]), high=np.array([0.0, np.inf]), dtype=np.float32), Box(low=np.array([-np.inf, 0]), high=np.array([0.0, np.inf]), dtype=np.int64), Box(low=np.array([-np.inf, 0]), high=np.array([0.0, np.inf]), dtype=np.float64), ], ) def test_infinite_space(space): """ To test spaces that are passed in have only 0 or infinite bounds because `space.high` and `space.low` are both modified within the init, we check for infinite when we know it's not 0 """ assert np.all( space.low < space.high ), f"Box low bound ({space.low}) is not lower than the high bound ({space.high})" space.seed(0) sample = space.sample() # check if space contains sample assert ( sample in space ), f"Sample ({sample}) not 
inside space according to `space.contains()`" # manually check that the sign of the sample is within the bounds assert np.all( np.sign(sample) <= np.sign(space.high) ), f"Sign of sample ({sample}) is less than space upper bound ({space.high})" assert np.all( np.sign(space.low) <= np.sign(sample) ), f"Sign of sample ({sample}) is more than space lower bound ({space.low})" # check that int bounds are bounded for everything # but floats are unbounded for infinite if np.any(space.high != 0): assert ( space.is_bounded("above") is False ), "inf upper bound supposed to be unbounded" else: assert ( space.is_bounded("above") is True ), "non-inf upper bound supposed to be bounded" if np.any(space.low != 0): assert ( space.is_bounded("below") is False ), "inf lower bound supposed to be unbounded" else: assert ( space.is_bounded("below") is True ), "non-inf lower bound supposed to be bounded" if np.any(space.low != 0) or np.any(space.high != 0): assert space.is_bounded("both") is False else: assert space.is_bounded("both") is True # check for dtype assert ( space.high.dtype == space.dtype ), f"High's dtype {space.high.dtype} doesn't match `space.dtype`'" assert ( space.low.dtype == space.dtype ), f"Low's dtype {space.high.dtype} doesn't match `space.dtype`'" with pytest.raises( ValueError, match="manner is not in {'below', 'above', 'both'}, actual value:" ): space.is_bounded("test") def test_legacy_state_pickling(): legacy_state = { "dtype": np.dtype("float32"), "_shape": (5,), "low": np.array([0.0, 0.0, 0.0, 0.0, 0.0], dtype=np.float32), "high": np.array([1.0, 1.0, 1.0, 1.0, 1.0], dtype=np.float32), "bounded_below": np.array([True, True, True, True, True]), "bounded_above": np.array([True, True, True, True, True]), "_np_random": None, } b = Box(-1, 1, ()) assert "low_repr" in b.__dict__ and "high_repr" in b.__dict__ del b.__dict__["low_repr"] del b.__dict__["high_repr"] assert "low_repr" not in b.__dict__ and "high_repr" not in b.__dict__ b.__setstate__(legacy_state) assert 
b.low_repr == "0.0" assert b.high_repr == "1.0" def test_get_inf(): """Tests that get inf function works as expected, primarily for coverage.""" assert get_inf(np.float32, "+") == np.inf assert get_inf(np.float16, "-") == -np.inf with pytest.raises( TypeError, match=re.escape("Unknown sign *, use either '+' or '-'") ): get_inf(np.float32, "*") assert get_inf(np.int16, "+") == 32765 assert get_inf(np.int8, "-") == -126 with pytest.raises( TypeError, match=re.escape("Unknown sign *, use either '+' or '-'") ): get_inf(np.int32, "*") with pytest.raises( ValueError, match=re.escape("Unknown dtype for infinite bounds"), ): get_inf(np.complex_, "+") def test_sample_mask(): """Box cannot have a mask applied.""" space = Box(0, 1) with pytest.raises( gym.error.Error, match=re.escape("Box.sample cannot be provided a mask, actual value: "), ): space.sample(mask=np.array([0, 1, 0], dtype=np.int8)) ================================================ FILE: tests/spaces/test_dict.py ================================================ from collections import OrderedDict import numpy as np import pytest from gym.spaces import Box, Dict, Discrete def test_dict_init(): with pytest.raises( AssertionError, match=r"^Unexpected Dict space input, expecting dict, OrderedDict or Sequence, actual type: ", ): Dict(Discrete(2)) with pytest.raises( ValueError, match="Dict space keyword 'a' already exists in the spaces dictionary", ): Dict({"a": Discrete(3)}, a=Box(0, 1)) with pytest.raises( AssertionError, match="Dict space element is not an instance of Space: key='b', space=Box", ): Dict(a=Discrete(2), b="Box") with pytest.warns(None) as warnings: a = Dict({"a": Discrete(2), "b": Box(low=0.0, high=1.0)}) b = Dict(OrderedDict(a=Discrete(2), b=Box(low=0.0, high=1.0))) c = Dict((("a", Discrete(2)), ("b", Box(low=0.0, high=1.0)))) d = Dict(a=Discrete(2), b=Box(low=0.0, high=1.0)) assert a == b == c == d assert len(warnings) == 0 with pytest.warns(None) as warnings: Dict({1: Discrete(2), "a": 
Discrete(3)}) assert len(warnings) == 0 DICT_SPACE = Dict( { "a": Box(low=0, high=1, shape=(3, 3)), "b": Dict( { "b_1": Box(low=-100, high=100, shape=(2,)), "b_2": Box(low=-1, high=1, shape=(2,)), } ), "c": Discrete(5), } ) def test_dict_seeding(): seeds = DICT_SPACE.seed( { "a": 0, "b": { "b_1": 1, "b_2": 2, }, "c": 3, } ) assert all(isinstance(seed, int) for seed in seeds) # "Unpack" the dict sub-spaces into individual spaces a = Box(low=0, high=1, shape=(3, 3), seed=0) b_1 = Box(low=-100, high=100, shape=(2,), seed=1) b_2 = Box(low=-1, high=1, shape=(2,), seed=2) c = Discrete(5, seed=3) for i in range(10): dict_sample = DICT_SPACE.sample() assert np.all(dict_sample["a"] == a.sample()) assert np.all(dict_sample["b"]["b_1"] == b_1.sample()) assert np.all(dict_sample["b"]["b_2"] == b_2.sample()) assert dict_sample["c"] == c.sample() def test_int_seeding(): seeds = DICT_SPACE.seed(1) assert all(isinstance(seed, int) for seed in seeds) # rng, seeds = seeding.np_random(1) # subseeds = rng.choice(np.iinfo(int).max, size=3, replace=False) # b_rng, b_seeds = seeding.np_random(int(subseeds[1])) # b_subseeds = b_rng.choice(np.iinfo(int).max, size=2, replace=False) # "Unpack" the dict sub-spaces into individual spaces a = Box(low=0, high=1, shape=(3, 3), seed=seeds[1]) b_1 = Box(low=-100, high=100, shape=(2,), seed=seeds[3]) b_2 = Box(low=-1, high=1, shape=(2,), seed=seeds[4]) c = Discrete(5, seed=seeds[5]) for i in range(10): dict_sample = DICT_SPACE.sample() assert np.all(dict_sample["a"] == a.sample()) assert np.all(dict_sample["b"]["b_1"] == b_1.sample()) assert np.all(dict_sample["b"]["b_2"] == b_2.sample()) assert dict_sample["c"] == c.sample() def test_none_seeding(): seeds = DICT_SPACE.seed(None) assert len(seeds) == 4 and all(isinstance(seed, int) for seed in seeds) def test_bad_seed(): with pytest.raises(TypeError): DICT_SPACE.seed("a") def test_mapping(): """The Gym Dict space inherits from Mapping that allows it to appear like a standard python Dictionary.""" 
assert len(DICT_SPACE) == 3 a = DICT_SPACE["a"] b = Discrete(5) assert a != b DICT_SPACE["a"] = b assert DICT_SPACE["a"] == b with pytest.raises( AssertionError, match="Trying to set a to Dict space with value that is not a gym space, actual type: ", ): DICT_SPACE["a"] = 5 DICT_SPACE["a"] = a def test_iterator(): """Tests the Dict `__iter__` function correctly returns keys in the subspaces""" for key in DICT_SPACE: assert key in DICT_SPACE.spaces assert {key for key in DICT_SPACE} == DICT_SPACE.spaces.keys() ================================================ FILE: tests/spaces/test_discrete.py ================================================ import numpy as np from gym.spaces import Discrete def test_space_legacy_pickling(): """Test the legacy pickle of Discrete that is missing the `start` parameter.""" legacy_state = { "shape": ( 1, 2, 3, ), "dtype": np.int64, "np_random": np.random.default_rng(), "n": 3, } space = Discrete(1) space.__setstate__(legacy_state) assert space.shape == legacy_state["shape"] assert space.np_random == legacy_state["np_random"] assert space.n == 3 assert space.dtype == legacy_state["dtype"] # Test that start is missing assert "start" in space.__dict__ del space.__dict__["start"] # legacy did not include start param assert "start" not in space.__dict__ space.__setstate__(legacy_state) assert space.start == 0 def test_sample_mask(): space = Discrete(4, start=2) assert 2 <= space.sample() < 6 assert space.sample(mask=np.array([0, 1, 0, 0], dtype=np.int8)) == 3 assert space.sample(mask=np.array([0, 0, 0, 0], dtype=np.int8)) == 2 assert space.sample(mask=np.array([0, 1, 0, 1], dtype=np.int8)) in [3, 5] ================================================ FILE: tests/spaces/test_graph.py ================================================ import re import numpy as np import pytest from gym.spaces import Discrete, Graph, GraphInstance def test_node_space_sample(): space = Graph(node_space=Discrete(3), edge_space=None) space.seed(0) sample = space.sample( 
mask=(tuple(np.array([0, 1, 0], dtype=np.int8) for _ in range(5)), None), num_nodes=5, ) assert sample in space assert np.all(sample.nodes == 1) sample = space.sample( ( (np.array([1, 0, 0], dtype=np.int8), np.array([0, 1, 0], dtype=np.int8)), None, ), num_nodes=2, ) assert sample in space assert np.all(sample.nodes == np.array([0, 1])) with pytest.warns( UserWarning, match=re.escape("The number of edges is set (5) but the edge space is None."), ): sample = space.sample(num_edges=5) assert sample in space # Change the node_space or edge_space to a non-Box or discrete space. # This should not happen, test is primarily to increase coverage. with pytest.raises( TypeError, match=re.escape( "Expects base space to be Box and Discrete, actual space: " ), ): space.node_space = "abc" space.sample() def test_edge_space_sample(): space = Graph(node_space=Discrete(3), edge_space=Discrete(3)) space.seed(0) # When num_nodes>1 then num_edges is set to 0 assert space.sample(num_nodes=1).edges is None assert 0 <= len(space.sample(num_edges=3).edges) < 6 sample = space.sample(mask=(None, np.array([0, 1, 0], dtype=np.int8))) assert np.all(sample.edges == 1) or sample.edges is None sample = space.sample( mask=( None, ( np.array([1, 0, 0], dtype=np.int8), np.array([0, 1, 0], dtype=np.int8), np.array([0, 0, 1], dtype=np.int8), ), ), num_edges=3, ) assert np.all(sample.edges == np.array([0, 1, 2])) with pytest.raises( AssertionError, match="Expects the number of edges to be greater than 0, actual value: -1", ): space.sample(num_edges=-1) space = Graph(node_space=Discrete(3), edge_space=None) with pytest.warns( UserWarning, match=re.escape( "\x1b[33mWARN: The number of edges is set (5) but the edge space is None.\x1b[0m" ), ): sample = space.sample(num_edges=5) assert sample.edges is None @pytest.mark.parametrize( "sample", [ "abc", GraphInstance( nodes=None, edges=np.array([0, 1]), edge_links=np.array([[0, 1], [1, 0]]) ), GraphInstance( nodes=np.array([10, 1, 0]), edges=np.array([0, 1]), 
edge_links=np.array([[0, 1], [1, 0]]), ), GraphInstance( nodes=np.array([0, 1]), edges=None, edge_links=np.array([[0, 1], [1, 0]]) ), GraphInstance(nodes=np.array([0, 1]), edges=np.array([0, 1]), edge_links=None), GraphInstance( nodes=np.array([1, 2]), edges=np.array([10, 1]), edge_links=np.array([[0, 1], [1, 0]]), ), GraphInstance( nodes=np.array([1, 2]), edges=np.array([0, 1]), edge_links=np.array([[0.5, 1.0], [2.0, 1.0]]), ), GraphInstance( nodes=np.array([1, 2]), edges=np.array([10, 1]), edge_links=np.array([0, 1]) ), GraphInstance( nodes=np.array([1, 2]), edges=np.array([0, 1]), edge_links=np.array([[[0], [1]], [[0], [0]]]), ), GraphInstance( nodes=np.array([1, 2]), edges=np.array([0, 1]), edge_links=np.array([[10, 1], [0, 0]]), ), GraphInstance( nodes=np.array([1, 2]), edges=np.array([0, 1]), edge_links=np.array([[-10, 1], [0, 0]]), ), ], ) def test_not_contains(sample): space = Graph(node_space=Discrete(2), edge_space=Discrete(2)) assert sample not in space ================================================ FILE: tests/spaces/test_multibinary.py ================================================ import numpy as np from gym.spaces import MultiBinary def test_sample(): space = MultiBinary(4) sample = space.sample(mask=np.array([0, 0, 1, 1], dtype=np.int8)) assert np.all(sample == [0, 0, 1, 1]) sample = space.sample(mask=np.array([0, 1, 2, 2], dtype=np.int8)) assert sample[0] == 0 and sample[1] == 1 assert sample[2] == 0 or sample[2] == 1 assert sample[3] == 0 or sample[3] == 1 space = MultiBinary(np.array([2, 3])) sample = space.sample(mask=np.array([[0, 0, 0], [1, 1, 1]], dtype=np.int8)) assert np.all(sample == [[0, 0, 0], [1, 1, 1]]), sample ================================================ FILE: tests/spaces/test_multidiscrete.py ================================================ import pytest from gym.spaces import Discrete, MultiDiscrete from gym.utils.env_checker import data_equivalence def test_multidiscrete_as_tuple(): # 1D multi-discrete space = 
MultiDiscrete([3, 4, 5]) assert space.shape == (3,) assert space[0] == Discrete(3) assert space[0:1] == MultiDiscrete([3]) assert space[0:2] == MultiDiscrete([3, 4]) assert space[:] == space and space[:] is not space # 2D multi-discrete space = MultiDiscrete([[3, 4, 5], [6, 7, 8]]) assert space.shape == (2, 3) assert space[0, 1] == Discrete(4) assert space[0] == MultiDiscrete([3, 4, 5]) assert space[0:1] == MultiDiscrete([[3, 4, 5]]) assert space[0:2, :] == MultiDiscrete([[3, 4, 5], [6, 7, 8]]) assert space[:, 0:1] == MultiDiscrete([[3], [6]]) assert space[0:2, 0:2] == MultiDiscrete([[3, 4], [6, 7]]) assert space[:] == space and space[:] is not space assert space[:, :] == space and space[:, :] is not space def test_multidiscrete_subspace_reproducibility(): # 1D multi-discrete space = MultiDiscrete([100, 200, 300]) space.seed() assert data_equivalence(space[0].sample(), space[0].sample()) assert data_equivalence(space[0:1].sample(), space[0:1].sample()) assert data_equivalence(space[0:2].sample(), space[0:2].sample()) assert data_equivalence(space[:].sample(), space[:].sample()) assert data_equivalence(space[:].sample(), space.sample()) # 2D multi-discrete space = MultiDiscrete([[300, 400, 500], [600, 700, 800]]) space.seed() assert data_equivalence(space[0, 1].sample(), space[0, 1].sample()) assert data_equivalence(space[0].sample(), space[0].sample()) assert data_equivalence(space[0:1].sample(), space[0:1].sample()) assert data_equivalence(space[0:2, :].sample(), space[0:2, :].sample()) assert data_equivalence(space[:, 0:1].sample(), space[:, 0:1].sample()) assert data_equivalence(space[0:2, 0:2].sample(), space[0:2, 0:2].sample()) assert data_equivalence(space[:].sample(), space[:].sample()) assert data_equivalence(space[:, :].sample(), space[:, :].sample()) assert data_equivalence(space[:, :].sample(), space.sample()) def test_multidiscrete_length(): space = MultiDiscrete(nvec=[3, 2, 4]) assert len(space) == 3 space = MultiDiscrete(nvec=[[2, 3], [3, 2]]) with 
pytest.warns( UserWarning, match="Getting the length of a multi-dimensional MultiDiscrete space.", ): assert len(space) == 2 ================================================ FILE: tests/spaces/test_sequence.py ================================================ import re import numpy as np import pytest import gym.spaces def test_sample(): """Tests the sequence sampling works as expects and the errors are correctly raised.""" space = gym.spaces.Sequence(gym.spaces.Box(0, 1)) # Test integer mask length for length in range(4): sample = space.sample(mask=(length, None)) assert sample in space assert len(sample) == length with pytest.raises( AssertionError, match=re.escape( "Expects the length mask to be greater than or equal to zero, actual value: -1" ), ): space.sample(mask=(-1, None)) # Test np.array mask length sample = space.sample(mask=(np.array([5]), None)) assert sample in space assert len(sample) == 5 sample = space.sample(mask=(np.array([3, 4, 5]), None)) assert sample in space assert len(sample) in [3, 4, 5] with pytest.raises( AssertionError, match=re.escape( "Expects the shape of the length mask to be 1-dimensional, actual shape: (2, 2)" ), ): space.sample(mask=(np.array([[2, 2], [2, 2]]), None)) with pytest.raises( AssertionError, match=re.escape( "Expects all values in the length_mask to be greater than or equal to zero, actual values: [ 1 2 -1]" ), ): space.sample(mask=(np.array([1, 2, -1]), None)) # Test with an invalid length with pytest.raises( TypeError, match=re.escape( "Expects the type of length_mask to an integer or a np.ndarray, actual type: " ), ): space.sample(mask=("abc", None)) ================================================ FILE: tests/spaces/test_space.py ================================================ from functools import partial import pytest from gym import Space from gym.spaces import utils TESTING_SPACE = Space() @pytest.mark.parametrize( "func", [ TESTING_SPACE.sample, partial(TESTING_SPACE.contains, None), partial(utils.flatdim, 
TESTING_SPACE), partial(utils.flatten, TESTING_SPACE, None), partial(utils.flatten_space, TESTING_SPACE), partial(utils.unflatten, TESTING_SPACE, None), ], ) def test_not_implemented_errors(func): with pytest.raises(NotImplementedError): func() ================================================ FILE: tests/spaces/test_spaces.py ================================================ import copy import itertools import json # note: ujson fails this test due to float equality import pickle import tempfile from typing import List, Union import numpy as np import pytest from gym.spaces import Box, Discrete, MultiBinary, MultiDiscrete, Space, Text from gym.utils import seeding from gym.utils.env_checker import data_equivalence from tests.spaces.utils import ( TESTING_FUNDAMENTAL_SPACES, TESTING_FUNDAMENTAL_SPACES_IDS, TESTING_SPACES, TESTING_SPACES_IDS, ) # Due to this test taking a 1ms each then we don't mind generating so many tests # This generates all pairs of spaces of the same type in TESTING_SPACES TESTING_SPACES_PERMUTATIONS = list( itertools.chain( *[ list(itertools.permutations(list(group), r=2)) for key, group in itertools.groupby( TESTING_SPACES, key=lambda space: type(space) ) ] ) ) @pytest.mark.parametrize("space", TESTING_SPACES, ids=TESTING_SPACES_IDS) def test_roundtripping(space: Space): """Tests if space samples passed to `to_jsonable` and `from_jsonable` produce the original samples.""" sample_1 = space.sample() sample_2 = space.sample() # Convert the samples to json, dump + load json and convert back to python sample_json = space.to_jsonable([sample_1, sample_2]) sample_roundtripped = json.loads(json.dumps(sample_json)) sample_1_prime, sample_2_prime = space.from_jsonable(sample_roundtripped) # Check if the samples are equivalent assert data_equivalence( sample_1, sample_1_prime ), f"sample 1: {sample_1}, prime: {sample_1_prime}" assert data_equivalence( sample_2, sample_2_prime ), f"sample 2: {sample_2}, prime: {sample_2_prime}" @pytest.mark.parametrize( 
"space_1,space_2", TESTING_SPACES_PERMUTATIONS, ids=[f"({s1}, {s2})" for s1, s2 in TESTING_SPACES_PERMUTATIONS], ) def test_space_equality(space_1, space_2): """Check that `space.__eq__` works. Testing spaces permutations contains all combinations of testing spaces of the same type. """ assert space_1 == space_1 assert space_2 == space_2 assert space_1 != space_2 # The expected sum of variance for an alpha of 0.05 # CHI_SQUARED = [0] + [scipy.stats.chi2.isf(0.05, df=df) for df in range(1, 25)] CHI_SQUARED = np.array( [ 0.01, 3.8414588206941285, 5.991464547107983, 7.814727903251178, 9.487729036781158, 11.070497693516355, 12.59158724374398, 14.067140449340167, 15.507313055865454, 16.91897760462045, ] ) @pytest.mark.parametrize( "space", TESTING_FUNDAMENTAL_SPACES, ids=TESTING_FUNDAMENTAL_SPACES_IDS ) def test_sample(space: Space, n_trials: int = 1_000): """Test the space sample has the expected distribution with the chi-squared test and KS test. Example code with scipy.stats.chisquared that should have the same >>> import scipy.stats >>> variance = np.sum(np.square(observed_frequency - expected_frequency) / expected_frequency) >>> f'X2 at alpha=0.05 = {scipy.stats.chi2.isf(0.05, df=4)}' >>> f'p-value = {scipy.stats.chi2.sf(variance, df=4)}' >>> scipy.stats.chisquare(f_obs=observed_frequency) """ space.seed(0) samples = np.array([space.sample() for _ in range(n_trials)]) assert len(samples) == n_trials if isinstance(space, Box): # TODO: Add KS testing for continuous uniform distribution pass elif isinstance(space, Discrete): expected_frequency = np.ones(space.n) * n_trials / space.n observed_frequency = np.zeros(space.n) for sample in samples: observed_frequency[sample - space.start] += 1 degrees_of_freedom = space.n - 1 assert observed_frequency.shape == expected_frequency.shape assert np.sum(observed_frequency) == n_trials variance = np.sum( np.square(expected_frequency - observed_frequency) / expected_frequency ) assert variance < CHI_SQUARED[degrees_of_freedom] 
elif isinstance(space, MultiBinary): expected_frequency = n_trials / 2 observed_frequency = np.sum(samples, axis=0) assert observed_frequency.shape == space.shape # As this is a binary space, then we can be lazy in the variance as the np.square is symmetric for the 0 and 1 categories variance = ( 2 * np.square(observed_frequency - expected_frequency) / expected_frequency ) assert variance.shape == space.shape assert np.all(variance < CHI_SQUARED[1]) elif isinstance(space, MultiDiscrete): # Due to the multi-axis capability of MultiDiscrete, these functions need to be recursive and that the expected / observed numpy are of non-regular shapes def _generate_frequency(dim, func): if isinstance(dim, np.ndarray): return np.array( [_generate_frequency(sub_dim, func) for sub_dim in dim], dtype=object, ) else: return func(dim) def _update_observed_frequency(obs_sample, obs_freq): if isinstance(obs_sample, np.ndarray): for sub_sample, sub_freq in zip(obs_sample, obs_freq): _update_observed_frequency(sub_sample, sub_freq) else: obs_freq[obs_sample] += 1 expected_frequency = _generate_frequency( space.nvec, lambda dim: np.ones(dim) * n_trials / dim ) observed_frequency = _generate_frequency(space.nvec, lambda dim: np.zeros(dim)) for sample in samples: _update_observed_frequency(sample, observed_frequency) def _chi_squared_test(dim, exp_freq, obs_freq): if isinstance(dim, np.ndarray): for sub_dim, sub_exp_freq, sub_obs_freq in zip(dim, exp_freq, obs_freq): _chi_squared_test(sub_dim, sub_exp_freq, sub_obs_freq) else: assert exp_freq.shape == (dim,) and obs_freq.shape == (dim,) assert np.sum(obs_freq) == n_trials assert np.sum(exp_freq) == n_trials _variance = np.sum(np.square(exp_freq - obs_freq) / exp_freq) _degrees_of_freedom = dim - 1 assert _variance < CHI_SQUARED[_degrees_of_freedom] _chi_squared_test(space.nvec, expected_frequency, observed_frequency) elif isinstance(space, Text): expected_frequency = ( np.ones(len(space.character_set)) * n_trials * (space.min_length + 
(space.max_length - space.min_length) / 2) / len(space.character_set) ) observed_frequency = np.zeros(len(space.character_set)) for sample in samples: for x in sample: observed_frequency[space.character_index(x)] += 1 degrees_of_freedom = len(space.character_set) - 1 assert observed_frequency.shape == expected_frequency.shape assert np.sum(observed_frequency) == sum(len(sample) for sample in samples) variance = np.sum( np.square(expected_frequency - observed_frequency) / expected_frequency ) if degrees_of_freedom == 61: # scipy.stats.chi2.isf(0.05, df=61) assert variance < 80.23209784876272 else: assert variance < CHI_SQUARED[degrees_of_freedom] else: raise NotImplementedError(f"Unknown sample testing for {type(space)}") SAMPLE_MASK_RNG, _ = seeding.np_random(1) @pytest.mark.parametrize( "space,mask", itertools.zip_longest( TESTING_FUNDAMENTAL_SPACES, [ # Discrete np.array([1, 1, 0], dtype=np.int8), np.array([0, 0, 0], dtype=np.int8), # Box None, None, None, None, None, # Multi-discrete (np.array([1, 1], dtype=np.int8), np.array([0, 0], dtype=np.int8)), ( (np.array([1, 0], dtype=np.int8), np.array([0, 1, 1], dtype=np.int8)), (np.array([1, 1, 0], dtype=np.int8), np.array([0, 1], dtype=np.int8)), ), # Multi-binary np.array([0, 1, 0, 1, 0, 2, 1, 1], dtype=np.int8), np.array([[0, 1, 2], [0, 2, 1]], dtype=np.int8), # Text (None, SAMPLE_MASK_RNG.integers(low=0, high=2, size=62, dtype=np.int8)), (4, SAMPLE_MASK_RNG.integers(low=0, high=2, size=62, dtype=np.int8)), (None, np.array([1, 1, 0, 1, 0, 0], dtype=np.int8)), ], ), ids=TESTING_FUNDAMENTAL_SPACES_IDS, ) def test_space_sample_mask(space: Space, mask, n_trials: int = 100): """Tests that the sampling a space with a mask has the expected distribution. The implemented code is similar to the `test_space_sample` that considers the mask applied. 
""" if isinstance(space, Box): # The box space can't have a sample mask assert mask is None return assert mask is not None space.seed(1) samples = np.array([space.sample(mask) for _ in range(n_trials)]) if isinstance(space, Discrete): if np.any(mask == 1): expected_frequency = np.ones(space.n) * (n_trials / np.sum(mask)) * mask else: expected_frequency = np.zeros(space.n) expected_frequency[0] = n_trials observed_frequency = np.zeros(space.n) for sample in samples: observed_frequency[sample - space.start] += 1 degrees_of_freedom = max(np.sum(mask) - 1, 0) assert observed_frequency.shape == expected_frequency.shape assert np.sum(observed_frequency) == n_trials assert np.sum(expected_frequency) == n_trials variance = np.sum( np.square(expected_frequency - observed_frequency) / np.clip(expected_frequency, 1, None) ) assert variance < CHI_SQUARED[degrees_of_freedom] elif isinstance(space, MultiBinary): expected_frequency = ( np.ones(space.shape) * np.where(mask == 2, 0.5, mask) * n_trials ) print(expected_frequency) observed_frequency = np.sum(samples, axis=0) assert space.shape == expected_frequency.shape == observed_frequency.shape variance = ( 2 * np.square(observed_frequency - expected_frequency) / np.clip(expected_frequency, 1, None) ) assert variance.shape == space.shape assert np.all(variance < CHI_SQUARED[1]) elif isinstance(space, MultiDiscrete): # Due to the multi-axis capability of MultiDiscrete, these functions need to be recursive and that the expected / observed numpy are of non-regular shapes def _generate_frequency( _dim: Union[np.ndarray, int], _mask, func: callable ) -> List: if isinstance(_dim, np.ndarray): return [ _generate_frequency(sub_dim, sub_mask, func) for sub_dim, sub_mask in zip(_dim, _mask) ] else: return func(_dim, _mask) def _update_observed_frequency(obs_sample, obs_freq): if isinstance(obs_sample, np.ndarray): for sub_sample, sub_freq in zip(obs_sample, obs_freq): _update_observed_frequency(sub_sample, sub_freq) else: 
obs_freq[obs_sample] += 1 def _exp_freq_fn(_dim: int, _mask: np.ndarray): if np.any(_mask == 1): assert _dim == len(_mask) return np.ones(_dim) * (n_trials / np.sum(_mask)) * _mask else: freq = np.zeros(_dim) freq[0] = n_trials return freq expected_frequency = _generate_frequency( space.nvec, mask, lambda dim, _mask: _exp_freq_fn(dim, _mask) ) observed_frequency = _generate_frequency( space.nvec, mask, lambda dim, _: np.zeros(dim) ) for sample in samples: _update_observed_frequency(sample, observed_frequency) def _chi_squared_test(dim, _mask, exp_freq, obs_freq): if isinstance(dim, np.ndarray): for sub_dim, sub_mask, sub_exp_freq, sub_obs_freq in zip( dim, _mask, exp_freq, obs_freq ): _chi_squared_test(sub_dim, sub_mask, sub_exp_freq, sub_obs_freq) else: assert exp_freq.shape == (dim,) and obs_freq.shape == (dim,) assert np.sum(obs_freq) == n_trials assert np.sum(exp_freq) == n_trials _variance = np.sum( np.square(exp_freq - obs_freq) / np.clip(exp_freq, 1, None) ) _degrees_of_freedom = max(np.sum(_mask) - 1, 0) assert _variance < CHI_SQUARED[_degrees_of_freedom] _chi_squared_test(space.nvec, mask, expected_frequency, observed_frequency) elif isinstance(space, Text): length, charlist_mask = mask if length is None: expected_length = ( space.min_length + (space.max_length - space.min_length) / 2 ) else: expected_length = length if np.any(charlist_mask == 1): expected_frequency = ( np.ones(len(space.character_set)) * n_trials * expected_length / np.sum(charlist_mask) * charlist_mask ) else: expected_frequency = np.zeros(len(space.character_set)) observed_frequency = np.zeros(len(space.character_set)) for sample in samples: for char in sample: observed_frequency[space.character_index(char)] += 1 degrees_of_freedom = max(np.sum(charlist_mask) - 1, 0) assert observed_frequency.shape == expected_frequency.shape assert np.sum(observed_frequency) == sum(len(sample) for sample in samples) variance = np.sum( np.square(expected_frequency - observed_frequency) / 
np.clip(expected_frequency, 1, None) ) if degrees_of_freedom == 26: # scipy.stats.chi2.isf(0.05, df=29) assert variance < 38.88513865983007 elif degrees_of_freedom == 31: # scipy.stats.chi2.isf(0.05, df=31) assert variance < 44.985343280365136 else: assert variance < CHI_SQUARED[degrees_of_freedom] else: raise NotImplementedError() @pytest.mark.parametrize("space", TESTING_SPACES, ids=TESTING_SPACES_IDS) def test_seed_reproducibility(space): """Test that the set the space seed will reproduce the same samples.""" space_1 = space space_2 = copy.deepcopy(space) for seed in range(5): assert space_1.seed(seed) == space_2.seed(seed) # With the same seed, the two spaces should be identical assert all( data_equivalence(space_1.sample(), space_2.sample()) for _ in range(10) ) assert space_1.seed(123) != space_2.seed(456) # Due to randomness, it is difficult to test that random seeds produce different answers # Therefore, taking 10 samples and checking that they are not all the same. assert not all( data_equivalence(space_1.sample(), space_2.sample()) for _ in range(10) ) SPACE_CLS = list(dict.fromkeys(type(space) for space in TESTING_SPACES)) SPACE_KWARGS = [ {"n": 3}, # Discrete {"low": 1, "high": 10}, # Box {"nvec": [3, 2]}, # MultiDiscrete {"n": 2}, # MultiBinary {"max_length": 5}, # Text {"spaces": (Discrete(3), Discrete(2))}, # Tuple {"spaces": {"a": Discrete(3), "b": Discrete(2)}}, # Dict {"node_space": Discrete(4), "edge_space": Discrete(3)}, # Graph {"space": Discrete(4)}, # Sequence ] assert len(SPACE_CLS) == len(SPACE_KWARGS) @pytest.mark.parametrize( "space_cls,kwarg", list(zip(SPACE_CLS, SPACE_KWARGS)), ids=[f"{space_cls}" for space_cls in SPACE_CLS], ) def test_seed_np_random(space_cls, kwarg): """During initialisation of a space, a rng instance can be passed to the space. 
Test that the space's `np_random` is the rng instance """ rng, _ = seeding.np_random(123) space = space_cls(seed=rng, **kwarg) assert space.np_random is rng @pytest.mark.parametrize("space", TESTING_SPACES, ids=TESTING_SPACES_IDS) def test_sample_contains(space): """Test that samples are contained within the space. Then test that for all other spaces, we test that an error is not raise with a sample and a bool is returned. As other spaces can be contained with this space, we cannot test that the contains is always true or false. """ for _ in range(10): sample = space.sample() assert sample in space assert space.contains(sample) for other_space in TESTING_SPACES: assert isinstance(space.contains(other_space.sample()), bool) @pytest.mark.parametrize("space", TESTING_SPACES, ids=TESTING_SPACES_IDS) def test_repr(space): assert isinstance(str(space), str) @pytest.mark.parametrize("space", TESTING_SPACES, ids=TESTING_SPACES_IDS) def test_space_pickling(space): """Tests the spaces can be pickled with the unpickled version being equivalent to the original.""" space.seed(0) # Pickle and unpickle with a string pickled_space = pickle.dumps(space) unpickled_space = pickle.loads(pickled_space) assert space == unpickled_space # Pickle and unpickle with a file with tempfile.TemporaryFile() as f: pickle.dump(space, f) f.seek(0) file_unpickled_space = pickle.load(f) assert space == file_unpickled_space # Check that space samples are the same space_sample = space.sample() unpickled_sample = unpickled_space.sample() file_unpickled_sample = file_unpickled_space.sample() assert data_equivalence(space_sample, unpickled_sample) assert data_equivalence(space_sample, file_unpickled_sample) ================================================ FILE: tests/spaces/test_text.py ================================================ import re import numpy as np import pytest from gym.spaces import Text def test_sample_mask(): space = Text(min_length=1, max_length=5) # Test the sample length sample = 
space.sample(mask=(3, None)) assert sample in space assert len(sample) == 3 sample = space.sample(mask=None) assert sample in space assert 1 <= len(sample) <= 5 with pytest.raises( ValueError, match=re.escape( "Trying to sample with a minimum length > 0 (1) but the character mask is all zero meaning that no character could be sampled." ), ): space.sample(mask=(3, np.zeros(len(space.character_set), dtype=np.int8))) space = Text(min_length=0, max_length=5) sample = space.sample( mask=(None, np.zeros(len(space.character_set), dtype=np.int8)) ) assert sample in space assert sample == "" # Test the sample characters space = Text(max_length=5, charset="abcd") sample = space.sample(mask=(3, np.array([0, 1, 0, 0], dtype=np.int8))) assert sample in space assert sample == "bbb" ================================================ FILE: tests/spaces/test_tuple.py ================================================ import numpy as np import pytest import gym.spaces from gym.spaces import Box, Dict, Discrete, MultiBinary, Tuple from gym.utils.env_checker import data_equivalence def test_sequence_inheritance(): """The gym Tuple space inherits from abc.Sequences, this test checks all functions work""" spaces = [Discrete(5), Discrete(10), Discrete(5)] tuple_space = Tuple(spaces) assert len(tuple_space) == len(spaces) # Test indexing for i in range(len(tuple_space)): assert tuple_space[i] == spaces[i] # Test iterable for space in tuple_space: assert space in spaces # Test count assert tuple_space.count(Discrete(5)) == 2 assert tuple_space.count(Discrete(6)) == 0 assert tuple_space.count(MultiBinary(2)) == 0 # Test index assert tuple_space.index(Discrete(5)) == 0 assert tuple_space.index(Discrete(5), 1) == 2 # Test errors with pytest.raises(ValueError): tuple_space.index(Discrete(10), 0, 1) with pytest.raises(IndexError): assert tuple_space[4] @pytest.mark.parametrize( "space, seed, expected_len", [ (Tuple([Discrete(5), Discrete(4)]), None, 2), (Tuple([Discrete(5), Discrete(4)]), 123, 3), 
(Tuple([Discrete(5), Discrete(4)]), (123, 456), 2), ( Tuple( (Discrete(5), Tuple((Box(low=0.0, high=1.0, shape=(3,)), Discrete(2)))) ), (123, (456, 789)), 3, ), ( Tuple( ( Discrete(3), Dict(position=Box(low=0.0, high=1.0), velocity=Discrete(2)), ) ), (123, {"position": 456, "velocity": 789}), 3, ), ], ) def test_seeds(space, seed, expected_len): seeds = space.seed(seed) assert isinstance(seeds, list) and all(isinstance(elem, int) for elem in seeds) assert len(seeds) == expected_len sample1 = space.sample() seeds2 = space.seed(seed) sample2 = space.sample() data_equivalence(seeds, seeds2) data_equivalence(sample1, sample2) @pytest.mark.parametrize( "space_fn", [ lambda: Tuple(["abc"]), lambda: Tuple([gym.spaces.Box(0, 1), "abc"]), lambda: Tuple("abc"), ], ) def test_bad_space_calls(space_fn): with pytest.raises(AssertionError): space_fn() def test_contains_promotion(): space = gym.spaces.Tuple((gym.spaces.Box(0, 1), gym.spaces.Box(-1, 0, (2,)))) assert ( np.array([0.0], dtype=np.float32), np.array([0.0, 0.0], dtype=np.float32), ) in space space = gym.spaces.Tuple((gym.spaces.Box(0, 1), gym.spaces.Box(-1, 0, (1,)))) assert np.array([[0.0], [0.0]], dtype=np.float32) in space def test_bad_seed(): space = gym.spaces.Tuple((gym.spaces.Box(0, 1), gym.spaces.Box(0, 1))) with pytest.raises( TypeError, match="Expected seed type: list, tuple, int or None, actual type: ", ): space.seed(0.0) ================================================ FILE: tests/spaces/test_utils.py ================================================ from itertools import zip_longest from typing import Optional import numpy as np import pytest import gym from gym.spaces import Box, Graph, utils from gym.utils.env_checker import data_equivalence from tests.spaces.utils import TESTING_SPACES, TESTING_SPACES_IDS TESTING_SPACES_EXPECTED_FLATDIMS = [ # Discrete 3, 3, # Box 1, 4, 2, 2, 2, # Multi-discrete 4, 10, # Multi-binary 8, 6, # Text 6, 6, 6, # Tuple 9, 7, 10, 6, None, # Dict 7, 8, 17, None, # Graph None, 
None, None, # Sequence None, None, None, ] @pytest.mark.parametrize( ["space", "flatdim"], zip_longest(TESTING_SPACES, TESTING_SPACES_EXPECTED_FLATDIMS), ids=TESTING_SPACES_IDS, ) def test_flatdim(space: gym.spaces.Space, flatdim: Optional[int]): """Checks that the flattened dims of the space is equal to an expected value.""" if space.is_np_flattenable: dim = utils.flatdim(space) assert dim == flatdim, f"Expected {dim} to equal {flatdim}" else: with pytest.raises( ValueError, ): utils.flatdim(space) @pytest.mark.parametrize("space", TESTING_SPACES, ids=TESTING_SPACES_IDS) def test_flatten_space(space): """Test that the flattened spaces are a box and have the `flatdim` shape.""" flat_space = utils.flatten_space(space) if space.is_np_flattenable: assert isinstance(flat_space, Box) (single_dim,) = flat_space.shape flatdim = utils.flatdim(space) assert single_dim == flatdim elif isinstance(flat_space, Graph): assert isinstance(space, Graph) (node_single_dim,) = flat_space.node_space.shape node_flatdim = utils.flatdim(space.node_space) assert node_single_dim == node_flatdim if flat_space.edge_space is not None: (edge_single_dim,) = flat_space.edge_space.shape edge_flatdim = utils.flatdim(space.edge_space) assert edge_single_dim == edge_flatdim else: assert isinstance( space, (gym.spaces.Tuple, gym.spaces.Dict, gym.spaces.Sequence) ) @pytest.mark.parametrize("space", TESTING_SPACES, ids=TESTING_SPACES_IDS) def test_flatten(space): """Test that a flattened sample have the `flatdim` shape.""" flattened_sample = utils.flatten(space, space.sample()) if space.is_np_flattenable: assert isinstance(flattened_sample, np.ndarray) (single_dim,) = flattened_sample.shape flatdim = utils.flatdim(space) assert single_dim == flatdim else: assert isinstance(flattened_sample, (tuple, dict, Graph)) @pytest.mark.parametrize("space", TESTING_SPACES, ids=TESTING_SPACES_IDS) def test_flat_space_contains_flat_points(space): """Test that the flattened samples are contained within the flattened 
space.""" flattened_samples = [utils.flatten(space, space.sample()) for _ in range(10)] flat_space = utils.flatten_space(space) for flat_sample in flattened_samples: assert flat_sample in flat_space @pytest.mark.parametrize("space", TESTING_SPACES, ids=TESTING_SPACES_IDS) def test_flatten_roundtripping(space): """Tests roundtripping with flattening and unflattening are equal to the original sample.""" samples = [space.sample() for _ in range(10)] flattened_samples = [utils.flatten(space, sample) for sample in samples] unflattened_samples = [ utils.unflatten(space, sample) for sample in flattened_samples ] for original, roundtripped in zip(samples, unflattened_samples): assert data_equivalence(original, roundtripped) ================================================ FILE: tests/spaces/utils.py ================================================ from typing import List import numpy as np from gym.spaces import ( Box, Dict, Discrete, Graph, MultiBinary, MultiDiscrete, Sequence, Space, Text, Tuple, ) TESTING_FUNDAMENTAL_SPACES = [ Discrete(3), Discrete(3, start=-1), Box(low=0.0, high=1.0), Box(low=0.0, high=np.inf, shape=(2, 2)), Box(low=np.array([-10.0, 0.0]), high=np.array([10.0, 10.0]), dtype=np.float64), Box(low=-np.inf, high=0.0, shape=(2, 1)), Box(low=0.0, high=np.inf, shape=(2, 1)), MultiDiscrete([2, 2]), MultiDiscrete([[2, 3], [3, 2]]), MultiBinary(8), MultiBinary([2, 3]), Text(6), Text(min_length=3, max_length=6), Text(6, charset="abcdef"), ] TESTING_FUNDAMENTAL_SPACES_IDS = [f"{space}" for space in TESTING_FUNDAMENTAL_SPACES] TESTING_COMPOSITE_SPACES = [ # Tuple spaces Tuple([Discrete(5), Discrete(4)]), Tuple( ( Discrete(5), Box( low=np.array([0.0, 0.0]), high=np.array([1.0, 5.0]), dtype=np.float64, ), ) ), Tuple((Discrete(5), Tuple((Box(low=0.0, high=1.0, shape=(3,)), Discrete(2))))), Tuple((Discrete(3), Dict(position=Box(low=0.0, high=1.0), velocity=Discrete(2)))), Tuple((Graph(node_space=Box(-1, 1, shape=(2, 1)), edge_space=None), Discrete(2))), # Dict spaces 
Dict( { "position": Discrete(5), "velocity": Box( low=np.array([0.0, 0.0]), high=np.array([1.0, 5.0]), dtype=np.float64, ), } ), Dict( position=Discrete(6), velocity=Box( low=np.array([0.0, 0.0]), high=np.array([1.0, 5.0]), dtype=np.float64, ), ), Dict( { "a": Box(low=0, high=1, shape=(3, 3)), "b": Dict( { "b_1": Box(low=-100, high=100, shape=(2,)), "b_2": Box(low=-1, high=1, shape=(2,)), } ), "c": Discrete(4), } ), Dict( a=Dict( a=Graph(node_space=Box(-100, 100, shape=(2, 2)), edge_space=None), b=Box(-100, 100, shape=(2, 2)), ), b=Tuple((Box(-100, 100, shape=(2,)), Box(-100, 100, shape=(2,)))), ), # Graph spaces Graph(node_space=Box(low=-100, high=100, shape=(3, 4)), edge_space=Discrete(5)), Graph(node_space=Discrete(5), edge_space=Box(low=-100, high=100, shape=(3, 4))), Graph(node_space=Discrete(3), edge_space=Discrete(4)), # Sequence spaces Sequence(Discrete(4)), Sequence(Dict({"feature": Box(0, 1, (3,))})), Sequence(Graph(node_space=Box(-100, 100, shape=(2, 2)), edge_space=Discrete(4))), ] TESTING_COMPOSITE_SPACES_IDS = [f"{space}" for space in TESTING_COMPOSITE_SPACES] TESTING_SPACES: List[Space] = TESTING_FUNDAMENTAL_SPACES + TESTING_COMPOSITE_SPACES TESTING_SPACES_IDS = TESTING_FUNDAMENTAL_SPACES_IDS + TESTING_COMPOSITE_SPACES_IDS ================================================ FILE: tests/test_core.py ================================================ from typing import Optional import numpy as np import pytest from gym import core, spaces from gym.wrappers import OrderEnforcing, TimeLimit class ArgumentEnv(core.Env): observation_space = spaces.Box(low=0, high=1, shape=(1,)) action_space = spaces.Box(low=0, high=1, shape=(1,)) calls = 0 def __init__(self, arg): self.calls += 1 self.arg = arg class UnittestEnv(core.Env): observation_space = spaces.Box(low=0, high=255, shape=(64, 64, 3), dtype=np.uint8) action_space = spaces.Discrete(3) def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): super().reset(seed=seed) return 
self.observation_space.sample(), {"info": "dummy"}

    def step(self, action):
        observation = self.observation_space.sample()  # Dummy observation
        return (observation, 0.0, False, {})


class UnknownSpacesEnv(core.Env):
    """This environment defines its observation & action spaces only after the first
    call to reset. Although this pattern is sometimes necessary when implementing a new
    environment (e.g. if it depends on external resources), it is not encouraged.
    """

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        super().reset(seed=seed)
        # Spaces are only defined here, not at class/instance creation time.
        self.observation_space = spaces.Box(
            low=0, high=255, shape=(64, 64, 3), dtype=np.uint8
        )
        self.action_space = spaces.Discrete(3)
        return self.observation_space.sample(), {}  # Dummy observation with info

    def step(self, action):
        observation = self.observation_space.sample()  # Dummy observation
        return (observation, 0.0, False, {})


class OldStyleEnv(core.Env):
    """This environment doesn't accept any arguments in reset, ideally we want to support
    this too (for now)"""

    def __init__(self):
        pass

    def reset(self):
        super().reset()
        return 0

    def step(self, action):
        return 0, 0, False, {}


class NewPropertyWrapper(core.Wrapper):
    """Wrapper that conditionally overrides env properties, to test Wrapper property forwarding."""

    def __init__(
        self,
        env,
        observation_space=None,
        action_space=None,
        reward_range=None,
        metadata=None,
    ):
        super().__init__(env)
        if observation_space is not None:
            # Only set the observation space if not None to test property forwarding
            self.observation_space = observation_space
        if action_space is not None:
            self.action_space = action_space
        if reward_range is not None:
            self.reward_range = reward_range
        if metadata is not None:
            self.metadata = metadata


def test_env_instantiation():
    # This looks like a pretty trivial test, but given our usage of
    # __new__, it's worth having.
env = ArgumentEnv("arg") assert env.arg == "arg" assert env.calls == 1 properties = [ { "observation_space": spaces.Box( low=0.0, high=1.0, shape=(64, 64, 3), dtype=np.float32 ) }, {"action_space": spaces.Discrete(2)}, {"reward_range": (-1.0, 1.0)}, {"metadata": {"render_modes": ["human", "rgb_array_list"]}}, { "observation_space": spaces.Box( low=0.0, high=1.0, shape=(64, 64, 3), dtype=np.float32 ), "action_space": spaces.Discrete(2), }, ] @pytest.mark.parametrize("class_", [UnittestEnv, UnknownSpacesEnv]) @pytest.mark.parametrize("props", properties) def test_wrapper_property_forwarding(class_, props): env = class_() env = NewPropertyWrapper(env, **props) # If UnknownSpacesEnv, then call reset to define the spaces if isinstance(env.unwrapped, UnknownSpacesEnv): _ = env.reset() # Test the properties set by the wrapper for key, value in props.items(): assert getattr(env, key) == value # Otherwise, test if the properties are forwarded all_properties = {"observation_space", "action_space", "reward_range", "metadata"} for key in all_properties - props.keys(): assert getattr(env, key) == getattr(env.unwrapped, key) def test_compatibility_with_old_style_env(): env = OldStyleEnv() env = OrderEnforcing(env) env = TimeLimit(env) obs = env.reset() assert obs == 0 ================================================ FILE: tests/testing_env.py ================================================ """Provides a generic testing environment for use in tests with custom reset, step and render functions.""" import types from typing import Any, Dict, Optional, Tuple, Union import gym from gym import spaces from gym.core import ActType, ObsType from gym.envs.registration import EnvSpec def basic_reset_fn( self, *, seed: Optional[int] = None, options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]: """A basic reset function that will pass the environment check using random actions from the observation space.""" super(GenericTestEnv, self).reset(seed=seed) 
self.observation_space.seed(seed) return self.observation_space.sample(), {"options": options} def new_step_fn(self, action: ActType) -> Tuple[ObsType, float, bool, bool, dict]: """A step function that follows the new step api that will pass the environment check using random actions from the observation space.""" return self.observation_space.sample(), 0, False, False, {} def old_step_fn(self, action: ActType) -> Tuple[ObsType, float, bool, dict]: """A step function that follows the old step api that will pass the environment check using random actions from the observation space.""" return self.observation_space.sample(), 0, False, {} def basic_render_fn(self): """Basic render fn that does nothing.""" pass # todo: change all testing environment to this generic class class GenericTestEnv(gym.Env): """A generic testing environment for use in testing with modified environments are required.""" def __init__( self, action_space: spaces.Space = spaces.Box(0, 1, (1,)), observation_space: spaces.Space = spaces.Box(0, 1, (1,)), reset_fn: callable = basic_reset_fn, step_fn: callable = new_step_fn, render_fn: callable = basic_render_fn, metadata: Optional[Dict[str, Any]] = None, render_mode: Optional[str] = None, spec: EnvSpec = EnvSpec("TestingEnv-v0", "testing-env-no-entry-point"), ): self.metadata = {} if metadata is None else metadata self.render_mode = render_mode self.spec = spec if observation_space is not None: self.observation_space = observation_space if action_space is not None: self.action_space = action_space if reset_fn is not None: self.reset = types.MethodType(reset_fn, self) if step_fn is not None: self.step = types.MethodType(step_fn, self) if render_fn is not None: self.render = types.MethodType(render_fn, self) def reset( self, *, seed: Optional[int] = None, options: Optional[dict] = None, ) -> Union[ObsType, Tuple[ObsType, dict]]: # If you need a default working reset function, use `basic_reset_fn` above raise NotImplementedError("TestingEnv reset_fn is 
not set.") def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]: raise NotImplementedError("TestingEnv step_fn is not set.") def render(self): raise NotImplementedError("testingEnv render_fn is not set.") ================================================ FILE: tests/utils/__init__.py ================================================ ================================================ FILE: tests/utils/test_env_checker.py ================================================ """Tests that the `env_checker` runs as expects and all errors are possible.""" import re import warnings from typing import Tuple, Union import numpy as np import pytest import gym from gym import spaces from gym.core import ObsType from gym.utils.env_checker import ( check_env, check_reset_options, check_reset_return_info_deprecation, check_reset_return_type, check_reset_seed, check_seed_deprecation, ) from tests.testing_env import GenericTestEnv @pytest.mark.parametrize( "env", [ gym.make("CartPole-v1", disable_env_checker=True).unwrapped, gym.make("MountainCar-v0", disable_env_checker=True).unwrapped, GenericTestEnv( observation_space=spaces.Dict( a=spaces.Discrete(10), b=spaces.Box(np.zeros(2), np.ones(2)) ) ), GenericTestEnv( observation_space=spaces.Tuple( [spaces.Discrete(10), spaces.Box(np.zeros(2), np.ones(2))] ) ), GenericTestEnv( observation_space=spaces.Dict( a=spaces.Tuple( [spaces.Discrete(10), spaces.Box(np.zeros(2), np.ones(2))] ), b=spaces.Box(np.zeros(2), np.ones(2)), ) ), ], ) def test_no_error_warnings(env): """A full version of this test with all gym envs is run in tests/envs/test_envs.py.""" with warnings.catch_warnings(record=True) as caught_warnings: check_env(env) assert len(caught_warnings) == 0, [warning.message for warning in caught_warnings] def _no_super_reset(self, seed=None, options=None): self.np_random.random() # generates a new prng # generate seed deterministic result self.observation_space.seed(0) return self.observation_space.sample(), {} def 
_super_reset_fixed(self, seed=None, options=None): # Call super that ignores the seed passed, use fixed seed super(GenericTestEnv, self).reset(seed=1) # deterministic output self.observation_space._np_random = self.np_random return self.observation_space.sample(), {} def _reset_default_seed(self: GenericTestEnv, seed="Error", options=None): super(GenericTestEnv, self).reset(seed=seed) self.observation_space._np_random = ( # pyright: ignore [reportPrivateUsage] self.np_random ) return self.observation_space.sample(), {} @pytest.mark.parametrize( "test,func,message", [ [ gym.error.Error, lambda self: (self.observation_space.sample(), {}), "The `reset` method does not provide a `seed` or `**kwargs` keyword argument.", ], [ AssertionError, lambda self, seed, *_: (self.observation_space.sample(), {}), "Expects the random number generator to have been generated given a seed was passed to reset. Mostly likely the environment reset function does not call `super().reset(seed=seed)`.", ], [ AssertionError, _no_super_reset, "Mostly likely the environment reset function does not call `super().reset(seed=seed)` as the random generates are not same when the same seeds are passed to `env.reset`.", ], [ AssertionError, _super_reset_fixed, "Mostly likely the environment reset function does not call `super().reset(seed=seed)` as the random number generators are not different when different seeds are passed to `env.reset`.", ], [ UserWarning, _reset_default_seed, "The default seed argument in reset should be `None`, otherwise the environment will by default always be deterministic. 
Actual default: Error", ], ], )
def test_check_reset_seed(test, func: callable, message: str):
    """Tests the check reset seed function works as expected."""
    # Warnings are emitted through gym's logger, which wraps the message in
    # ANSI yellow colour codes — hence the \x1b escape sequences in the match.
    if test is UserWarning:
        with pytest.warns(
            UserWarning, match=f"^\\x1b\\[33mWARN: {re.escape(message)}\\x1b\\[0m$"
        ):
            check_reset_seed(GenericTestEnv(reset_fn=func))
    else:
        with pytest.raises(test, match=f"^{re.escape(message)}$"):
            check_reset_seed(GenericTestEnv(reset_fn=func))


def _deprecated_return_info(
    self, return_info: bool = False
) -> Union[Tuple[ObsType, dict], ObsType]:
    """function to simulate the signature and behavior of a `reset` function with the deprecated `return_info` optional argument"""
    if return_info:
        return self.observation_space.sample(), {}
    else:
        return self.observation_space.sample()


def _reset_var_keyword_kwargs(self, kwargs):
    # `reset` taking a positional parameter named `kwargs` (not `**kwargs`);
    # exercises the checker's signature inspection.
    return self.observation_space.sample(), {}


def _reset_return_info_type(self, seed=None, options=None):
    """Returns a `list` instead of a `tuple`. This function is used to make sure `env_checker` correctly checks that the return type of `env.reset()` is a `tuple`"""
    return [self.observation_space.sample(), {}]


def _reset_return_info_length(self, seed=None, options=None):
    # Returns a 3-tuple instead of the expected 2-tuple `(obs, info)`.
    return 1, 2, 3


def _return_info_obs_outside(self, seed=None, options=None):
    # Shifts the sampled observation by the space's upper bound so the
    # returned obs lies outside the observation space.
    return self.observation_space.sample() + self.observation_space.high, {}


def _return_info_not_dict(self, seed=None, options=None):
    # Returns a list instead of a dictionary for the `info` element.
    return self.observation_space.sample(), ["key", "value"]


@pytest.mark.parametrize(
    "test,func,message",
    [
        [
            AssertionError,
            _reset_return_info_type,
            "The result returned by `env.reset()` was not a tuple of the form `(obs, info)`, where `obs` is a observation and `info` is a dictionary containing additional information. 
Actual type: ``", ], [ AssertionError, _reset_return_info_length, "Calling the reset method did not return a 2-tuple, actual length: 3", ], [ AssertionError, _return_info_obs_outside, "The first element returned by `env.reset()` is not within the observation space.", ], [ AssertionError, _return_info_not_dict, "The second element returned by `env.reset()` was not a dictionary, actual type: ", ], ], ) def test_check_reset_return_type(test, func: callable, message: str): """Tests the check `env.reset()` function has a correct return type.""" with pytest.raises(test, match=f"^{re.escape(message)}$"): check_reset_return_type(GenericTestEnv(reset_fn=func)) @pytest.mark.parametrize( "test,func,message", [ [ UserWarning, _deprecated_return_info, "`return_info` is deprecated as an optional argument to `reset`. `reset`" "should now always return `obs, info` where `obs` is an observation, and `info` is a dictionary" "containing additional information.", ], ], ) def test_check_reset_return_info_deprecation(test, func: callable, message: str): """Tests that return_info has been correct deprecated as an argument to `env.reset()`.""" with pytest.warns(test, match=f"^\\x1b\\[33mWARN: {re.escape(message)}\\x1b\\[0m$"): check_reset_return_info_deprecation(GenericTestEnv(reset_fn=func)) def test_check_seed_deprecation(): """Tests that `check_seed_deprecation()` throws a warning if `env.seed()` has not been removed.""" message = """Official support for the `seed` function is dropped. 
Standard practice is to reset gym environments using `env.reset(seed=)`""" env = GenericTestEnv() def seed(seed): return with pytest.warns( UserWarning, match=f"^\\x1b\\[33mWARN: {re.escape(message)}\\x1b\\[0m$" ): env.seed = seed assert callable(env.seed) check_seed_deprecation(env) with warnings.catch_warnings(record=True) as caught_warnings: env.seed = [] check_seed_deprecation(env) env.seed = 123 check_seed_deprecation(env) del env.seed check_seed_deprecation(env) assert len(caught_warnings) == 0 def test_check_reset_options(): """Tests the check_reset_options function.""" with pytest.raises( gym.error.Error, match=re.escape( "The `reset` method does not provide an `options` or `**kwargs` keyword argument" ), ): check_reset_options(GenericTestEnv(reset_fn=lambda self: (0, {}))) @pytest.mark.parametrize( "env,message", [ [ "Error", "The environment must inherit from the gym.Env class. See https://www.gymlibrary.dev/content/environment_creation/ for more info.", ], [ GenericTestEnv(action_space=None), "The environment must specify an action space. See https://www.gymlibrary.dev/content/environment_creation/ for more info.", ], [ GenericTestEnv(observation_space=None), "The environment must specify an observation space. 
See https://www.gymlibrary.dev/content/environment_creation/ for more info.",
        ],
    ],
)
def test_check_env(env: gym.Env, message: str):
    """Tests the check_env function works as expected."""
    # Each parametrised case is an invalid environment expected to fail
    # `check_env` with the exact (escaped) assertion message.
    with pytest.raises(AssertionError, match=f"^{re.escape(message)}$"):
        check_env(env)
================================================ FILE: tests/utils/test_passive_env_checker.py ================================================
import re
import warnings
from typing import Dict, Union

import numpy as np
import pytest

import gym
from gym import spaces
from gym.utils.passive_env_checker import (
    check_action_space,
    check_obs,
    check_observation_space,
    env_render_passive_checker,
    env_reset_passive_checker,
    env_step_passive_checker,
)
from tests.testing_env import GenericTestEnv


def _modify_space(space: spaces.Space, attribute: str, value):
    # Forcibly overwrite a (possibly private) attribute to construct an
    # intentionally invalid space for the checker tests.
    setattr(space, attribute, value)
    return space


@pytest.mark.parametrize(
    "test,space,message",
    [
        [
            AssertionError,
            "error",
            "observation space does not inherit from `gym.spaces.Space`, actual type: ",
        ],
        # ===== Check box observation space ====
        [
            UserWarning,
            spaces.Box(np.zeros((5, 5, 1)), 255 * np.ones((5, 5, 1)), dtype=np.int32),
            "It seems a Box observation space is an image but the `dtype` is not `np.uint8`, actual type: int32. If the Box observation space is not an image, we recommend flattening the observation to have only a 1D vector.",
        ],
        [
            UserWarning,
            spaces.Box(np.ones((2, 2, 1)), 255 * np.ones((2, 2, 1)), dtype=np.uint8),
            "It seems a Box observation space is an image but the upper and lower bounds are not in [0, 255]. Generally, CNN policies assume observations are within that range, so you may encounter an issue if the observation values are not.",
        ],
        [
            UserWarning,
            spaces.Box(np.zeros((5, 5, 1)), np.ones((5, 5, 1)), dtype=np.uint8),
            "It seems a Box observation space is an image but the upper and lower bounds are not in [0, 255]. 
Generally, CNN policies assume observations are within that range, so you may encounter an issue if the observation values are not.", ], [ UserWarning, spaces.Box(np.zeros((5, 5)), np.ones((5, 5))), "A Box observation space has an unconventional shape (neither an image, nor a 1D vector). We recommend flattening the observation to have only a 1D vector or use a custom policy to properly process the data. Actual observation shape: (5, 5)", ], [ UserWarning, spaces.Box(np.zeros(5), np.zeros(5)), "A Box observation space maximum and minimum values are equal.", ], [ UserWarning, spaces.Box(np.ones(5), np.zeros(5)), "A Box observation space low value is greater than a high value.", ], [ AssertionError, _modify_space(spaces.Box(np.zeros(2), np.ones(2)), "low", np.zeros(3)), "The Box observation space shape and low shape have different shapes, low shape: (3,), box shape: (2,)", ], [ AssertionError, _modify_space(spaces.Box(np.zeros(2), np.ones(2)), "high", np.ones(3)), "The Box observation space shape and high shape have have different shapes, high shape: (3,), box shape: (2,)", ], # ==== Other observation spaces (Discrete, MultiDiscrete, MultiBinary, Tuple, Dict) [ AssertionError, _modify_space(spaces.Discrete(5), "n", -1), "Discrete observation space's number of elements must be positive, actual number of elements: -1", ], [ AssertionError, _modify_space(spaces.MultiDiscrete([2, 2]), "nvec", np.array([2, -1])), "Multi-discrete observation space's all nvec elements must be greater than 0, actual nvec: [ 2 -1]", ], [ AssertionError, _modify_space(spaces.MultiDiscrete([2, 2]), "_shape", (2, 1, 2)), "Multi-discrete observation space's shape must be equal to the nvec shape, space shape: (2, 1, 2), nvec shape: (2,)", ], [ AssertionError, _modify_space(spaces.MultiBinary((2, 2)), "_shape", (2, -1)), "Multi-binary observation space's all shape elements must be greater than 0, actual shape: (2, -1)", ], [ AssertionError, spaces.Tuple([]), "An empty Tuple observation space is not 
allowed.", ], [ AssertionError, spaces.Dict(), "An empty Dict observation space is not allowed.", ], ], ) def test_check_observation_space(test, space, message: str): """Tests the check observation space.""" if test is UserWarning: with pytest.warns( UserWarning, match=f"^\\x1b\\[33mWARN: {re.escape(message)}\\x1b\\[0m$" ): check_observation_space(space) else: with warnings.catch_warnings(record=True) as caught_warnings: with pytest.raises(test, match=f"^{re.escape(message)}$"): check_observation_space(space) assert len(caught_warnings) == 0 @pytest.mark.parametrize( "test,space,message", [ [ AssertionError, "error", "action space does not inherit from `gym.spaces.Space`, actual type: ", ], # ===== Check box observation space ==== [ UserWarning, spaces.Box(np.zeros(5), np.zeros(5)), "A Box action space maximum and minimum values are equal.", ], [ UserWarning, spaces.Box(np.ones(5), np.zeros(5)), "A Box action space low value is greater than a high value.", ], [ AssertionError, _modify_space(spaces.Box(np.zeros(2), np.ones(2)), "low", np.zeros(3)), "The Box action space shape and low shape have have different shapes, low shape: (3,), box shape: (2,)", ], [ AssertionError, _modify_space(spaces.Box(np.zeros(2), np.ones(2)), "high", np.ones(3)), "The Box action space shape and high shape have different shapes, high shape: (3,), box shape: (2,)", ], # ==== Other observation spaces (Discrete, MultiDiscrete, MultiBinary, Tuple, Dict) [ AssertionError, _modify_space(spaces.Discrete(5), "n", -1), "Discrete action space's number of elements must be positive, actual number of elements: -1", ], [ AssertionError, _modify_space(spaces.MultiDiscrete([2, 2]), "_shape", (2, -1)), "Multi-discrete action space's shape must be equal to the nvec shape, space shape: (2, -1), nvec shape: (2,)", ], [ AssertionError, _modify_space(spaces.MultiBinary((2, 2)), "_shape", (2, -1)), "Multi-binary action space's all shape elements must be greater than 0, actual shape: (2, -1)", ], [ 
AssertionError, spaces.Tuple([]), "An empty Tuple action space is not allowed.", ], [AssertionError, spaces.Dict(), "An empty Dict action space is not allowed."], ], ) def test_check_action_space( test: Union[UserWarning, type], space: spaces.Space, message: str ): """Tests the check action space function.""" if test is UserWarning: with pytest.warns( UserWarning, match=f"^\\x1b\\[33mWARN: {re.escape(message)}\\x1b\\[0m$" ): check_action_space(space) else: with warnings.catch_warnings(record=True) as caught_warnings: with pytest.raises(test, match=f"^{re.escape(message)}$"): check_action_space(space) assert len(caught_warnings) == 0 @pytest.mark.parametrize( "test,obs,obs_space,message", [ [ UserWarning, 3, spaces.Discrete(2), "The obs returned by the `testing()` method is not within the observation space.", ], [ UserWarning, np.uint8(0), spaces.Discrete(1), "The obs returned by the `testing()` method should be an int or np.int64, actual type: ", ], [ UserWarning, [0, 1], spaces.Tuple([spaces.Discrete(1), spaces.Discrete(2)]), "The obs returned by the `testing()` method was expecting a tuple, actual type: ", ], [ AssertionError, (1, 2, 3), spaces.Tuple([spaces.Discrete(1), spaces.Discrete(2)]), "The obs returned by the `testing()` method length is not same as the observation space length, obs length: 3, space length: 2", ], [ AssertionError, {1, 2, 3}, spaces.Dict(a=spaces.Discrete(1), b=spaces.Discrete(2)), "The obs returned by the `testing()` method must be a dict, actual type: ", ], [ AssertionError, {"a": 1, "c": 2}, spaces.Dict(a=spaces.Discrete(1), b=spaces.Discrete(2)), "The obs returned by the `testing()` method observation keys is not same as the observation space keys, obs keys: ['a', 'c'], space keys: ['a', 'b']", ], ], ) def test_check_obs(test, obs, obs_space: spaces.Space, message: str): """Tests the check observations function.""" if test is UserWarning: with pytest.warns( UserWarning, match=f"^\\x1b\\[33mWARN: {re.escape(message)}\\x1b\\[0m$" ): 
check_obs(obs, obs_space, "testing") else: with warnings.catch_warnings(record=True) as caught_warnings: with pytest.raises(test, match=f"^{re.escape(message)}$"): check_obs(obs, obs_space, "testing") assert len(caught_warnings) == 0


def _reset_no_seed(self, options=None):
    # `reset` missing the `seed` keyword argument entirely.
    return self.observation_space.sample(), {}


def _reset_seed_default(self, seed="error", options=None):
    # `reset` whose `seed` default is not `None` — flagged by the checker.
    return self.observation_space.sample(), {}


def _reset_no_option(self, seed=None):
    # `reset` missing the `options` keyword argument entirely.
    return self.observation_space.sample(), {}


def _make_reset_results(results):
    # Factory producing a `reset` function that returns `results` verbatim,
    # used to test the checker's handling of malformed reset return values.
    def _reset_result(self, seed=None, options=None):
        return results

    return _reset_result


@pytest.mark.parametrize(
    "test,func,message,kwargs",
    [
        [
            UserWarning,
            _reset_no_seed,
            "Future gym versions will require that `Env.reset` can be passed a `seed` instead of using `Env.seed` for resetting the environment random number generator.",
            {},
        ],
        [
            UserWarning,
            _reset_seed_default,
            "The default seed argument in `Env.reset` should be `None`, otherwise the environment will by default always be deterministic. Actual default: seed='error'",
            {},
        ],
        [
            UserWarning,
            _reset_no_option,
            "Future gym versions will require that `Env.reset` can be passed `options` to allow the environment initialisation to be passed additional information.",
            {},
        ],
        [
            UserWarning,
            _make_reset_results([0, {}]),
            "The result returned by `env.reset()` was not a tuple of the form `(obs, info)`, where `obs` is a observation and `info` is a dictionary containing additional information. 
Actual type: ``", {}, ], [ AssertionError, _make_reset_results((np.array([0], dtype=np.float32), {1, 2})), "The second element returned by `env.reset()` was not a dictionary, actual type: ", {}, ], ], ) def test_passive_env_reset_checker(test, func: callable, message: str, kwargs: Dict): """Tests the passive env reset check""" if test is UserWarning: with pytest.warns( UserWarning, match=f"^\\x1b\\[33mWARN: {re.escape(message)}\\x1b\\[0m$" ): env_reset_passive_checker(GenericTestEnv(reset_fn=func), **kwargs) else: with warnings.catch_warnings(record=True) as caught_warnings: with pytest.raises(test, match=f"^{re.escape(message)}$"): env_reset_passive_checker(GenericTestEnv(reset_fn=func), **kwargs) assert len(caught_warnings) == 0 def _modified_step( self, obs=None, reward=0, terminated=False, truncated=False, info=None ): if obs is None: obs = self.observation_space.sample() if info is None: info = {} if truncated is None: return obs, reward, terminated, info else: return obs, reward, terminated, truncated, info @pytest.mark.parametrize( "test,func,message", [ [ AssertionError, lambda self, _: "error", "Expects step result to be a tuple, actual type: ", ], [ UserWarning, lambda self, _: _modified_step(self, terminated="error", truncated=None), "Expects `done` signal to be a boolean, actual type: ", ], [ UserWarning, lambda self, _: _modified_step(self, terminated="error", truncated=False), "Expects `terminated` signal to be a boolean, actual type: ", ], [ UserWarning, lambda self, _: _modified_step(self, truncated="error"), "Expects `truncated` signal to be a boolean, actual type: ", ], [ gym.error.Error, lambda self, _: (1, 2, 3), "Expected `Env.step` to return a four or five element tuple, actual number of elements returned: 3.", ], [ UserWarning, lambda self, _: _modified_step(self, reward="error"), "The reward returned by `step()` must be a float, int, np.integer or np.floating, actual type: ", ], [ UserWarning, lambda self, _: _modified_step(self, 
reward=np.nan), "The reward is a NaN value.", ], [ UserWarning, lambda self, _: _modified_step(self, reward=np.inf), "The reward is an inf value.", ], [ AssertionError, lambda self, _: _modified_step(self, info="error"), "The `info` returned by `step()` must be a python dictionary, actual type: ", ], ], ) def test_passive_env_step_checker( test: Union[UserWarning, type], func: callable, message: str ): """Tests the passive env step checker.""" if test is UserWarning: with pytest.warns( UserWarning, match=f"^\\x1b\\[33mWARN: {re.escape(message)}\\x1b\\[0m$" ): env_step_passive_checker(GenericTestEnv(step_fn=func), 0) else: with warnings.catch_warnings(record=True) as caught_warnings: with pytest.raises(test, match=f"^{re.escape(message)}$"): env_step_passive_checker(GenericTestEnv(step_fn=func), 0) assert len(caught_warnings) == 0, caught_warnings @pytest.mark.parametrize( "test,env,message", [ [ UserWarning, GenericTestEnv(metadata={"render_modes": None}), "No render modes was declared in the environment (env.metadata['render_modes'] is None or not defined), you may have trouble when calling `.render()`.", ], [ UserWarning, GenericTestEnv(metadata={"render_modes": "Testing mode"}), "Expects the render_modes to be a sequence (i.e. 
list, tuple), actual type: ", ], [ UserWarning, GenericTestEnv( metadata={"render_modes": ["Testing mode", 1], "render_fps": 1}, ), "Expects all render modes to be strings, actual types: [, ]", ], [ UserWarning, GenericTestEnv( metadata={"render_modes": ["Testing mode"], "render_fps": None}, render_mode="Testing mode", render_fn=lambda self: 0, ), "No render fps was declared in the environment (env.metadata['render_fps'] is None or not defined), rendering may occur at inconsistent fps.", ], [ UserWarning, GenericTestEnv( metadata={"render_modes": ["Testing mode"], "render_fps": "fps"} ), "Expects the `env.metadata['render_fps']` to be an integer or a float, actual type: ", ], [ AssertionError, GenericTestEnv( metadata={"render_modes": [], "render_fps": 30}, render_mode="Test" ), "With no render_modes, expects the Env.render_mode to be None, actual value: Test", ], [ AssertionError, GenericTestEnv( metadata={"render_modes": ["Testing mode"], "render_fps": 30}, render_mode="Non mode", ), "The environment was initialized successfully however with an unsupported render mode. 
Render mode: Non mode, modes: ['Testing mode']", ], ], ) def test_passive_render_checker(test, env: GenericTestEnv, message: str): """Tests the passive render checker.""" if test is UserWarning: with pytest.warns( UserWarning, match=f"^\\x1b\\[33mWARN: {re.escape(message)}\\x1b\\[0m$" ): env_render_passive_checker(env) else: with warnings.catch_warnings(record=True) as caught_warnings: with pytest.raises(test, match=f"^{re.escape(message)}$"): env_render_passive_checker(env) assert len(caught_warnings) == 0 ================================================ FILE: tests/utils/test_play.py ================================================ from functools import partial from itertools import product from typing import Callable import numpy as np import pygame import pytest from pygame import KEYDOWN, KEYUP, QUIT, event from pygame.event import Event import gym from gym.utils.play import MissingKeysToAction, PlayableGame, play from tests.testing_env import GenericTestEnv RELEVANT_KEY_1 = ord("a") # 97 RELEVANT_KEY_2 = ord("d") # 100 IRRELEVANT_KEY = 1 PlayableEnv = partial( GenericTestEnv, metadata={"render_modes": ["rgb_array"]}, render_fn=lambda self: np.ones((10, 10, 3)), ) class KeysToActionWrapper(gym.Wrapper): def __init__(self, env, keys_to_action): super().__init__(env) self.keys_to_action = keys_to_action def get_keys_to_action(self): return self.keys_to_action class PlayStatus: def __init__(self, callback: Callable): self.data_callback = callback self.cumulative_reward = 0 self.last_observation = None def callback(self, obs_t, obs_tp1, action, rew, terminated, truncated, info): _, obs_tp1, _, rew, _, _, _ = self.data_callback( obs_t, obs_tp1, action, rew, terminated, truncated, info ) self.cumulative_reward += rew self.last_observation = obs_tp1 def dummy_keys_to_action(): return {(RELEVANT_KEY_1,): 0, (RELEVANT_KEY_2,): 1} def dummy_keys_to_action_str(): """{'a': 0, 'd': 1}""" return {chr(RELEVANT_KEY_1): 0, chr(RELEVANT_KEY_2): 1} @pytest.fixture(autouse=True) 
def close_pygame():
    # Autouse fixture (decorator on the preceding line): shut pygame down
    # after every test so state does not leak between tests.
    yield
    pygame.quit()


def test_play_relevant_keys():
    env = PlayableEnv(render_mode="rgb_array")
    game = PlayableGame(env, dummy_keys_to_action())
    assert game.relevant_keys == {RELEVANT_KEY_1, RELEVANT_KEY_2}


def test_play_relevant_keys_no_mapping():
    # No keys_to_action mapping supplied and none provided by the env:
    # construction must raise.
    env = PlayableEnv(render_mode="rgb_array")
    with pytest.raises(MissingKeysToAction):
        PlayableGame(env)


def test_play_relevant_keys_with_env_attribute():
    """Env has a keys_to_action attribute"""
    env = PlayableEnv(render_mode="rgb_array")
    env.get_keys_to_action = dummy_keys_to_action
    game = PlayableGame(env)
    assert game.relevant_keys == {RELEVANT_KEY_1, RELEVANT_KEY_2}


def test_video_size_no_zoom():
    env = PlayableEnv(render_mode="rgb_array")
    game = PlayableGame(env, dummy_keys_to_action())
    assert game.video_size == env.render().shape[:2]


def test_video_size_zoom():
    env = PlayableEnv(render_mode="rgb_array")
    zoom = 2.2
    game = PlayableGame(env, dummy_keys_to_action(), zoom)
    assert game.video_size == tuple(int(dim * zoom) for dim in env.render().shape[:2])


def test_keyboard_quit_event():
    # Pressing ESC stops the game loop.
    env = PlayableEnv(render_mode="rgb_array")
    game = PlayableGame(env, dummy_keys_to_action())
    event = Event(pygame.KEYDOWN, {"key": pygame.K_ESCAPE})
    assert game.running is True
    game.process_event(event)
    assert game.running is False


def test_pygame_quit_event():
    # A pygame QUIT event (window close) stops the game loop.
    env = PlayableEnv(render_mode="rgb_array")
    game = PlayableGame(env, dummy_keys_to_action())
    event = Event(pygame.QUIT)
    assert game.running is True
    game.process_event(event)
    assert game.running is False


def test_keyboard_relevant_keydown_event():
    # A mapped key press is recorded in pressed_keys.
    env = PlayableEnv(render_mode="rgb_array")
    game = PlayableGame(env, dummy_keys_to_action())
    event = Event(pygame.KEYDOWN, {"key": RELEVANT_KEY_1})
    game.process_event(event)
    assert game.pressed_keys == [RELEVANT_KEY_1]


def test_keyboard_irrelevant_keydown_event():
    env = PlayableEnv(render_mode="rgb_array")
    game = PlayableGame(env, dummy_keys_to_action())
    event = Event(pygame.KEYDOWN, {"key": IRRELEVANT_KEY})
game.process_event(event) assert game.pressed_keys == [] def test_keyboard_keyup_event(): env = PlayableEnv(render_mode="rgb_array") game = PlayableGame(env, dummy_keys_to_action()) event = Event(pygame.KEYDOWN, {"key": RELEVANT_KEY_1}) game.process_event(event) event = Event(pygame.KEYUP, {"key": RELEVANT_KEY_1}) game.process_event(event) assert game.pressed_keys == [] def test_play_loop_real_env(): SEED = 42 ENV = "CartPole-v1" # If apply_wrapper is true, we provide keys_to_action through the environment. If str_keys is true, the # keys_to_action dictionary will have strings as keys for apply_wrapper, str_keys in product([False, True], [False, True]): # set of key events to inject into the play loop as callback callback_events = [ Event(KEYDOWN, {"key": RELEVANT_KEY_1}), Event(KEYUP, {"key": RELEVANT_KEY_1}), Event(KEYDOWN, {"key": RELEVANT_KEY_2}), Event(KEYUP, {"key": RELEVANT_KEY_2}), Event(KEYDOWN, {"key": RELEVANT_KEY_1}), Event(KEYUP, {"key": RELEVANT_KEY_1}), Event(KEYDOWN, {"key": RELEVANT_KEY_1}), Event(KEYUP, {"key": RELEVANT_KEY_1}), Event(KEYDOWN, {"key": RELEVANT_KEY_2}), Event(KEYUP, {"key": RELEVANT_KEY_2}), Event(QUIT), ] keydown_events = [k for k in callback_events if k.type == KEYDOWN] def callback(obs_t, obs_tp1, action, rew, terminated, truncated, info): pygame_event = callback_events.pop(0) event.post(pygame_event) # after releasing a key, post new events until # we have one keydown while pygame_event.type == KEYUP: pygame_event = callback_events.pop(0) event.post(pygame_event) return obs_t, obs_tp1, action, rew, terminated, truncated, info env = gym.make(ENV, render_mode="rgb_array", disable_env_checker=True) env.reset(seed=SEED) keys_to_action = ( dummy_keys_to_action_str() if str_keys else dummy_keys_to_action() ) # first action is 0 because at the first iteration # we can not inject a callback event into play() obs, _, _, _, _ = env.step(0) for e in keydown_events: action = keys_to_action[chr(e.key) if str_keys else (e.key,)] obs, _, _, 
_, _ = env.step(action) env_play = gym.make(ENV, render_mode="rgb_array", disable_env_checker=True) if apply_wrapper: env_play = KeysToActionWrapper(env, keys_to_action=keys_to_action) assert hasattr(env_play, "get_keys_to_action") status = PlayStatus(callback) play( env_play, callback=status.callback, keys_to_action=None if apply_wrapper else keys_to_action, seed=SEED, ) assert (status.last_observation == obs).all() def test_play_no_keys(): with pytest.raises(MissingKeysToAction): play(gym.make("CartPole-v1")) ================================================ FILE: tests/utils/test_save_video.py ================================================ import os import shutil import numpy as np import gym from gym.utils.save_video import capped_cubic_video_schedule, save_video def test_record_video_using_default_trigger(): env = gym.make( "CartPole-v1", render_mode="rgb_array_list", disable_env_checker=True ) env.reset() step_starting_index = 0 episode_index = 0 for step_index in range(199): action = env.action_space.sample() _, _, terminated, truncated, _ = env.step(action) if terminated or truncated: save_video( env.render(), "videos", fps=env.metadata["render_fps"], step_starting_index=step_starting_index, episode_index=episode_index, ) step_starting_index = step_index + 1 episode_index += 1 env.reset() env.close() assert os.path.isdir("videos") mp4_files = [file for file in os.listdir("videos") if file.endswith(".mp4")] shutil.rmtree("videos") assert len(mp4_files) == sum( capped_cubic_video_schedule(i) for i in range(episode_index) ) def modulo_step_trigger(mod: int): def step_trigger(step_index): return step_index % mod == 0 return step_trigger def test_record_video_step_trigger(): env = gym.make("CartPole-v1", render_mode="rgb_array_list") env._max_episode_steps = 20 env.reset() step_starting_index = 0 episode_index = 0 for step_index in range(199): action = env.action_space.sample() _, _, terminated, truncated, _ = env.step(action) if terminated or truncated: 
save_video( env.render(), "videos", fps=env.metadata["render_fps"], step_trigger=modulo_step_trigger(100), step_starting_index=step_starting_index, episode_index=episode_index, ) step_starting_index = step_index + 1 episode_index += 1 env.reset() env.close() assert os.path.isdir("videos") mp4_files = [file for file in os.listdir("videos") if file.endswith(".mp4")] shutil.rmtree("videos") assert len(mp4_files) == 2 def test_record_video_within_vector(): step_trigger = modulo_step_trigger(100) n_steps = 199 expected_video = 2 envs = gym.vector.make( "CartPole-v1", num_envs=2, asynchronous=True, render_mode="rgb_array_list" ) envs.reset() episode_frames = [] step_starting_index = 0 episode_index = 0 for step_index in range(n_steps): _, _, terminated, truncated, _ = envs.step(envs.action_space.sample()) episode_frames.extend(envs.call("render")[0]) if np.any(np.logical_or(terminated, truncated)): save_video( episode_frames, "videos", fps=envs.metadata["render_fps"], step_trigger=step_trigger, step_starting_index=step_starting_index, episode_index=episode_index, ) episode_frames = [] step_starting_index = step_index + 1 episode_index += 1 # TODO: fix this test (see https://github.com/openai/gym/issues/3054) if step_trigger(step_index): expected_video -= 1 envs.close() assert os.path.isdir("videos") mp4_files = [file for file in os.listdir("videos") if file.endswith(".mp4")] shutil.rmtree("videos") assert len(mp4_files) == expected_video ================================================ FILE: tests/utils/test_seeding.py ================================================ import pickle from gym import error from gym.utils import seeding def test_invalid_seeds(): for seed in [-1, "test"]: try: seeding.np_random(seed) except error.Error: pass else: assert False, f"Invalid seed {seed} passed validation" def test_valid_seeds(): for seed in [0, 1]: random, seed1 = seeding.np_random(seed) assert seed == seed1 def test_rng_pickle(): rng, _ = seeding.np_random(seed=0) pickled = 
pickle.dumps(rng) rng2 = pickle.loads(pickled) assert isinstance( rng2, seeding.RandomNumberGenerator ), "Unpickled object is not a RandomNumberGenerator" assert rng.random() == rng2.random() ================================================ FILE: tests/utils/test_step_api_compatibility.py ================================================ import numpy as np import pytest from gym.utils.env_checker import data_equivalence from gym.utils.step_api_compatibility import ( convert_to_done_step_api, convert_to_terminated_truncated_step_api, ) @pytest.mark.parametrize( "is_vector_env, done_returns, expected_terminated, expected_truncated", ( # Test each of the permutations for single environments with and without the old info (False, (0, 0, False, {"Test-info": True}), False, False), (False, (0, 0, False, {"TimeLimit.truncated": False}), False, False), (False, (0, 0, True, {}), True, False), (False, (0, 0, True, {"TimeLimit.truncated": True}), False, True), (False, (0, 0, True, {"Test-info": True}), True, False), # Test vectorise versions with both list and dict infos testing each permutation for sub-environments ( True, ( 0, 0, np.array([False, True, True]), [{}, {}, {"TimeLimit.truncated": True}], ), np.array([False, True, False]), np.array([False, False, True]), ), ( True, ( 0, 0, np.array([False, True, True]), {"TimeLimit.truncated": np.array([False, False, True])}, ), np.array([False, True, False]), np.array([False, False, True]), ), # empty truncated info ( True, ( 0, 0, np.array([False, True]), {}, ), np.array([False, True]), np.array([False, False]), ), ), ) def test_to_done_step_api( is_vector_env, done_returns, expected_terminated, expected_truncated ): _, _, terminated, truncated, info = convert_to_terminated_truncated_step_api( done_returns, is_vector_env=is_vector_env ) assert np.all(terminated == expected_terminated) assert np.all(truncated == expected_truncated) if is_vector_env is False: assert "TimeLimit.truncated" not in info elif isinstance(info, list): 
        assert all("TimeLimit.truncated" not in sub_info for sub_info in info)
    else:  # isinstance(info, dict)
        assert "TimeLimit.truncated" not in info

    # Converting back to the old API must reproduce the original tuple exactly.
    roundtripped_returns = convert_to_done_step_api(
        (0, 0, terminated, truncated, info), is_vector_env=is_vector_env
    )
    assert data_equivalence(done_returns, roundtripped_returns)


@pytest.mark.parametrize(
    "is_vector_env, terminated_truncated_returns, expected_done, expected_truncated",
    (
        (False, (0, 0, False, False, {"Test-info": True}), False, False),
        (False, (0, 0, True, False, {}), True, False),
        (False, (0, 0, False, True, {}), True, True),
        # (False, (), True, True),  # Not possible to encode in the old step api
        # Test vector dict info
        (
            True,
            (0, 0, np.array([False, True, False]), np.array([False, False, True]), {}),
            np.array([False, True, True]),
            np.array([False, False, True]),
        ),
        # Test vector dict info with no truncation
        (
            True,
            (0, 0, np.array([False, True]), np.array([False, False]), {}),
            np.array([False, True]),
            np.array([False, False]),
        ),
        # Test vector list info
        (
            True,
            (
                0,
                0,
                np.array([False, True, False]),
                np.array([False, False, True]),
                [{"Test-Info": True}, {}, {}],
            ),
            np.array([False, True, True]),
            np.array([False, False, True]),
        ),
    ),
)
def test_to_terminated_truncated_step_api(
    is_vector_env, terminated_truncated_returns, expected_done, expected_truncated
):
    """Check conversion from the (terminated, truncated) API to the old done API."""
    _, _, done, info = convert_to_done_step_api(
        terminated_truncated_returns, is_vector_env=is_vector_env
    )
    assert np.all(done == expected_done)

    # "TimeLimit.truncated" should only appear in the info when the episode ended.
    if is_vector_env is False:
        if expected_done:
            assert info["TimeLimit.truncated"] == expected_truncated
        else:
            assert "TimeLimit.truncated" not in info
    elif isinstance(info, list):
        for sub_info, env_done, env_truncated in zip(
            info, expected_done, expected_truncated
        ):
            if env_done:
                assert sub_info["TimeLimit.truncated"] == env_truncated
            else:
                assert "TimeLimit.truncated" not in sub_info
    else:  # isinstance(info, dict)
        if np.any(expected_done):
            assert np.all(info["TimeLimit.truncated"] == expected_truncated)
        else:
            assert "TimeLimit.truncated" not in info

    # Converting back to the new API must reproduce the original tuple exactly.
    roundtripped_returns = convert_to_terminated_truncated_step_api(
        (0, 0, done, info), is_vector_env=is_vector_env
    )
    assert data_equivalence(terminated_truncated_returns, roundtripped_returns)


def test_edge_case():
    """Round-trip-impossible case: terminated=True and truncated=True together."""
    # When converting between the two-step APIs this is not possible in a single case
    # terminated=True and truncated=True -> done=True and info={}
    # We cannot test this in test_to_terminated_truncated_step_api as the roundtripping test will fail
    _, _, done, info = convert_to_done_step_api((0, 0, True, True, {}))
    assert done is True
    assert info == {"TimeLimit.truncated": False}

    # Test with vector dict info
    _, _, done, info = convert_to_done_step_api(
        (0, 0, np.array([True]), np.array([True]), {}), is_vector_env=True
    )
    assert np.all(done)
    assert info == {"TimeLimit.truncated": np.array([False])}

    # Test with vector list info
    _, _, done, info = convert_to_done_step_api(
        (0, 0, np.array([True]), np.array([True]), [{"Test-Info": True}]),
        is_vector_env=True,
    )
    assert np.all(done)
    assert info == [{"Test-Info": True, "TimeLimit.truncated": False}]


# ================================================
# FILE: tests/vector/__init__.py
# ================================================


# ================================================
# FILE: tests/vector/test_async_vector_env.py
# ================================================
import re
from multiprocessing import TimeoutError

import numpy as np
import pytest

from gym.error import AlreadyPendingCallError, ClosedEnvironmentError, NoAsyncCallError
from gym.spaces import Box, Discrete, MultiDiscrete, Tuple
from gym.vector.async_vector_env import AsyncVectorEnv
from tests.vector.utils import (
    CustomSpace,
    make_custom_space_env,
    make_env,
    make_slow_env,
)


@pytest.mark.parametrize("shared_memory", [True, False])
def test_create_async_vector_env(shared_memory):
    """An AsyncVectorEnv built from 8 env factories reports num_envs == 8."""
    env_fns = [make_env("CartPole-v1", i) for i in range(8)]
    env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
    assert env.num_envs == 8
    env.close()


@pytest.mark.parametrize("shared_memory", [True, False])
def test_reset_async_vector_env(shared_memory):
    """Reset of an AsyncVectorEnv returns batched observations and dict infos."""
    env_fns = [make_env("CartPole-v1", i) for i in range(8)]

    # NOTE(review): this first create/reset/assert run is duplicated by the
    # try/finally run below — looks like a merge artifact; confirm and drop one.
    env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
    observations, infos = env.reset()
    env.close()

    assert isinstance(env.observation_space, Box)
    assert isinstance(observations, np.ndarray)
    assert observations.dtype == env.observation_space.dtype
    assert observations.shape == (8,) + env.single_observation_space.shape
    assert observations.shape == env.observation_space.shape

    try:
        env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
        observations, infos = env.reset()
    finally:
        env.close()

    assert isinstance(env.observation_space, Box)
    assert isinstance(observations, np.ndarray)
    assert observations.dtype == env.observation_space.dtype
    assert observations.shape == (8,) + env.single_observation_space.shape
    assert observations.shape == env.observation_space.shape

    assert isinstance(infos, dict)
    # NOTE(review): iterating a dict yields its *keys*, so this is only
    # vacuously true when `infos` is empty; presumably `infos.values()` was
    # intended — confirm against the vector info format.
    assert all([isinstance(info, dict) for info in infos])


@pytest.mark.parametrize("shared_memory", [True, False])
@pytest.mark.parametrize("use_single_action_space", [True, False])
def test_step_async_vector_env(shared_memory, use_single_action_space):
    """Stepping an AsyncVectorEnv yields batched obs/reward/terminated/truncated arrays."""
    env_fns = [make_env("CartPole-v1", i) for i in range(8)]

    env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
    observations = env.reset()

    assert isinstance(env.single_action_space, Discrete)
    assert isinstance(env.action_space, MultiDiscrete)

    # Actions can be given either as a list of per-env samples or one batch sample.
    if use_single_action_space:
        actions = [env.single_action_space.sample() for _ in range(8)]
    else:
        actions = env.action_space.sample()
    observations, rewards, terminateds, truncateds, _ = env.step(actions)
    env.close()

    assert isinstance(env.observation_space, Box)
    assert isinstance(observations, np.ndarray)
    assert observations.dtype == env.observation_space.dtype
    assert observations.shape == (8,) + env.single_observation_space.shape
    assert observations.shape == env.observation_space.shape

    assert isinstance(rewards, np.ndarray)
    assert isinstance(rewards[0], (float, np.floating))
    assert rewards.ndim == 1
    assert rewards.size == 8

    assert isinstance(terminateds, np.ndarray)
    assert terminateds.dtype == np.bool_
    assert terminateds.ndim == 1
    assert terminateds.size == 8

    assert isinstance(truncateds, np.ndarray)
    assert truncateds.dtype == np.bool_
    assert truncateds.ndim == 1
    assert truncateds.size == 8


@pytest.mark.parametrize("shared_memory", [True, False])
def test_call_async_vector_env(shared_memory):
    """`call` forwards a method/attribute name to every sub-env and tuples the results."""
    env_fns = [
        make_env("CartPole-v1", i, render_mode="rgb_array_list") for i in range(4)
    ]

    env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
    _ = env.reset()
    images = env.call("render")
    gravity = env.call("gravity")
    env.close()

    assert isinstance(images, tuple)
    assert len(images) == 4
    for i in range(4):
        assert len(images[i]) == 1
        assert isinstance(images[i][0], np.ndarray)

    assert isinstance(gravity, tuple)
    assert len(gravity) == 4
    for i in range(4):
        assert isinstance(gravity[i], float)
        assert gravity[i] == 9.8


@pytest.mark.parametrize("shared_memory", [True, False])
def test_set_attr_async_vector_env(shared_memory):
    """`set_attr` with a per-env list sets a distinct value on each sub-env."""
    env_fns = [make_env("CartPole-v1", i) for i in range(4)]
    env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
    env.set_attr("gravity", [9.81, 3.72, 8.87, 1.62])
    gravity = env.get_attr("gravity")
    assert gravity == (9.81, 3.72, 8.87, 1.62)
    env.close()


@pytest.mark.parametrize("shared_memory", [True, False])
def test_copy_async_vector_env(shared_memory):
    """Mutating returned observations with copy=True must not corrupt internal state."""
    env_fns = [make_env("CartPole-v1", i) for i in range(8)]

    # TODO, these tests do nothing, understand the purpose of the tests and fix them
    env = AsyncVectorEnv(env_fns, shared_memory=shared_memory, copy=True)
    observations, infos = env.reset()
    observations[0] = 0
    env.close()


@pytest.mark.parametrize("shared_memory", [True, False])
def test_no_copy_async_vector_env(shared_memory):
    """Same as above but with copy=False (observations may alias internal buffers)."""
    env_fns = [make_env("CartPole-v1", i) for i in range(8)]

    # TODO, these tests do nothing, understand the purpose of the tests and fix them
    env = AsyncVectorEnv(env_fns, shared_memory=shared_memory, copy=False)
    observations, infos = env.reset()
    observations[0] = 0
    env.close()


@pytest.mark.parametrize("shared_memory", [True, False])
def test_reset_timeout_async_vector_env(shared_memory):
    """reset_wait raises TimeoutError when sub-envs are slower than the timeout."""
    env_fns = [make_slow_env(0.3, i) for i in range(4)]

    env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
    with pytest.raises(TimeoutError):
        env.reset_async()
        env.reset_wait(timeout=0.1)
    env.close(terminate=True)


@pytest.mark.parametrize("shared_memory", [True, False])
def test_step_timeout_async_vector_env(shared_memory):
    """step_wait raises TimeoutError when a sub-env's step exceeds the timeout."""
    env_fns = [make_slow_env(0.0, i) for i in range(4)]

    env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
    with pytest.raises(TimeoutError):
        env.reset()
        # The action value is interpreted by make_slow_env as a sleep duration,
        # so 0.3 in one env exceeds the 0.1s wait below.
        env.step_async(np.array([0.1, 0.1, 0.3, 0.1]))
        observations, rewards, terminateds, truncateds, _ = env.step_wait(timeout=0.1)
    env.close(terminate=True)


@pytest.mark.parametrize("shared_memory", [True, False])
def test_reset_out_of_order_async_vector_env(shared_memory):
    """Out-of-order reset calls raise NoAsyncCallError / AlreadyPendingCallError."""
    env_fns = [make_env("CartPole-v1", i) for i in range(4)]

    # reset_wait without a prior reset_async is an error.
    env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
    with pytest.raises(
        NoAsyncCallError,
        match=re.escape(
            "Calling `reset_wait` without any prior call to `reset_async`."
        ),
    ):
        env.reset_wait()
    env.close(terminate=True)

    # reset_async while a step is still pending is an error.
    env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
    with pytest.raises(
        AlreadyPendingCallError,
        match=re.escape(
            "Calling `reset_async` while waiting for a pending call to `step` to complete"
        ),
    ):
        actions = env.action_space.sample()
        env.reset()
        env.step_async(actions)
        env.reset_async()

    # Closing with the step still pending warns but succeeds.
    with pytest.warns(
        UserWarning,
        match=re.escape(
            "Calling `close` while waiting for a pending call to `step` to complete."
        ),
    ):
        env.close(terminate=True)


@pytest.mark.parametrize("shared_memory", [True, False])
def test_step_out_of_order_async_vector_env(shared_memory):
    """Out-of-order step calls raise NoAsyncCallError / AlreadyPendingCallError."""
    env_fns = [make_env("CartPole-v1", i) for i in range(4)]

    # step_wait without a prior step_async is an error.
    env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
    with pytest.raises(
        NoAsyncCallError,
        match=re.escape("Calling `step_wait` without any prior call to `step_async`."),
    ):
        env.action_space.sample()
        env.reset()
        env.step_wait()
    env.close(terminate=True)

    # step_async while a reset is still pending is an error.
    env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
    with pytest.raises(
        AlreadyPendingCallError,
        match=re.escape(
            "Calling `step_async` while waiting for a pending call to `reset` to complete"
        ),
    ):
        actions = env.action_space.sample()
        env.reset_async()
        env.step_async(actions)

    # Closing with the reset still pending warns but succeeds.
    with pytest.warns(
        UserWarning,
        match=re.escape(
            "Calling `close` while waiting for a pending call to `reset` to complete."
        ),
    ):
        env.close(terminate=True)


@pytest.mark.parametrize("shared_memory", [True, False])
def test_already_closed_async_vector_env(shared_memory):
    """Using a closed AsyncVectorEnv raises ClosedEnvironmentError."""
    env_fns = [make_env("CartPole-v1", i) for i in range(4)]
    with pytest.raises(ClosedEnvironmentError):
        env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
        env.close()
        env.reset()


@pytest.mark.parametrize("shared_memory", [True, False])
def test_check_spaces_async_vector_env(shared_memory):
    """Mixing sub-envs with mismatched spaces raises RuntimeError."""
    # CartPole-v1 - observation_space: Box(4,), action_space: Discrete(2)
    env_fns = [make_env("CartPole-v1", i) for i in range(8)]
    # FrozenLake-v1 - Discrete(16), action_space: Discrete(4)
    env_fns[1] = make_env("FrozenLake-v1", 1)
    with pytest.raises(RuntimeError):
        env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
        env.close(terminate=True)


def test_custom_space_async_vector_env():
    """Custom (non-numpy) spaces work without shared memory; obs are tupled."""
    env_fns = [make_custom_space_env(i) for i in range(4)]

    env = AsyncVectorEnv(env_fns, shared_memory=False)
    reset_observations, reset_infos = env.reset()

    assert isinstance(env.single_action_space, CustomSpace)
    assert isinstance(env.action_space, Tuple)

    actions = (
        "action-2",
        "action-3",
        "action-5",
        "action-7",
    )
    step_observations, rewards, terminateds, truncateds, _ = env.step(actions)
    env.close()

    assert isinstance(env.single_observation_space, CustomSpace)
    assert isinstance(env.observation_space, Tuple)

    assert isinstance(reset_observations, tuple)
    assert reset_observations == ("reset", "reset", "reset", "reset")

    assert isinstance(step_observations, tuple)
    assert step_observations == (
        "step(action-2)",
        "step(action-3)",
        "step(action-5)",
        "step(action-7)",
    )


def test_custom_space_async_vector_env_shared_memory():
    """Custom spaces cannot use shared memory; constructing one raises ValueError."""
    env_fns = [make_custom_space_env(i) for i in range(4)]
    with pytest.raises(ValueError):
        env = AsyncVectorEnv(env_fns, shared_memory=True)
        env.close(terminate=True)


# ================================================
# FILE: tests/vector/test_numpy_utils.py
# ================================================
from collections import OrderedDict

import numpy as np
import pytest

from gym.spaces import Dict, Tuple
from gym.vector.utils.numpy_utils import concatenate, create_empty_array
from gym.vector.utils.spaces import BaseGymSpaces
from tests.vector.utils import spaces


@pytest.mark.parametrize(
    "space", spaces, ids=[space.__class__.__name__ for space in spaces]
)
def test_concatenate(space):
    """`concatenate` stacks per-env samples into the pre-allocated batched array."""

    def assert_type(lhs, rhs, n):
        # Special case: if rhs is a list of scalars, lhs must be an np.ndarray
        if np.isscalar(rhs[0]):
            assert isinstance(lhs, np.ndarray)
            assert all([np.isscalar(rhs[i]) for i in range(n)])
        else:
            assert all([isinstance(rhs[i], type(lhs)) for i in range(n)])

    def assert_nested_equal(lhs, rhs, n):
        # Recursively check the batched structure `lhs` against the n samples `rhs`.
        assert isinstance(rhs, list)
        assert (n > 0) and (len(rhs) == n)
        assert_type(lhs, rhs, n)
        if isinstance(lhs, np.ndarray):
            assert lhs.shape[0] == n
            for i in range(n):
                assert np.all(lhs[i] == rhs[i])
        elif isinstance(lhs, tuple):
            for i in range(len(lhs)):
                rhs_T_i = [rhs[j][i] for j in range(n)]
                assert_nested_equal(lhs[i], rhs_T_i, n)
        elif isinstance(lhs, OrderedDict):
            for key in lhs.keys():
                rhs_T_key = [rhs[j][key] for j in range(n)]
                assert_nested_equal(lhs[key], rhs_T_key, n)
        else:
            raise TypeError(f"Got unknown type `{type(lhs)}`.")

    samples = [space.sample() for _ in range(8)]
    array = create_empty_array(space, n=8)
    concatenated = concatenate(space, samples, array)

    # concatenate fills `array` in place and returns it.
    assert np.all(concatenated == array)
    assert_nested_equal(array, samples, n=8)


@pytest.mark.parametrize("n", [1, 8])
@pytest.mark.parametrize(
    "space", spaces, ids=[space.__class__.__name__ for space in spaces]
)
def test_create_empty_array(space, n):
    """`create_empty_array` mirrors the space's nested structure with shape (n, ...)."""

    def assert_nested_type(arr, space, n):
        if isinstance(space, BaseGymSpaces):
            assert isinstance(arr, np.ndarray)
            assert arr.dtype == space.dtype
            assert arr.shape == (n,) + space.shape
        elif isinstance(space, Tuple):
            assert isinstance(arr, tuple)
            assert len(arr) == len(space.spaces)
            for i in range(len(arr)):
                assert_nested_type(arr[i], space.spaces[i], n)
        elif isinstance(space, Dict):
            assert isinstance(arr, OrderedDict)
            assert set(arr.keys()) ^ set(space.spaces.keys()) == set()
            for key in arr.keys():
                assert_nested_type(arr[key], space.spaces[key], n)
        else:
            raise TypeError(f"Got unknown type `{type(arr)}`.")

    array = create_empty_array(space, n=n, fn=np.empty)
    assert_nested_type(array, space, n=n)


@pytest.mark.parametrize("n", [1, 8])
@pytest.mark.parametrize(
    "space", spaces, ids=[space.__class__.__name__ for space in spaces]
)
def test_create_empty_array_zeros(space, n):
    """Same as test_create_empty_array, but fn=np.zeros must yield all-zero leaves."""

    def assert_nested_type(arr, space, n):
        if isinstance(space, BaseGymSpaces):
            assert isinstance(arr, np.ndarray)
            assert arr.dtype == space.dtype
            assert arr.shape == (n,) + space.shape
            assert np.all(arr == 0)
        elif isinstance(space, Tuple):
            assert isinstance(arr, tuple)
            assert len(arr) == len(space.spaces)
            for i in range(len(arr)):
                assert_nested_type(arr[i], space.spaces[i], n)
        elif isinstance(space, Dict):
            assert isinstance(arr, OrderedDict)
            assert set(arr.keys()) ^ set(space.spaces.keys()) == set()
            for key in arr.keys():
                assert_nested_type(arr[key], space.spaces[key], n)
        else:
            raise TypeError(f"Got unknown type `{type(arr)}`.")
    array = create_empty_array(space, n=n, fn=np.zeros)
    assert_nested_type(array, space, n=n)


@pytest.mark.parametrize(
    "space", spaces, ids=[space.__class__.__name__ for space in spaces]
)
def test_create_empty_array_none_shape_ones(space):
    """With n=None the created array keeps the space's own shape (no batch axis)."""

    def assert_nested_type(arr, space):
        if isinstance(space, BaseGymSpaces):
            assert isinstance(arr, np.ndarray)
            assert arr.dtype == space.dtype
            assert arr.shape == space.shape
            assert np.all(arr == 1)
        elif isinstance(space, Tuple):
            assert isinstance(arr, tuple)
            assert len(arr) == len(space.spaces)
            for i in range(len(arr)):
                assert_nested_type(arr[i], space.spaces[i])
        elif isinstance(space, Dict):
            assert isinstance(arr, OrderedDict)
            assert set(arr.keys()) ^ set(space.spaces.keys()) == set()
            for key in arr.keys():
                assert_nested_type(arr[key], space.spaces[key])
        else:
            raise TypeError(f"Got unknown type `{type(arr)}`.")

    array = create_empty_array(space, n=None, fn=np.ones)
    assert_nested_type(array, space)


# ================================================
# FILE: tests/vector/test_shared_memory.py
# ================================================
import multiprocessing as mp
from collections import OrderedDict
from multiprocessing import Array, Process
from multiprocessing.sharedctypes import SynchronizedArray

import numpy as np
import pytest

from gym.error import CustomSpaceError
from gym.spaces import Dict, Tuple
from gym.vector.utils.shared_memory import (
    create_shared_memory,
    read_from_shared_memory,
    write_to_shared_memory,
)
from gym.vector.utils.spaces import BaseGymSpaces
from tests.vector.utils import custom_spaces, spaces

# Expected ctypes layout for each entry of `spaces`, in the same order
# (typecodes: "d" double, "f" float, "B" uint8, "i" int).
expected_types = [
    Array("d", 1),
    Array("f", 1),
    Array("f", 3),
    Array("f", 4),
    Array("B", 1),
    Array("B", 32 * 32 * 3),
    Array("i", 1),
    Array("i", 1),
    (Array("i", 1), Array("i", 1)),
    (Array("i", 1), Array("f", 2)),
    Array("B", 3),
    Array("B", 19),
    OrderedDict([("position", Array("i", 1)), ("velocity", Array("f", 1))]),
    OrderedDict(
        [
            ("position", OrderedDict([("x", Array("i", 1)), ("y", Array("i", 1))])),
            ("velocity", (Array("i", 1), Array("B", 1))),
        ]
    ),
]


@pytest.mark.parametrize("n", [1, 8])
@pytest.mark.parametrize(
    "space,expected_type",
    list(zip(spaces, expected_types)),
    ids=[space.__class__.__name__ for space in spaces],
)
@pytest.mark.parametrize(
    "ctx", [None, "fork", "spawn"], ids=["default", "fork", "spawn"]
)
def test_create_shared_memory(space, expected_type, n, ctx):
    """Shared memory matches the expected ctypes layout, scaled by n envs."""

    def assert_nested_type(lhs, rhs, n):
        assert type(lhs) == type(rhs)
        if isinstance(lhs, (list, tuple)):
            assert len(lhs) == len(rhs)
            for lhs_, rhs_ in zip(lhs, rhs):
                assert_nested_type(lhs_, rhs_, n)
        elif isinstance(lhs, (dict, OrderedDict)):
            assert set(lhs.keys()) ^ set(rhs.keys()) == set()
            for key in lhs.keys():
                assert_nested_type(lhs[key], rhs[key], n)
        elif isinstance(lhs, SynchronizedArray):
            # Assert the length of the array
            assert len(lhs[:]) == n * len(rhs[:])
            # Assert the data type
            assert isinstance(lhs[0], type(rhs[0]))
        else:
            raise TypeError(f"Got unknown type `{type(lhs)}`.")

    ctx = mp if (ctx is None) else mp.get_context(ctx)
    shared_memory = create_shared_memory(space, n=n, ctx=ctx)
    assert_nested_type(shared_memory, expected_type, n=n)


@pytest.mark.parametrize("n", [1, 8])
@pytest.mark.parametrize(
    "ctx", [None, "fork", "spawn"], ids=["default", "fork", "spawn"]
)
@pytest.mark.parametrize("space", custom_spaces)
def test_create_shared_memory_custom_space(n, ctx, space):
    """Custom spaces have no ctypes layout, so shared memory creation must fail."""
    ctx = mp if (ctx is None) else mp.get_context(ctx)
    with pytest.raises(CustomSpaceError):
        create_shared_memory(space, n=n, ctx=ctx)


def _write_shared_memory(space, i, shared_memory, sample):
    # Process target: write one env's sample into slot i of the shared buffer.
    write_to_shared_memory(space, i, sample, shared_memory)


@pytest.mark.parametrize(
    "space", spaces, ids=[space.__class__.__name__ for space in spaces]
)
def test_write_to_shared_memory(space):
    """Concurrent per-process writes land in the right slots of the shared buffer."""

    def assert_nested_equal(lhs, rhs):
        assert isinstance(rhs, list)
        if isinstance(lhs, (list, tuple)):
            for i in range(len(lhs)):
                assert_nested_equal(lhs[i], [rhs_[i] for rhs_ in rhs])
        elif isinstance(lhs, (dict, OrderedDict)):
            for key in lhs.keys():
                assert_nested_equal(lhs[key], [rhs_[key] for rhs_ in rhs])
        elif isinstance(lhs, SynchronizedArray):
            assert np.all(np.array(lhs[:]) == np.stack(rhs, axis=0).flatten())
        else:
            raise TypeError(f"Got unknown type `{type(lhs)}`.")

    shared_memory_n8 = create_shared_memory(space, n=8)
    samples = [space.sample() for _ in range(8)]

    processes = [
        Process(
            target=_write_shared_memory, args=(space, i, shared_memory_n8, samples[i])
        )
        for i in range(8)
    ]

    for process in processes:
        process.start()
    for process in processes:
        process.join()

    assert_nested_equal(shared_memory_n8, samples)


def _process_write(space, i, shared_memory, sample):
    # Process target: same as _write_shared_memory, used by the read test below.
    write_to_shared_memory(space, i, sample, shared_memory)


@pytest.mark.parametrize(
    "space", spaces, ids=[space.__class__.__name__ for space in spaces]
)
def test_read_from_shared_memory(space):
    """A numpy view over shared memory reflects writes made by other processes."""

    def assert_nested_equal(lhs, rhs, space, n):
        assert isinstance(rhs, list)
        if isinstance(space, Tuple):
            assert isinstance(lhs, tuple)
            for i in range(len(lhs)):
                assert_nested_equal(
                    lhs[i], [rhs_[i] for rhs_ in rhs], space.spaces[i], n
                )
        elif isinstance(space, Dict):
            assert isinstance(lhs, OrderedDict)
            for key in lhs.keys():
                assert_nested_equal(
                    lhs[key], [rhs_[key] for rhs_ in rhs], space.spaces[key], n
                )
        elif isinstance(space, BaseGymSpaces):
            assert isinstance(lhs, np.ndarray)
            assert lhs.shape == ((n,) + space.shape)
            assert lhs.dtype == space.dtype
            assert np.all(lhs == np.stack(rhs, axis=0))
        else:
            raise TypeError(f"Got unknown type `{type(space)}`")

    shared_memory_n8 = create_shared_memory(space, n=8)
    # Create the view *before* the writes happen: it must alias, not copy.
    memory_view_n8 = read_from_shared_memory(space, shared_memory_n8, n=8)
    samples = [space.sample() for _ in range(8)]

    processes = [
        Process(target=_process_write, args=(space, i, shared_memory_n8, samples[i]))
        for i in range(8)
    ]

    for process in processes:
        process.start()
    for process in processes:
        process.join()

    assert_nested_equal(memory_view_n8, samples, space, n=8)


# ================================================
# FILE: tests/vector/test_spaces.py
# ================================================
import copy

import numpy as np
import pytest
from numpy.testing import assert_array_equal

from gym.spaces import Box, Dict, MultiDiscrete, Space, Tuple
from gym.vector.utils.spaces import batch_space, iterate
from tests.vector.utils import CustomSpace, assert_rng_equal, custom_spaces, spaces

# Expected result of batch_space(space, n=4) for each entry of `spaces`, in order.
expected_batch_spaces_4 = [
    Box(low=-1.0, high=1.0, shape=(4,), dtype=np.float64),
    Box(low=0.0, high=10.0, shape=(4, 1), dtype=np.float64),
    Box(
        low=np.array(
            [[-1.0, 0.0, 0.0], [-1.0, 0.0, 0.0], [-1.0, 0.0, 0.0], [-1.0, 0.0, 0.0]]
        ),
        high=np.array(
            [[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]
        ),
        dtype=np.float64,
    ),
    Box(
        low=np.array(
            [
                [[-1.0, 0.0], [0.0, -1.0]],
                [[-1.0, 0.0], [0.0, -1.0]],
                [[-1.0, 0.0], [0.0, -1]],
                [[-1.0, 0.0], [0.0, -1.0]],
            ]
        ),
        high=np.ones((4, 2, 2)),
        dtype=np.float64,
    ),
    Box(low=0, high=255, shape=(4,), dtype=np.uint8),
    Box(low=0, high=255, shape=(4, 32, 32, 3), dtype=np.uint8),
    MultiDiscrete([2, 2, 2, 2]),
    Box(low=-2, high=2, shape=(4,), dtype=np.int64),
    Tuple((MultiDiscrete([3, 3, 3, 3]), MultiDiscrete([5, 5, 5, 5]))),
    Tuple(
        (
            MultiDiscrete([7, 7, 7, 7]),
            Box(
                low=np.array([[0.0, -1.0], [0.0, -1.0], [0.0, -1.0], [0.0, -1]]),
                high=np.array([[1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]]),
                dtype=np.float64,
            ),
        )
    ),
    Box(
        low=np.array([[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]]),
        high=np.array([[10, 12, 16], [10, 12, 16], [10, 12, 16], [10, 12, 16]]),
        dtype=np.int64,
    ),
    Box(low=0, high=1, shape=(4, 19), dtype=np.int8),
    Dict(
        {
            "position": MultiDiscrete([23, 23, 23, 23]),
            "velocity": Box(low=0.0, high=1.0, shape=(4, 1), dtype=np.float64),
        }
    ),
    Dict(
        {
            "position": Dict(
                {
                    "x": MultiDiscrete([29, 29, 29, 29]),
                    "y": MultiDiscrete([31, 31, 31, 31]),
                }
            ),
            "velocity": Tuple(
                (
                    MultiDiscrete([37, 37, 37, 37]),
                    Box(low=0, high=255, shape=(4,), dtype=np.uint8),
                )
            ),
        }
    ),
]

# Expected result of batch_space(space, n=4) for each entry of `custom_spaces`.
expected_custom_batch_spaces_4 = [
    Tuple((CustomSpace(), CustomSpace(), CustomSpace(), CustomSpace())),
    Tuple(
        (
            Tuple((CustomSpace(), CustomSpace(), CustomSpace(), CustomSpace())),
            Box(low=0, high=255, shape=(4,), dtype=np.uint8),
        )
    ),
]


@pytest.mark.parametrize(
    "space,expected_batch_space_4",
    list(zip(spaces, expected_batch_spaces_4)),
    ids=[space.__class__.__name__ for space in spaces],
)
def test_batch_space(space, expected_batch_space_4):
    """batch_space(space, 4) equals the precomputed expected batched space."""
    batch_space_4 = batch_space(space, n=4)
    assert batch_space_4 == expected_batch_space_4


@pytest.mark.parametrize(
    "space,expected_batch_space_4",
    list(zip(custom_spaces, expected_custom_batch_spaces_4)),
    ids=[space.__class__.__name__ for space in custom_spaces],
)
def test_batch_space_custom_space(space, expected_batch_space_4):
    """Custom spaces batch into a Tuple of n copies."""
    batch_space_4 = batch_space(space, n=4)
    assert batch_space_4 == expected_batch_space_4


@pytest.mark.parametrize(
    "space,batch_space",
    list(zip(spaces, expected_batch_spaces_4)),
    ids=[space.__class__.__name__ for space in spaces],
)
def test_iterate(space, batch_space):
    """iterate() yields exactly n=4 items, each a member of the single space."""
    items = batch_space.sample()
    iterator = iterate(batch_space, items)
    i = 0
    for i, item in enumerate(iterator):
        assert item in space
    assert i == 3


@pytest.mark.parametrize(
    "space,batch_space",
    list(zip(custom_spaces, expected_custom_batch_spaces_4)),
    ids=[space.__class__.__name__ for space in custom_spaces],
)
def test_iterate_custom_space(space, batch_space):
    """Same as test_iterate but for custom spaces."""
    items = batch_space.sample()
    iterator = iterate(batch_space, items)
    i = 0
    for i, item in enumerate(iterator):
        assert item in space
    assert i == 3


@pytest.mark.parametrize(
    "space", spaces, ids=[space.__class__.__name__ for space in spaces]
)
@pytest.mark.parametrize("n", [4, 5], ids=[f"n={n}" for n in [4, 5]])
@pytest.mark.parametrize(
    "base_seed", [123, 456], ids=[f"seed={base_seed}" for base_seed in [123, 456]]
)
def test_rng_different_at_each_index(space: Space, n: int, base_seed: int):
    """
    Tests that the rng values produced at each index are different
    to prevent if the rng is copied for each subspace
    """
    space.seed(base_seed)
    batched_space = batch_space(space, n)
    # Same RNG state but a distinct RNG object.
    assert space.np_random is not batched_space.np_random
    assert_rng_equal(space.np_random, batched_space.np_random)

    batched_sample = batched_space.sample()
    sample = list(iterate(batched_space, batched_sample))
    # At least one per-index sample must differ from the first.
    assert not all(np.all(element == sample[0]) for element in sample), sample


@pytest.mark.parametrize(
    "space", spaces, ids=[space.__class__.__name__ for space in spaces]
)
@pytest.mark.parametrize("n", [1, 2, 5], ids=[f"n={n}" for n in [1, 2, 5]])
@pytest.mark.parametrize(
    "base_seed", [123, 456], ids=[f"seed={base_seed}" for base_seed in [123, 456]]
)
def test_deterministic(space: Space, n: int, base_seed: int):
    """Tests the batched spaces are deterministic by using a copied version"""
    # Copy the spaces and check that the np_random are not reference equal
    space_a = space
    space_a.seed(base_seed)
    space_b = copy.deepcopy(space_a)
    assert_rng_equal(space_a.np_random, space_b.np_random)
    assert space_a.np_random is not space_b.np_random

    # Batch the spaces and check that the np_random are not reference equal
    space_a_batched = batch_space(space_a, n)
    space_b_batched = batch_space(space_b, n)
    assert_rng_equal(space_a_batched.np_random, space_b_batched.np_random)
    assert space_a_batched.np_random is not space_b_batched.np_random
    # Create that the batched space is not reference equal to the origin spaces
    assert space_a.np_random is not space_a_batched.np_random

    # Check that batched space a and b random number generator are not effected by the original space
    space_a.sample()
    space_a_batched_sample = space_a_batched.sample()
    space_b_batched_sample = space_b_batched.sample()
    for a_sample, b_sample in zip(
        iterate(space_a_batched, space_a_batched_sample),
        iterate(space_b_batched, space_b_batched_sample),
    ):
        if isinstance(a_sample, tuple):
            assert len(a_sample) == len(b_sample)
            for a_subsample, b_subsample in zip(a_sample, b_sample):
                assert_array_equal(a_subsample, b_subsample)
        else:
            assert_array_equal(a_sample, b_sample)


# ================================================
# FILE:
# tests/vector/test_sync_vector_env.py
# ================================================
import numpy as np
import pytest

from gym.envs.registration import EnvSpec
from gym.spaces import Box, Discrete, MultiDiscrete, Tuple
from gym.vector.sync_vector_env import SyncVectorEnv
from tests.envs.utils import all_testing_env_specs
from tests.vector.utils import (
    CustomSpace,
    assert_rng_equal,
    make_custom_space_env,
    make_env,
)


def test_create_sync_vector_env():
    """A SyncVectorEnv built from 8 env factories reports num_envs == 8."""
    env_fns = [make_env("FrozenLake-v1", i) for i in range(8)]
    env = SyncVectorEnv(env_fns)
    env.close()

    assert env.num_envs == 8


def test_reset_sync_vector_env():
    """Reset of a SyncVectorEnv returns a correctly shaped batched observation."""
    env_fns = [make_env("CartPole-v1", i) for i in range(8)]
    env = SyncVectorEnv(env_fns)
    observations, infos = env.reset()
    env.close()

    assert isinstance(env.observation_space, Box)
    assert isinstance(observations, np.ndarray)
    assert observations.dtype == env.observation_space.dtype
    assert observations.shape == (8,) + env.single_observation_space.shape
    assert observations.shape == env.observation_space.shape

    del observations


@pytest.mark.parametrize("use_single_action_space", [True, False])
def test_step_sync_vector_env(use_single_action_space):
    """Stepping a SyncVectorEnv yields batched obs/reward/terminated/truncated arrays."""
    env_fns = [make_env("FrozenLake-v1", i) for i in range(8)]
    env = SyncVectorEnv(env_fns)
    observations = env.reset()

    assert isinstance(env.single_action_space, Discrete)
    assert isinstance(env.action_space, MultiDiscrete)

    # Actions can be given either as a list of per-env samples or one batch sample.
    if use_single_action_space:
        actions = [env.single_action_space.sample() for _ in range(8)]
    else:
        actions = env.action_space.sample()
    observations, rewards, terminateds, truncateds, _ = env.step(actions)
    env.close()

    assert isinstance(env.observation_space, MultiDiscrete)
    assert isinstance(observations, np.ndarray)
    assert observations.dtype == env.observation_space.dtype
    assert observations.shape == (8,) + env.single_observation_space.shape
    assert observations.shape == env.observation_space.shape

    assert isinstance(rewards, np.ndarray)
    assert isinstance(rewards[0], (float, np.floating))
    assert rewards.ndim == 1
    assert rewards.size == 8

    assert isinstance(terminateds, np.ndarray)
    assert terminateds.dtype == np.bool_
    assert terminateds.ndim == 1
    assert terminateds.size == 8

    assert isinstance(truncateds, np.ndarray)
    assert truncateds.dtype == np.bool_
    assert truncateds.ndim == 1
    assert truncateds.size == 8


def test_call_sync_vector_env():
    """`call` forwards a method/attribute name to every sub-env and tuples the results."""
    env_fns = [
        make_env("CartPole-v1", i, render_mode="rgb_array_list") for i in range(4)
    ]
    env = SyncVectorEnv(env_fns)
    _ = env.reset()
    images = env.call("render")
    gravity = env.call("gravity")
    env.close()

    assert isinstance(images, tuple)
    assert len(images) == 4
    for i in range(4):
        assert len(images[i]) == 1
        assert isinstance(images[i][0], np.ndarray)

    assert isinstance(gravity, tuple)
    assert len(gravity) == 4
    for i in range(4):
        assert isinstance(gravity[i], float)
        assert gravity[i] == 9.8


def test_set_attr_sync_vector_env():
    """`set_attr` with a per-env list sets a distinct value on each sub-env."""
    env_fns = [make_env("CartPole-v1", i) for i in range(4)]
    env = SyncVectorEnv(env_fns)
    env.set_attr("gravity", [9.81, 3.72, 8.87, 1.62])
    gravity = env.get_attr("gravity")
    assert gravity == (9.81, 3.72, 8.87, 1.62)
    env.close()


def test_check_spaces_sync_vector_env():
    """Mixing sub-envs with mismatched spaces raises RuntimeError."""
    # CartPole-v1 - observation_space: Box(4,), action_space: Discrete(2)
    env_fns = [make_env("CartPole-v1", i) for i in range(8)]
    # FrozenLake-v1 - Discrete(16), action_space: Discrete(4)
    env_fns[1] = make_env("FrozenLake-v1", 1)
    with pytest.raises(RuntimeError):
        env = SyncVectorEnv(env_fns)
        env.close()


def test_custom_space_sync_vector_env():
    """Custom (non-numpy) spaces work synchronously; observations are tupled."""
    env_fns = [make_custom_space_env(i) for i in range(4)]
    env = SyncVectorEnv(env_fns)
    reset_observations, infos = env.reset()

    assert isinstance(env.single_action_space, CustomSpace)
    assert isinstance(env.action_space, Tuple)

    actions = ("action-2", "action-3", "action-5", "action-7")
    step_observations, rewards, terminateds, truncateds, _ = env.step(actions)
    env.close()

    assert isinstance(env.single_observation_space, CustomSpace)
    assert isinstance(env.observation_space, Tuple)

    assert isinstance(reset_observations, tuple)
    assert reset_observations == ("reset", "reset", "reset", "reset")

    assert isinstance(step_observations, tuple)
    assert step_observations == (
        "step(action-2)",
        "step(action-3)",
        "step(action-5)",
        "step(action-7)",
    )


def test_sync_vector_env_seed():
    """A seeded SyncVectorEnv's action space samples match the single env's."""
    env = make_env("BipedalWalker-v3", seed=123)()
    sync_vector_env = SyncVectorEnv([make_env("BipedalWalker-v3", seed=123)])

    assert_rng_equal(env.action_space.np_random, sync_vector_env.action_space.np_random)
    for _ in range(100):
        env_action = env.action_space.sample()
        vector_action = sync_vector_env.action_space.sample()
        assert np.all(env_action == vector_action)


@pytest.mark.parametrize(
    "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs]
)
def test_sync_vector_determinism(spec: EnvSpec, seed: int = 123, n: int = 3):
    """Check that for all environments, the sync vector envs produce the same action samples using the same seeds"""
    env_1 = SyncVectorEnv([make_env(spec.id, seed=seed) for _ in range(n)])
    env_2 = SyncVectorEnv([make_env(spec.id, seed=seed) for _ in range(n)])
    assert_rng_equal(env_1.action_space.np_random, env_2.action_space.np_random)

    for _ in range(100):
        env_1_samples = env_1.action_space.sample()
        env_2_samples = env_2.action_space.sample()
        assert np.all(env_1_samples == env_2_samples)


# ================================================
# FILE: tests/vector/test_vector_env.py
# ================================================
from functools import partial

import numpy as np
import pytest

from gym.spaces import Discrete, Tuple
from gym.vector.async_vector_env import AsyncVectorEnv
from gym.vector.sync_vector_env import SyncVectorEnv
from gym.vector.vector_env import VectorEnv
from tests.testing_env import GenericTestEnv
from tests.vector.utils import CustomSpace, make_env


@pytest.mark.parametrize("shared_memory", [True, False])
def test_vector_env_equal(shared_memory):
    """Async and sync vector envs built from the same factories behave identically."""
    env_fns = [make_env("CartPole-v1", i) for i in range(4)]
    num_steps = 100

    async_env = AsyncVectorEnv(
        env_fns,
shared_memory=shared_memory) sync_env = SyncVectorEnv(env_fns) assert async_env.num_envs == sync_env.num_envs assert async_env.observation_space == sync_env.observation_space assert async_env.single_observation_space == sync_env.single_observation_space assert async_env.action_space == sync_env.action_space assert async_env.single_action_space == sync_env.single_action_space async_observations, async_infos = async_env.reset(seed=0) sync_observations, sync_infos = sync_env.reset(seed=0) assert np.all(async_observations == sync_observations) for _ in range(num_steps): actions = async_env.action_space.sample() assert actions in sync_env.action_space # fmt: off async_observations, async_rewards, async_terminateds, async_truncateds, async_infos = async_env.step(actions) sync_observations, sync_rewards, sync_terminateds, sync_truncateds, sync_infos = sync_env.step(actions) # fmt: on if any(sync_terminateds) or any(sync_truncateds): assert "final_observation" in async_infos assert "_final_observation" in async_infos assert "final_observation" in sync_infos assert "_final_observation" in sync_infos assert np.all(async_observations == sync_observations) assert np.all(async_rewards == sync_rewards) assert np.all(async_terminateds == sync_terminateds) assert np.all(async_truncateds == sync_truncateds) async_env.close() sync_env.close() def test_custom_space_vector_env(): env = VectorEnv(4, CustomSpace(), CustomSpace()) assert isinstance(env.single_observation_space, CustomSpace) assert isinstance(env.observation_space, Tuple) assert isinstance(env.single_action_space, CustomSpace) assert isinstance(env.action_space, Tuple) @pytest.mark.parametrize( "vectoriser", ( SyncVectorEnv, partial(AsyncVectorEnv, shared_memory=True), partial(AsyncVectorEnv, shared_memory=False), ), ids=["Sync", "Async with shared memory", "Async without shared memory"], ) def test_final_obs_info(vectoriser): """Tests that the vector environments correctly return the final observation and info.""" def 
reset_fn(self, seed=None, options=None): return 0, {"reset": True} def thunk(): return GenericTestEnv( action_space=Discrete(4), observation_space=Discrete(4), reset_fn=reset_fn, step_fn=lambda self, action: ( action if action < 3 else 0, 0, action >= 3, False, {"action": action}, ), ) env = vectoriser([thunk]) obs, info = env.reset() assert obs == np.array([0]) and info == { "reset": np.array([True]), "_reset": np.array([True]), } obs, _, termination, _, info = env.step([1]) assert ( obs == np.array([1]) and termination == np.array([False]) and info == {"action": np.array([1]), "_action": np.array([True])} ) obs, _, termination, _, info = env.step([2]) assert ( obs == np.array([2]) and termination == np.array([False]) and info == {"action": np.array([2]), "_action": np.array([True])} ) obs, _, termination, _, info = env.step([3]) assert ( obs == np.array([0]) and termination == np.array([True]) and info["reset"] == np.array([True]) ) assert "final_observation" in info and "final_info" in info assert info["final_observation"] == np.array([0]) and info["final_info"] == { "action": 3 } ================================================ FILE: tests/vector/test_vector_env_info.py ================================================ import numpy as np import pytest import gym from gym.vector.sync_vector_env import SyncVectorEnv from tests.vector.utils import make_env ENV_ID = "CartPole-v1" NUM_ENVS = 3 ENV_STEPS = 50 SEED = 42 @pytest.mark.parametrize("asynchronous", [True, False]) def test_vector_env_info(asynchronous): env = gym.vector.make( ENV_ID, num_envs=NUM_ENVS, asynchronous=asynchronous, disable_env_checker=True ) env.reset(seed=SEED) for _ in range(ENV_STEPS): env.action_space.seed(SEED) action = env.action_space.sample() _, _, terminateds, truncateds, infos = env.step(action) if any(terminateds) or any(truncateds): assert len(infos["final_observation"]) == NUM_ENVS assert len(infos["_final_observation"]) == NUM_ENVS assert isinstance(infos["final_observation"], 
np.ndarray) assert isinstance(infos["_final_observation"], np.ndarray) for i, (terminated, truncated) in enumerate(zip(terminateds, truncateds)): if terminated or truncated: assert infos["_final_observation"][i] else: assert not infos["_final_observation"][i] assert infos["final_observation"][i] is None @pytest.mark.parametrize("concurrent_ends", [1, 2, 3]) def test_vector_env_info_concurrent_termination(concurrent_ends): # envs that need to terminate together will have the same action actions = [0] * concurrent_ends + [1] * (NUM_ENVS - concurrent_ends) envs = [make_env(ENV_ID, SEED) for _ in range(NUM_ENVS)] envs = SyncVectorEnv(envs) for _ in range(ENV_STEPS): _, _, terminateds, truncateds, infos = envs.step(actions) if any(terminateds) or any(truncateds): for i, (terminated, truncated) in enumerate(zip(terminateds, truncateds)): if i < concurrent_ends: assert terminated or truncated assert infos["_final_observation"][i] else: assert not infos["_final_observation"][i] assert infos["final_observation"][i] is None return ================================================ FILE: tests/vector/test_vector_env_wrapper.py ================================================ import numpy as np from gym.vector import VectorEnvWrapper, make class DummyWrapper(VectorEnvWrapper): def __init__(self, env): self.env = env self.counter = 0 def reset_async(self, **kwargs): super().reset_async() self.counter += 1 def test_vector_env_wrapper_inheritance(): env = make("FrozenLake-v1", asynchronous=False) wrapped = DummyWrapper(env) wrapped.reset() assert wrapped.counter == 1 def test_vector_env_wrapper_attributes(): """Test if `set_attr`, `call` methods for VecEnvWrapper get correctly forwarded to the vector env it is wrapping.""" env = make("CartPole-v1", num_envs=3) wrapped = DummyWrapper(make("CartPole-v1", num_envs=3)) assert np.allclose(wrapped.call("gravity"), env.call("gravity")) env.set_attr("gravity", [20.0, 20.0, 20.0]) wrapped.set_attr("gravity", [20.0, 20.0, 20.0]) assert 
np.allclose(wrapped.get_attr("gravity"), env.get_attr("gravity")) ================================================ FILE: tests/vector/test_vector_make.py ================================================ import pytest import gym from gym.vector import AsyncVectorEnv, SyncVectorEnv from gym.wrappers import OrderEnforcing, TimeLimit, TransformObservation from gym.wrappers.env_checker import PassiveEnvChecker from tests.wrappers.utils import has_wrapper def test_vector_make_id(): env = gym.vector.make("CartPole-v1") assert isinstance(env, AsyncVectorEnv) assert env.num_envs == 1 env.close() @pytest.mark.parametrize("num_envs", [1, 3, 10]) def test_vector_make_num_envs(num_envs): env = gym.vector.make("CartPole-v1", num_envs=num_envs) assert env.num_envs == num_envs env.close() def test_vector_make_asynchronous(): env = gym.vector.make("CartPole-v1", asynchronous=True) assert isinstance(env, AsyncVectorEnv) env.close() env = gym.vector.make("CartPole-v1", asynchronous=False) assert isinstance(env, SyncVectorEnv) env.close() def test_vector_make_wrappers(): env = gym.vector.make("CartPole-v1", num_envs=2, asynchronous=False) assert isinstance(env, SyncVectorEnv) assert len(env.envs) == 2 sub_env = env.envs[0] assert isinstance(sub_env, gym.Env) if sub_env.spec.order_enforce: assert has_wrapper(sub_env, OrderEnforcing) if sub_env.spec.max_episode_steps is not None: assert has_wrapper(sub_env, TimeLimit) assert all( has_wrapper(sub_env, TransformObservation) is False for sub_env in env.envs ) env.close() env = gym.vector.make( "CartPole-v1", num_envs=2, asynchronous=False, wrappers=lambda _env: TransformObservation(_env, lambda obs: obs * 2), ) # As asynchronous environment are inaccessible, synchronous vector must be used assert isinstance(env, SyncVectorEnv) assert all(has_wrapper(sub_env, TransformObservation) for sub_env in env.envs) env.close() def test_vector_make_disable_env_checker(): # As asynchronous environment are inaccessible, synchronous vector must be used 
env = gym.vector.make("CartPole-v1", num_envs=1, asynchronous=False) assert isinstance(env, SyncVectorEnv) assert has_wrapper(env.envs[0], PassiveEnvChecker) env.close() env = gym.vector.make("CartPole-v1", num_envs=5, asynchronous=False) assert isinstance(env, SyncVectorEnv) assert has_wrapper(env.envs[0], PassiveEnvChecker) assert all( has_wrapper(env.envs[i], PassiveEnvChecker) is False for i in [1, 2, 3, 4] ) env.close() env = gym.vector.make( "CartPole-v1", num_envs=3, asynchronous=False, disable_env_checker=True ) assert isinstance(env, SyncVectorEnv) assert all(has_wrapper(sub_env, PassiveEnvChecker) is False for sub_env in env.envs) env.close() ================================================ FILE: tests/vector/utils.py ================================================ import time from typing import Optional import numpy as np import gym from gym.spaces import Box, Dict, Discrete, MultiBinary, MultiDiscrete, Tuple from gym.utils.seeding import RandomNumberGenerator spaces = [ Box(low=np.array(-1.0), high=np.array(1.0), dtype=np.float64), Box(low=np.array([0.0]), high=np.array([10.0]), dtype=np.float64), Box( low=np.array([-1.0, 0.0, 0.0]), high=np.array([1.0, 1.0, 1.0]), dtype=np.float64 ), Box( low=np.array([[-1.0, 0.0], [0.0, -1.0]]), high=np.ones((2, 2)), dtype=np.float64 ), Box(low=0, high=255, shape=(), dtype=np.uint8), Box(low=0, high=255, shape=(32, 32, 3), dtype=np.uint8), Discrete(2), Discrete(5, start=-2), Tuple((Discrete(3), Discrete(5))), Tuple( ( Discrete(7), Box(low=np.array([0.0, -1.0]), high=np.array([1.0, 1.0]), dtype=np.float64), ) ), MultiDiscrete([11, 13, 17]), MultiBinary(19), Dict( { "position": Discrete(23), "velocity": Box( low=np.array([0.0]), high=np.array([1.0]), dtype=np.float64 ), } ), Dict( { "position": Dict({"x": Discrete(29), "y": Discrete(31)}), "velocity": Tuple( (Discrete(37), Box(low=0, high=255, shape=(), dtype=np.uint8)) ), } ), ] HEIGHT, WIDTH = 64, 64 class UnittestSlowEnv(gym.Env): def __init__(self, slow_reset=0.3): 
super().__init__() self.slow_reset = slow_reset self.observation_space = Box( low=0, high=255, shape=(HEIGHT, WIDTH, 3), dtype=np.uint8 ) self.action_space = Box(low=0.0, high=1.0, shape=(), dtype=np.float32) def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): super().reset(seed=seed) if self.slow_reset > 0: time.sleep(self.slow_reset) return self.observation_space.sample(), {} def step(self, action): time.sleep(action) observation = self.observation_space.sample() reward, terminated, truncated = 0.0, False, False return observation, reward, terminated, truncated, {} class CustomSpace(gym.Space): """Minimal custom observation space.""" def sample(self): return self.np_random.integers(0, 10, ()) def contains(self, x): return 0 <= x <= 10 def __eq__(self, other): return isinstance(other, CustomSpace) custom_spaces = [ CustomSpace(), Tuple((CustomSpace(), Box(low=0, high=255, shape=(), dtype=np.uint8))), ] class CustomSpaceEnv(gym.Env): def __init__(self): super().__init__() self.observation_space = CustomSpace() self.action_space = CustomSpace() def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): super().reset(seed=seed) return "reset", {} def step(self, action): observation = f"step({action:s})" reward, terminated, truncated = 0.0, False, False return observation, reward, terminated, truncated, {} def make_env(env_name, seed, **kwargs): def _make(): env = gym.make(env_name, disable_env_checker=True, **kwargs) env.action_space.seed(seed) env.reset(seed=seed) return env return _make def make_slow_env(slow_reset, seed): def _make(): env = UnittestSlowEnv(slow_reset=slow_reset) env.reset(seed=seed) return env return _make def make_custom_space_env(seed): def _make(): env = CustomSpaceEnv() env.reset(seed=seed) return env return _make def assert_rng_equal(rng_1: RandomNumberGenerator, rng_2: RandomNumberGenerator): assert rng_1.bit_generator.state == rng_2.bit_generator.state 
================================================ FILE: tests/wrappers/__init__.py ================================================ ================================================ FILE: tests/wrappers/test_atari_preprocessing.py ================================================ import numpy as np import pytest from gym.spaces import Box, Discrete from gym.wrappers import AtariPreprocessing, StepAPICompatibility from tests.testing_env import GenericTestEnv, old_step_fn class AleTesting: """A testing implementation for the ALE object in atari games.""" grayscale_obs_space = Box(low=0, high=255, shape=(210, 160), dtype=np.uint8, seed=1) rgb_obs_space = Box(low=0, high=255, shape=(210, 160, 3), dtype=np.uint8, seed=1) def lives(self) -> int: """Returns the number of lives in the atari game.""" return 1 def getScreenGrayscale(self, buffer: np.ndarray): """Updates the buffer with a random grayscale observation.""" buffer[...] = self.grayscale_obs_space.sample() def getScreenRGB(self, buffer: np.ndarray): """Updates the buffer with a random rgb observation.""" buffer[...] = self.rgb_obs_space.sample() class AtariTestingEnv(GenericTestEnv): """A testing environment to replicate the atari (ale-py) environments.""" def __init__(self): super().__init__( observation_space=Box( low=0, high=255, shape=(210, 160, 3), dtype=np.uint8, seed=1 ), action_space=Discrete(3, seed=1), step_fn=old_step_fn, ) self.ale = AleTesting() def get_action_meanings(self): """Returns the meanings of each of the actions available to the agent. 
First index must be 'NOOP'.""" return ["NOOP", "UP", "DOWN"] @pytest.mark.parametrize( "env, obs_shape", [ (AtariTestingEnv(), (210, 160, 3)), ( AtariPreprocessing( StepAPICompatibility(AtariTestingEnv(), output_truncation_bool=True), screen_size=84, grayscale_obs=True, frame_skip=1, noop_max=0, ), (84, 84), ), ( AtariPreprocessing( StepAPICompatibility(AtariTestingEnv(), output_truncation_bool=True), screen_size=84, grayscale_obs=False, frame_skip=1, noop_max=0, ), (84, 84, 3), ), ( AtariPreprocessing( StepAPICompatibility(AtariTestingEnv(), output_truncation_bool=True), screen_size=84, grayscale_obs=True, frame_skip=1, noop_max=0, grayscale_newaxis=True, ), (84, 84, 1), ), ], ) def test_atari_preprocessing_grayscale(env, obs_shape): assert env.observation_space.shape == obs_shape # It is not possible to test the outputs as we are not using actual observations. # todo: update when ale-py is compatible with the ci env = StepAPICompatibility( env, output_truncation_bool=True ) # using compatibility wrapper since ale-py uses old step API obs, _ = env.reset(seed=0) assert obs in env.observation_space obs, _, _, _, _ = env.step(env.action_space.sample()) assert obs in env.observation_space env.close() @pytest.mark.parametrize("grayscale", [True, False]) @pytest.mark.parametrize("scaled", [True, False]) def test_atari_preprocessing_scale(grayscale, scaled, max_test_steps=10): # arbitrarily chosen number for stepping into env. 
and ensuring all observations are in the required range env = AtariPreprocessing( StepAPICompatibility(AtariTestingEnv(), output_truncation_bool=True), screen_size=84, grayscale_obs=grayscale, scale_obs=scaled, frame_skip=1, noop_max=0, ) obs, _ = env.reset() max_obs = 1 if scaled else 255 assert np.all(0 <= obs) and np.all(obs <= max_obs) terminated, truncated, step_i = False, False, 0 while not (terminated or truncated) and step_i <= max_test_steps: obs, _, terminated, truncated, _ = env.step(env.action_space.sample()) assert np.all(0 <= obs) and np.all(obs <= max_obs) step_i += 1 env.close() ================================================ FILE: tests/wrappers/test_autoreset.py ================================================ """Tests the gym.wrapper.AutoResetWrapper operates as expected.""" from typing import Generator, Optional from unittest.mock import MagicMock import numpy as np import pytest import gym from gym.wrappers import AutoResetWrapper from tests.envs.utils import all_testing_env_specs class DummyResetEnv(gym.Env): """A dummy environment which returns ascending numbers starting at `0` when :meth:`self.step()` is called. After the second call to :meth:`self.step()` terminated is true. Info dicts are also returned containing the same number returned as an observation, accessible via the key "count". This environment is provided for the purpose of testing the autoreset wrapper. 
""" metadata = {} def __init__(self): """Initialise the DummyResetEnv.""" self.action_space = gym.spaces.Box( low=np.array([0]), high=np.array([2]), dtype=np.int64 ) self.observation_space = gym.spaces.Discrete(2) self.count = 0 def step(self, action: int): """Steps the DummyEnv with the incremented step, reward and terminated `if self.count > 1` and updated info.""" self.count += 1 return ( np.array([self.count]), # Obs self.count > 2, # Reward self.count > 2, # Terminated False, # Truncated {"count": self.count}, # Info ) def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): """Resets the DummyEnv to return the count array and info with count.""" self.count = 0 return np.array([self.count]), {"count": self.count} def unwrap_env(env) -> Generator[gym.Wrapper, None, None]: """Unwraps an environment yielding all wrappers around environment.""" while isinstance(env, gym.Wrapper): yield type(env) env = env.env @pytest.mark.parametrize( "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs] ) def test_make_autoreset_true(spec): """Tests gym.make with `autoreset=True`, and check that the reset actually happens. Note: This test assumes that the outermost wrapper is AutoResetWrapper so if that is being changed in the future, this test will break and need to be updated. Note: This test assumes that all first-party environments will terminate in a finite amount of time with random actions, which is true as of the time of adding this test. 
""" env = gym.make(spec.id, autoreset=True, disable_env_checker=True) assert AutoResetWrapper in unwrap_env(env) env.reset(seed=0) env.unwrapped.reset = MagicMock(side_effect=env.unwrapped.reset) terminated, truncated = False, False while not (terminated or truncated): obs, reward, terminated, truncated, info = env.step(env.action_space.sample()) assert env.unwrapped.reset.called env.close() @pytest.mark.parametrize( "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs] ) def test_gym_make_autoreset(spec): """Tests that `gym.make` autoreset wrapper is applied only when `gym.make(..., autoreset=True)`.""" env = gym.make(spec.id, disable_env_checker=True) assert AutoResetWrapper not in unwrap_env(env) env.close() env = gym.make(spec.id, autoreset=False, disable_env_checker=True) assert AutoResetWrapper not in unwrap_env(env) env.close() env = gym.make(spec.id, autoreset=True, disable_env_checker=True) assert AutoResetWrapper in unwrap_env(env) env.close() def test_autoreset_wrapper_autoreset(): """Tests the autoreset wrapper actually automatically resets correctly.""" env = DummyResetEnv() env = AutoResetWrapper(env) obs, info = env.reset() assert obs == np.array([0]) assert info == {"count": 0} action = 0 obs, reward, terminated, truncated, info = env.step(action) assert obs == np.array([1]) assert reward == 0 assert (terminated or truncated) is False assert info == {"count": 1} obs, reward, terminated, truncated, info = env.step(action) assert obs == np.array([2]) assert (terminated or truncated) is False assert reward == 0 assert info == {"count": 2} obs, reward, terminated, truncated, info = env.step(action) assert obs == np.array([0]) assert (terminated or truncated) is True assert reward == 1 assert info == { "count": 0, "final_observation": np.array([3]), "final_info": {"count": 3}, } obs, reward, terminated, truncated, info = env.step(action) assert obs == np.array([1]) assert reward == 0 assert (terminated or truncated) is False 
assert info == {"count": 1} env.close() ================================================ FILE: tests/wrappers/test_clip_action.py ================================================ import numpy as np import gym from gym.wrappers import ClipAction def test_clip_action(): # mountaincar: action-based rewards env = gym.make("MountainCarContinuous-v0", disable_env_checker=True) wrapped_env = ClipAction( gym.make("MountainCarContinuous-v0", disable_env_checker=True) ) seed = 0 env.reset(seed=seed) wrapped_env.reset(seed=seed) actions = [[0.4], [1.2], [-0.3], [0.0], [-2.5]] for action in actions: obs1, r1, ter1, trunc1, _ = env.step( np.clip(action, env.action_space.low, env.action_space.high) ) obs2, r2, ter2, trunc2, _ = wrapped_env.step(action) assert np.allclose(r1, r2) assert np.allclose(obs1, obs2) assert ter1 == ter2 assert trunc1 == trunc2 ================================================ FILE: tests/wrappers/test_filter_observation.py ================================================ from typing import Optional, Tuple import numpy as np import pytest import gym from gym import spaces from gym.wrappers.filter_observation import FilterObservation class FakeEnvironment(gym.Env): def __init__( self, render_mode=None, observation_keys: Tuple[str, ...] 
= ("state",) ): self.observation_space = spaces.Dict( { name: spaces.Box(shape=(2,), low=-1, high=1, dtype=np.float32) for name in observation_keys } ) self.action_space = spaces.Box(shape=(1,), low=-1, high=1, dtype=np.float32) self.render_mode = render_mode def render(self, mode="human"): image_shape = (32, 32, 3) return np.zeros(image_shape, dtype=np.uint8) def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): super().reset(seed=seed) observation = self.observation_space.sample() return observation, {} def step(self, action): del action observation = self.observation_space.sample() reward, terminal, info = 0.0, False, {} return observation, reward, terminal, info FILTER_OBSERVATION_TEST_CASES = ( (("key1", "key2"), ("key1",)), (("key1", "key2"), ("key1", "key2")), (("key1",), None), (("key1",), ("key1",)), ) ERROR_TEST_CASES = ( ("key", ValueError, "All the filter_keys must be included..*"), (False, TypeError, "'bool' object is not iterable"), (1, TypeError, "'int' object is not iterable"), ) class TestFilterObservation: @pytest.mark.parametrize( "observation_keys,filter_keys", FILTER_OBSERVATION_TEST_CASES ) def test_filter_observation(self, observation_keys, filter_keys): env = FakeEnvironment(observation_keys=observation_keys) # Make sure we are testing the right environment for the test. observation_space = env.observation_space assert isinstance(observation_space, spaces.Dict) wrapped_env = FilterObservation(env, filter_keys=filter_keys) assert isinstance(wrapped_env.observation_space, spaces.Dict) if filter_keys is None: filter_keys = tuple(observation_keys) assert len(wrapped_env.observation_space.spaces) == len(filter_keys) assert tuple(wrapped_env.observation_space.spaces.keys()) == tuple(filter_keys) # Check that the added space item is consistent with the added observation. 
observation, info = wrapped_env.reset() assert len(observation) == len(filter_keys) assert isinstance(info, dict) @pytest.mark.parametrize("filter_keys,error_type,error_match", ERROR_TEST_CASES) def test_raises_with_incorrect_arguments( self, filter_keys, error_type, error_match ): env = FakeEnvironment(observation_keys=("key1", "key2")) with pytest.raises(error_type, match=error_match): FilterObservation(env, filter_keys=filter_keys) ================================================ FILE: tests/wrappers/test_flatten.py ================================================ """Tests for the flatten observation wrapper.""" from collections import OrderedDict from typing import Optional import numpy as np import pytest import gym from gym.spaces import Box, Dict, flatten, unflatten from gym.wrappers import FlattenObservation class FakeEnvironment(gym.Env): def __init__(self, observation_space): self.observation_space = observation_space def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): super().reset(seed=seed) self.observation = self.observation_space.sample() return self.observation, {} OBSERVATION_SPACES = ( ( Dict( OrderedDict( [ ("key1", Box(shape=(2, 3), low=0, high=0, dtype=np.float32)), ("key2", Box(shape=(), low=1, high=1, dtype=np.float32)), ("key3", Box(shape=(2,), low=2, high=2, dtype=np.float32)), ] ) ), True, ), ( Dict( OrderedDict( [ ("key2", Box(shape=(), low=0, high=0, dtype=np.float32)), ("key3", Box(shape=(2,), low=1, high=1, dtype=np.float32)), ("key1", Box(shape=(2, 3), low=2, high=2, dtype=np.float32)), ] ) ), True, ), ( Dict( { "key1": Box(shape=(2, 3), low=-1, high=1, dtype=np.float32), "key2": Box(shape=(), low=-1, high=1, dtype=np.float32), "key3": Box(shape=(2,), low=-1, high=1, dtype=np.float32), } ), False, ), ) class TestFlattenEnvironment: @pytest.mark.parametrize("observation_space, ordered_values", OBSERVATION_SPACES) def test_flattened_environment(self, observation_space, ordered_values): """ make sure that 
flattened observations occur in the order expected """ env = FakeEnvironment(observation_space=observation_space) wrapped_env = FlattenObservation(env) flattened, info = wrapped_env.reset() unflattened = unflatten(env.observation_space, flattened) original = env.observation self._check_observations(original, flattened, unflattened, ordered_values) @pytest.mark.parametrize("observation_space, ordered_values", OBSERVATION_SPACES) def test_flatten_unflatten(self, observation_space, ordered_values): """ test flatten and unflatten functions directly """ original = observation_space.sample() flattened = flatten(observation_space, original) unflattened = unflatten(observation_space, flattened) self._check_observations(original, flattened, unflattened, ordered_values) def _check_observations(self, original, flattened, unflattened, ordered_values): # make sure that unflatten(flatten(original)) == original assert set(unflattened.keys()) == set(original.keys()) for k, v in original.items(): np.testing.assert_allclose(unflattened[k], v) if ordered_values: # make sure that the values were flattened in the order they appeared in the # OrderedDict np.testing.assert_allclose(sorted(flattened), flattened) ================================================ FILE: tests/wrappers/test_flatten_observation.py ================================================ import numpy as np import pytest import gym from gym import spaces from gym.wrappers import FlattenObservation @pytest.mark.parametrize("env_id", ["Blackjack-v1"]) def test_flatten_observation(env_id): env = gym.make(env_id, disable_env_checker=True) wrapped_env = FlattenObservation(env) obs, info = env.reset() wrapped_obs, wrapped_obs_info = wrapped_env.reset() space = spaces.Tuple((spaces.Discrete(32), spaces.Discrete(11), spaces.Discrete(2))) wrapped_space = spaces.Box(0, 1, [32 + 11 + 2], dtype=np.int64) assert space.contains(obs) assert wrapped_space.contains(wrapped_obs) assert isinstance(info, dict) assert 
isinstance(wrapped_obs_info, dict) ================================================ FILE: tests/wrappers/test_frame_stack.py ================================================ import numpy as np import pytest import gym from gym.wrappers import FrameStack try: import lz4 except ImportError: lz4 = None @pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1", "CarRacing-v2"]) @pytest.mark.parametrize("num_stack", [2, 3, 4]) @pytest.mark.parametrize( "lz4_compress", [ pytest.param( True, marks=pytest.mark.skipif( lz4 is None, reason="Need lz4 to run tests with compression" ), ), False, ], ) def test_frame_stack(env_id, num_stack, lz4_compress): env = gym.make(env_id, disable_env_checker=True) shape = env.observation_space.shape env = FrameStack(env, num_stack, lz4_compress) assert env.observation_space.shape == (num_stack,) + shape assert env.observation_space.dtype == env.env.observation_space.dtype dup = gym.make(env_id, disable_env_checker=True) obs, _ = env.reset(seed=0) dup_obs, _ = dup.reset(seed=0) assert np.allclose(obs[-1], dup_obs) for _ in range(num_stack**2): action = env.action_space.sample() dup_obs, _, dup_terminated, dup_truncated, _ = dup.step(action) obs, _, terminated, truncated, _ = env.step(action) assert dup_terminated == terminated assert dup_truncated == truncated assert np.allclose(obs[-1], dup_obs) if terminated or truncated: break assert len(obs) == num_stack ================================================ FILE: tests/wrappers/test_gray_scale_observation.py ================================================ import pytest import gym from gym import spaces from gym.wrappers import GrayScaleObservation @pytest.mark.parametrize("env_id", ["CarRacing-v2"]) @pytest.mark.parametrize("keep_dim", [True, False]) def test_gray_scale_observation(env_id, keep_dim): rgb_env = gym.make(env_id, disable_env_checker=True) assert isinstance(rgb_env.observation_space, spaces.Box) assert len(rgb_env.observation_space.shape) == 3 assert 
rgb_env.observation_space.shape[-1] == 3 wrapped_env = GrayScaleObservation(rgb_env, keep_dim=keep_dim) assert isinstance(wrapped_env.observation_space, spaces.Box) if keep_dim: assert len(wrapped_env.observation_space.shape) == 3 assert wrapped_env.observation_space.shape[-1] == 1 else: assert len(wrapped_env.observation_space.shape) == 2 wrapped_obs, info = wrapped_env.reset() assert wrapped_obs in wrapped_env.observation_space ================================================ FILE: tests/wrappers/test_human_rendering.py ================================================ import re import pytest import gym from gym.wrappers import HumanRendering def test_human_rendering(): for mode in ["rgb_array", "rgb_array_list"]: env = HumanRendering( gym.make("CartPole-v1", render_mode=mode, disable_env_checker=True) ) assert env.render_mode == "human" env.reset() for _ in range(75): _, _, terminated, truncated, _ = env.step(env.action_space.sample()) if terminated or truncated: env.reset() env.close() env = gym.make("CartPole-v1", render_mode="human") with pytest.raises( AssertionError, match=re.escape( "Expected env.render_mode to be one of 'rgb_array' or 'rgb_array_list' but got 'human'" ), ): HumanRendering(env) env.close() ================================================ FILE: tests/wrappers/test_nested_dict.py ================================================ """Tests for the filter observation wrapper.""" from typing import Optional import numpy as np import pytest import gym from gym.spaces import Box, Dict, Tuple from gym.wrappers import FilterObservation, FlattenObservation class FakeEnvironment(gym.Env): def __init__(self, observation_space, render_mode=None): self.observation_space = observation_space self.obs_keys = self.observation_space.spaces.keys() self.action_space = Box(shape=(1,), low=-1, high=1, dtype=np.float32) self.render_mode = render_mode def render(self, mode="human"): image_shape = (32, 32, 3) return np.zeros(image_shape, dtype=np.uint8) def 
reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): super().reset(seed=seed) observation = self.observation_space.sample() return observation, {} def step(self, action): del action observation = self.observation_space.sample() reward, terminal, info = 0.0, False, {} return observation, reward, terminal, info NESTED_DICT_TEST_CASES = ( ( Dict( { "key1": Box(shape=(2,), low=-1, high=1, dtype=np.float32), "key2": Dict( { "subkey1": Box(shape=(2,), low=-1, high=1, dtype=np.float32), "subkey2": Box(shape=(2,), low=-1, high=1, dtype=np.float32), } ), } ), (6,), ), ( Dict( { "key1": Box(shape=(2, 3), low=-1, high=1, dtype=np.float32), "key2": Box(shape=(), low=-1, high=1, dtype=np.float32), "key3": Box(shape=(2,), low=-1, high=1, dtype=np.float32), } ), (9,), ), ( Dict( { "key1": Tuple( ( Box(shape=(2,), low=-1, high=1, dtype=np.float32), Box(shape=(2,), low=-1, high=1, dtype=np.float32), ) ), "key2": Box(shape=(), low=-1, high=1, dtype=np.float32), "key3": Box(shape=(2,), low=-1, high=1, dtype=np.float32), } ), (7,), ), ( Dict( { "key1": Tuple((Box(shape=(2,), low=-1, high=1, dtype=np.float32),)), "key2": Box(shape=(), low=-1, high=1, dtype=np.float32), "key3": Box(shape=(2,), low=-1, high=1, dtype=np.float32), } ), (5,), ), ( Dict( { "key1": Tuple( (Dict({"key9": Box(shape=(2,), low=-1, high=1, dtype=np.float32)}),) ), "key2": Box(shape=(), low=-1, high=1, dtype=np.float32), "key3": Box(shape=(2,), low=-1, high=1, dtype=np.float32), } ), (5,), ), ) class TestNestedDictWrapper: @pytest.mark.parametrize("observation_space, flat_shape", NESTED_DICT_TEST_CASES) def test_nested_dicts_size(self, observation_space, flat_shape): env = FakeEnvironment(observation_space=observation_space) # Make sure we are testing the right environment for the test. 
observation_space = env.observation_space assert isinstance(observation_space, Dict) wrapped_env = FlattenObservation(FilterObservation(env, env.obs_keys)) assert wrapped_env.observation_space.shape == flat_shape assert wrapped_env.observation_space.dtype == np.float32 @pytest.mark.parametrize("observation_space, flat_shape", NESTED_DICT_TEST_CASES) def test_nested_dicts_ravel(self, observation_space, flat_shape): env = FakeEnvironment(observation_space=observation_space) wrapped_env = FlattenObservation(FilterObservation(env, env.obs_keys)) obs, info = wrapped_env.reset() assert obs.shape == wrapped_env.observation_space.shape assert isinstance(info, dict) ================================================ FILE: tests/wrappers/test_normalize.py ================================================ from typing import Optional import numpy as np from numpy.testing import assert_almost_equal import gym from gym.wrappers.normalize import NormalizeObservation, NormalizeReward class DummyRewardEnv(gym.Env): metadata = {} def __init__(self, return_reward_idx=0): self.action_space = gym.spaces.Discrete(2) self.observation_space = gym.spaces.Box( low=np.array([-1.0]), high=np.array([1.0]), dtype=np.float64 ) self.returned_rewards = [0, 1, 2, 3, 4] self.return_reward_idx = return_reward_idx self.t = self.return_reward_idx def step(self, action): self.t += 1 return ( np.array([self.t]), self.t, self.t == len(self.returned_rewards), False, {}, ) def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None): super().reset(seed=seed) self.t = self.return_reward_idx return np.array([self.t]), {} def make_env(return_reward_idx): def thunk(): env = DummyRewardEnv(return_reward_idx) return env return thunk def test_normalize_observation(): env = DummyRewardEnv(return_reward_idx=0) env = NormalizeObservation(env) env.reset() env.step(env.action_space.sample()) assert_almost_equal(env.obs_rms.mean, 0.5, decimal=4) env.step(env.action_space.sample()) 
assert_almost_equal(env.obs_rms.mean, 1.0, decimal=4) def test_normalize_reset_info(): env = DummyRewardEnv(return_reward_idx=0) env = NormalizeObservation(env) obs, info = env.reset() assert isinstance(obs, np.ndarray) assert isinstance(info, dict) def test_normalize_return(): env = DummyRewardEnv(return_reward_idx=0) env = NormalizeReward(env) env.reset() env.step(env.action_space.sample()) assert_almost_equal( env.return_rms.mean, np.mean([1]), # [first return] decimal=4, ) env.step(env.action_space.sample()) assert_almost_equal( env.return_rms.mean, np.mean([2 + env.gamma * 1, 1]), # [second return, first return] decimal=4, ) def test_normalize_observation_vector_env(): env_fns = [make_env(0), make_env(1)] envs = gym.vector.SyncVectorEnv(env_fns) envs.reset() obs, reward, _, _, _ = envs.step(envs.action_space.sample()) np.testing.assert_almost_equal(obs, np.array([[1], [2]]), decimal=4) np.testing.assert_almost_equal(reward, np.array([1, 2]), decimal=4) env_fns = [make_env(0), make_env(1)] envs = gym.vector.SyncVectorEnv(env_fns) envs = NormalizeObservation(envs) envs.reset() assert_almost_equal( envs.obs_rms.mean, np.mean([0.5]), # the mean of first observations [[0, 1]] decimal=4, ) obs, reward, _, _, _ = envs.step(envs.action_space.sample()) assert_almost_equal( envs.obs_rms.mean, np.mean([1.0]), # the mean of first and second observations [[0, 1], [1, 2]] decimal=4, ) def test_normalize_return_vector_env(): env_fns = [make_env(0), make_env(1)] envs = gym.vector.SyncVectorEnv(env_fns) envs = NormalizeReward(envs) obs = envs.reset() obs, reward, _, _, _ = envs.step(envs.action_space.sample()) assert_almost_equal( envs.return_rms.mean, np.mean([1.5]), # the mean of first returns [[1, 2]] decimal=4, ) obs, reward, _, _, _ = envs.step(envs.action_space.sample()) assert_almost_equal( envs.return_rms.mean, np.mean( [[1, 2], [2 + envs.gamma * 1, 3 + envs.gamma * 2]] ), # the mean of first and second returns [[1, 2], [2 + envs.gamma * 1, 3 + envs.gamma * 2]] 
decimal=4, ) ================================================ FILE: tests/wrappers/test_order_enforcing.py ================================================ import pytest import gym from gym.envs.classic_control import CartPoleEnv from gym.error import ResetNeeded from gym.wrappers import OrderEnforcing from tests.envs.utils import all_testing_env_specs from tests.wrappers.utils import has_wrapper @pytest.mark.parametrize( "spec", all_testing_env_specs, ids=[spec.id for spec in all_testing_env_specs] ) def test_gym_make_order_enforcing(spec): """Checks that gym.make wrappers the environment with the OrderEnforcing wrapper.""" env = gym.make(spec.id, disable_env_checker=True) assert has_wrapper(env, OrderEnforcing) def test_order_enforcing(): """Checks that the order enforcing works as expected, raising an error before reset is called and not after.""" # The reason for not using gym.make is that all environments are by default wrapped in the order enforcing wrapper env = CartPoleEnv(render_mode="rgb_array_list") assert not has_wrapper(env, OrderEnforcing) # Assert that the order enforcing works for step and render before reset order_enforced_env = OrderEnforcing(env) assert order_enforced_env.has_reset is False with pytest.raises(ResetNeeded): order_enforced_env.step(0) with pytest.raises(ResetNeeded): order_enforced_env.render() assert order_enforced_env.has_reset is False # Assert that the Assertion errors are not raised after reset order_enforced_env.reset() assert order_enforced_env.has_reset is True order_enforced_env.step(0) order_enforced_env.render() # Assert that with disable_render_order_enforcing works, the environment has already been reset env = CartPoleEnv(render_mode="rgb_array_list") env = OrderEnforcing(env, disable_render_order_enforcing=True) env.render() # no assertion error ================================================ FILE: tests/wrappers/test_passive_env_checker.py ================================================ import re import warnings 
import numpy as np
import pytest

import gym
from gym.wrappers.env_checker import PassiveEnvChecker
from tests.envs.test_envs import PASSIVE_CHECK_IGNORE_WARNING
from tests.envs.utils import all_testing_initialised_envs
from tests.testing_env import GenericTestEnv


@pytest.mark.parametrize(
    "env",
    all_testing_initialised_envs,
    ids=[env.spec.id for env in all_testing_initialised_envs],
)
def test_passive_checker_wrapper_warnings(env):
    """A full reset/step cycle must emit only warnings on the ignore list."""
    with warnings.catch_warnings(record=True) as caught_warnings:
        checker_env = PassiveEnvChecker(env)
        checker_env.reset()
        checker_env.step(checker_env.action_space.sample())
        # todo, add check for render, bugged due to mujoco v2/3 and v4 envs
        checker_env.close()

    for warning in caught_warnings:
        if warning.message.args[0] not in PASSIVE_CHECK_IGNORE_WARNING:
            raise gym.error.Error(f"Unexpected warning: {warning.message}")


@pytest.mark.parametrize(
    "env, message",
    [
        (
            GenericTestEnv(action_space=None),
            "The environment must specify an action space. https://www.gymlibrary.dev/content/environment_creation/",
        ),
        (
            GenericTestEnv(action_space="error"),
            "action space does not inherit from `gym.spaces.Space`, actual type: ",
        ),
        (
            GenericTestEnv(observation_space=None),
            "The environment must specify an observation space. https://www.gymlibrary.dev/content/environment_creation/",
        ),
        (
            GenericTestEnv(observation_space="error"),
            "observation space does not inherit from `gym.spaces.Space`, actual type: ",
        ),
    ],
)
def test_initialise_failures(env, message):
    """Missing/invalid spaces must make the checker fail with an exact message."""
    with pytest.raises(AssertionError, match=f"^{re.escape(message)}$"):
        PassiveEnvChecker(env)

    env.close()


def _reset_failure(self, seed=None, options=None):
    # Deliberately returns an observation outside the observation space.
    return np.array([-1.0], dtype=np.float32), {}


def _step_failure(self, action):
    # Deliberately violates the step API by returning a bare string.
    return "error"


def test_api_failures():
    """The checker must flag broken reset/step/render implementations once each."""
    env = GenericTestEnv(
        reset_fn=_reset_failure,
        step_fn=_step_failure,
        metadata={"render_modes": "error"},
    )
    env = PassiveEnvChecker(env)
    assert env.checked_reset is False
    assert env.checked_step is False
    assert env.checked_render is False

    with pytest.warns(
        UserWarning,
        match=re.escape(
            "The obs returned by the `reset()` method is not within the observation space"
        ),
    ):
        env.reset()
    assert env.checked_reset

    with pytest.raises(
        AssertionError,
        match="Expects step result to be a tuple, actual type: ",
    ):
        env.step(env.action_space.sample())
    assert env.checked_step

    with pytest.warns(
        UserWarning,
        match=r"Expects the render_modes to be a sequence \(i\.e\. list, tuple\), actual type: ",
    ):
        env.render()
    assert env.checked_render

    env.close()


# ================================================
# FILE: tests/wrappers/test_pixel_observation.py
# ================================================
"""Tests for the pixel observation wrapper."""
from typing import Optional

import numpy as np
import pytest

import gym
from gym import spaces
from gym.wrappers.pixel_observation import STATE_KEY, PixelObservationWrapper


class FakeEnvironment(gym.Env):
    """Minimal env rendering a constant black image of a requested size."""

    def __init__(self, render_mode="single_rgb_array"):
        self.action_space = spaces.Box(shape=(1,), low=-1, high=1, dtype=np.float32)
        self.render_mode = render_mode

    def render(self, mode="human", width=32, height=32):
        image_shape = (height, width, 3)
        return np.zeros(image_shape, dtype=np.uint8)

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        super().reset(seed=seed)
        observation = self.observation_space.sample()
        return observation, {}

    def step(self, action):
        del action
        observation = self.observation_space.sample()
        reward, terminal, info = 0.0, False, {}
        return observation, reward, terminal, info


class FakeArrayObservationEnvironment(FakeEnvironment):
    """FakeEnvironment with a flat Box observation space."""

    def __init__(self, *args, **kwargs):
        self.observation_space = spaces.Box(
            shape=(2,), low=-1, high=1, dtype=np.float32
        )
        super().__init__(*args, **kwargs)


class FakeDictObservationEnvironment(FakeEnvironment):
    """FakeEnvironment with a Dict observation space."""

    def __init__(self, *args, **kwargs):
        self.observation_space = spaces.Dict(
            {
                "state": spaces.Box(shape=(2,), low=-1, high=1, dtype=np.float32),
            }
        )
        super().__init__(*args, **kwargs)


@pytest.mark.parametrize("pixels_only", (True, False))
def test_dict_observation(pixels_only):
    pixel_key = "rgb"

    env = FakeDictObservationEnvironment()

    # Make sure we are testing the right environment for the test.
    observation_space = env.observation_space
    assert isinstance(observation_space, spaces.Dict)

    width, height = (320, 240)

    # The wrapper should only add one observation.
    wrapped_env = PixelObservationWrapper(
        env,
        pixel_keys=(pixel_key,),
        pixels_only=pixels_only,
        render_kwargs={pixel_key: {"width": width, "height": height}},
    )

    assert isinstance(wrapped_env.observation_space, spaces.Dict)

    if pixels_only:
        assert len(wrapped_env.observation_space.spaces) == 1
        assert list(wrapped_env.observation_space.spaces.keys()) == [pixel_key]
    else:
        assert (
            len(wrapped_env.observation_space.spaces)
            == len(observation_space.spaces) + 1
        )
        expected_keys = list(observation_space.spaces.keys()) + [pixel_key]
        assert list(wrapped_env.observation_space.spaces.keys()) == expected_keys

    # Check that the added space item is consistent with the added observation.
    observation, info = wrapped_env.reset()
    rgb_observation = observation[pixel_key]
    assert isinstance(info, dict)

    assert rgb_observation.shape == (height, width, 3)
    assert rgb_observation.dtype == np.uint8


@pytest.mark.parametrize("pixels_only", (True, False))
def test_single_array_observation(pixels_only):
    pixel_key = "depth"

    env = FakeArrayObservationEnvironment()
    observation_space = env.observation_space
    assert isinstance(observation_space, spaces.Box)

    wrapped_env = PixelObservationWrapper(
        env, pixel_keys=(pixel_key,), pixels_only=pixels_only
    )
    wrapped_env.observation_space = wrapped_env.observation_space
    assert isinstance(wrapped_env.observation_space, spaces.Dict)

    if pixels_only:
        assert len(wrapped_env.observation_space.spaces) == 1
        assert list(wrapped_env.observation_space.spaces.keys()) == [pixel_key]
    else:
        assert len(wrapped_env.observation_space.spaces) == 2
        assert list(wrapped_env.observation_space.spaces.keys()) == [
            STATE_KEY,
            pixel_key,
        ]

    observation, info = wrapped_env.reset()
    depth_observation = observation[pixel_key]
    assert isinstance(info, dict)

    assert depth_observation.shape == (32, 32, 3)
    assert depth_observation.dtype == np.uint8

    if not pixels_only:
        assert isinstance(observation[STATE_KEY], np.ndarray)


# ================================================
# FILE:
# tests/wrappers/test_record_episode_statistics.py
# ================================================
import numpy as np
import pytest

import gym
from gym.wrappers import RecordEpisodeStatistics, VectorListInfo
from gym.wrappers.record_episode_statistics import add_vector_episode_statistics


@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"])
@pytest.mark.parametrize("deque_size", [2, 5])
def test_record_episode_statistics(env_id, deque_size):
    """Episode statistics must appear on termination and fill the queues."""
    env = gym.make(env_id, disable_env_checker=True)
    env = RecordEpisodeStatistics(env, deque_size)

    for n in range(5):
        env.reset()
        assert env.episode_returns is not None and env.episode_lengths is not None
        assert env.episode_returns[0] == 0.0
        assert env.episode_lengths[0] == 0
        for t in range(env.spec.max_episode_steps):
            _, _, terminated, truncated, info = env.step(env.action_space.sample())
            if terminated or truncated:
                assert "episode" in info
                assert all([item in info["episode"] for item in ["r", "l", "t"]])
                break

    assert len(env.return_queue) == deque_size
    assert len(env.length_queue) == deque_size


def test_record_episode_statistics_reset_info():
    env = gym.make("CartPole-v1", disable_env_checker=True)
    env = RecordEpisodeStatistics(env)
    ob_space = env.observation_space
    obs, info = env.reset()
    assert ob_space.contains(obs)
    assert isinstance(info, dict)


@pytest.mark.parametrize(
    ("num_envs", "asynchronous"), [(1, False), (1, True), (4, False), (4, True)]
)
def test_record_episode_statistics_with_vectorenv(num_envs, asynchronous):
    envs = gym.vector.make(
        "CartPole-v1",
        render_mode=None,
        num_envs=num_envs,
        asynchronous=asynchronous,
        disable_env_checker=True,
    )
    envs = RecordEpisodeStatistics(envs)
    max_episode_step = (
        envs.env_fns[0]().spec.max_episode_steps
        if asynchronous
        else envs.env.envs[0].spec.max_episode_steps
    )
    envs.reset()
    for _ in range(max_episode_step + 1):
        _, _, terminateds, truncateds, infos = envs.step(envs.action_space.sample())
        if any(terminateds) or any(truncateds):
            assert "episode" in infos
            assert "_episode" in infos
            assert all(infos["_episode"] == np.bitwise_or(terminateds, truncateds))
            assert all([item in infos["episode"] for item in ["r", "l", "t"]])
            break
        else:
            assert "episode" not in infos
            assert "_episode" not in infos


def test_wrong_wrapping_order():
    """RecordEpisodeStatistics over VectorListInfo must fail on step."""
    envs = gym.vector.make("CartPole-v1", num_envs=3, disable_env_checker=True)
    wrapped_env = RecordEpisodeStatistics(VectorListInfo(envs))

    wrapped_env.reset()
    with pytest.raises(AssertionError):
        wrapped_env.step(wrapped_env.action_space.sample())


def test_add_vector_episode_statistics():
    """Statistics must accumulate per sub-env index, zeros elsewhere."""
    NUM_ENVS = 5
    info = {}

    for i in range(NUM_ENVS):
        episode_info = {
            "episode": {
                "r": i,
                "l": i,
                "t": i,
            }
        }
        info = add_vector_episode_statistics(info, episode_info["episode"], NUM_ENVS, i)
        assert np.alltrue(info["_episode"][: i + 1])

        for j in range(NUM_ENVS):
            if j <= i:
                assert info["episode"]["r"][j] == j
                assert info["episode"]["l"][j] == j
                assert info["episode"]["t"][j] == j
            else:
                assert info["episode"]["r"][j] == 0
                assert info["episode"]["l"][j] == 0
                assert info["episode"]["t"][j] == 0


# ================================================
# FILE: tests/wrappers/test_record_video.py
# ================================================
import os
import shutil

import gym
from gym.wrappers import capped_cubic_video_schedule


def test_record_video_using_default_trigger():
    env = gym.make(
        "CartPole-v1", render_mode="rgb_array_list", disable_env_checker=True
    )
    env = gym.wrappers.RecordVideo(env, "videos")
    env.reset()
    for _ in range(199):
        action = env.action_space.sample()
        _, _, terminated, truncated, _ = env.step(action)
        if terminated or truncated:
            env.reset()
    env.close()
    assert os.path.isdir("videos")
    mp4_files = [file for file in os.listdir("videos") if file.endswith(".mp4")]
    assert len(mp4_files) == sum(
        capped_cubic_video_schedule(i) for i in range(env.episode_id + 1)
    )
    shutil.rmtree("videos")


def test_record_video_reset():
    env = gym.make("CartPole-v1", render_mode="rgb_array", disable_env_checker=True)
    env = gym.wrappers.RecordVideo(env, "videos", step_trigger=lambda x: x % 100 == 0)
    ob_space = env.observation_space
    obs, info = env.reset()
    env.close()
    assert os.path.isdir("videos")
    shutil.rmtree("videos")
    assert ob_space.contains(obs)
    assert isinstance(info, dict)


def test_record_video_step_trigger():
    env = gym.make("CartPole-v1", render_mode="rgb_array", disable_env_checker=True)
    env._max_episode_steps = 20
    env = gym.wrappers.RecordVideo(env, "videos", step_trigger=lambda x: x % 100 == 0)
    env.reset()
    for _ in range(199):
        action = env.action_space.sample()
        _, _, terminated, truncated, _ = env.step(action)
        if terminated or truncated:
            env.reset()
    env.close()
    assert os.path.isdir("videos")
    mp4_files = [file for file in os.listdir("videos") if file.endswith(".mp4")]
    assert len(mp4_files) == 2
    shutil.rmtree("videos")


def make_env(gym_id, seed, **kwargs):
    """Thunk factory; only the env with seed == 1 records video."""

    def thunk():
        env = gym.make(gym_id, disable_env_checker=True, **kwargs)
        env._max_episode_steps = 20
        if seed == 1:
            env = gym.wrappers.RecordVideo(
                env, "videos", step_trigger=lambda x: x % 100 == 0
            )
        return env

    return thunk


def test_record_video_within_vector():
    envs = gym.vector.SyncVectorEnv(
        [make_env("CartPole-v1", 1 + i, render_mode="rgb_array") for i in range(2)]
    )
    envs = gym.wrappers.RecordEpisodeStatistics(envs)
    envs.reset()
    for i in range(199):
        _, _, _, _, infos = envs.step(envs.action_space.sample())

        # break when every env is done
        if "episode" in infos and all(infos["_episode"]):
            print(f"episode_reward={infos['episode']['r']}")

    assert os.path.isdir("videos")
    mp4_files = [file for file in os.listdir("videos") if file.endswith(".mp4")]
    assert len(mp4_files) == 2
    shutil.rmtree("videos")


# ================================================
# FILE: tests/wrappers/test_rescale_action.py
# ================================================
import numpy as np
import pytest

import gym
from gym.wrappers import RescaleAction


def test_rescale_action():
    """RescaleAction must reject discrete spaces and rescale continuous ones."""
    env = gym.make("CartPole-v1", disable_env_checker=True)
    with pytest.raises(AssertionError):
        env = RescaleAction(env, -1, 1)
    del env

    env = gym.make("Pendulum-v1", disable_env_checker=True)
    wrapped_env = RescaleAction(
        gym.make("Pendulum-v1", disable_env_checker=True), -1, 1
    )

    seed = 0

    obs, info = env.reset(seed=seed)
    wrapped_obs, wrapped_obs_info = wrapped_env.reset(seed=seed)
    assert np.allclose(obs, wrapped_obs)

    obs, reward, _, _, _ = env.step([1.5])
    with pytest.raises(AssertionError):
        wrapped_env.step([1.5])
    wrapped_obs, wrapped_reward, _, _, _ = wrapped_env.step([0.75])

    assert np.allclose(obs, wrapped_obs)
    assert np.allclose(reward, wrapped_reward)


# ================================================
# FILE: tests/wrappers/test_resize_observation.py
# ================================================
import pytest

import gym
from gym import spaces
from gym.wrappers import ResizeObservation


@pytest.mark.parametrize("env_id", ["CarRacing-v2"])
@pytest.mark.parametrize("shape", [16, 32, (8, 5), [10, 7]])
def test_resize_observation(env_id, shape):
    """Observation shape must follow the requested int or (h, w) shape."""
    env = gym.make(env_id, disable_env_checker=True)
    env = ResizeObservation(env, shape)

    assert isinstance(env.observation_space, spaces.Box)
    assert env.observation_space.shape[-1] == 3
    obs, _ = env.reset()
    if isinstance(shape, int):
        assert env.observation_space.shape[:2] == (shape, shape)
        assert obs.shape == (shape, shape, 3)
    else:
        assert env.observation_space.shape[:2] == tuple(shape)
        assert obs.shape == tuple(shape) + (3,)


# ================================================
# FILE: tests/wrappers/test_step_compatibility.py
# ================================================
import pytest

import gym
from gym.spaces import Discrete
from gym.wrappers import StepAPICompatibility


class OldStepEnv(gym.Env):
    """Env using the legacy 4-tuple (obs, rew, done, info) step API."""

    def __init__(self):
        self.action_space = Discrete(2)
        self.observation_space = Discrete(2)

    def step(self, action):
        obs = self.observation_space.sample()
        rew = 0
        done = False
        info = {}
        return obs, rew, done, info


class NewStepEnv(gym.Env):
    """Env using the 5-tuple (obs, rew, terminated, truncated, info) step API."""

    def __init__(self):
        self.action_space = Discrete(2)
        self.observation_space = Discrete(2)

    def step(self, action):
        obs = self.observation_space.sample()
        rew = 0
        terminated = False
        truncated = False
        info = {}
        return obs, rew, terminated, truncated, info


@pytest.mark.parametrize("env", [OldStepEnv, NewStepEnv])
@pytest.mark.parametrize("output_truncation_bool", [None, True])
def test_step_compatibility_to_new_api(env, output_truncation_bool):
    if output_truncation_bool is None:
        env = StepAPICompatibility(env())
    else:
        env = StepAPICompatibility(env(), output_truncation_bool)
    step_returns = env.step(0)
    _, _, terminated, truncated, _ = step_returns
    assert isinstance(terminated, bool)
    assert isinstance(truncated, bool)


@pytest.mark.parametrize("env", [OldStepEnv, NewStepEnv])
def test_step_compatibility_to_old_api(env):
    env = StepAPICompatibility(env(), False)
    step_returns = env.step(0)
    assert len(step_returns) == 4
    _, _, done, _ = step_returns
    assert isinstance(done, bool)


@pytest.mark.parametrize("apply_api_compatibility", [None, True, False])
def test_step_compatibility_in_make(apply_api_compatibility):
    gym.register("OldStepEnv-v0", entry_point=OldStepEnv)
    if apply_api_compatibility is not None:
        env = gym.make(
            "OldStepEnv-v0",
            apply_api_compatibility=apply_api_compatibility,
            disable_env_checker=True,
        )
    else:
        env = gym.make("OldStepEnv-v0", disable_env_checker=True)

    env.reset()
    step_returns = env.step(0)
    if apply_api_compatibility:
        assert len(step_returns) == 5
        _, _, terminated, truncated, _ = step_returns
        assert isinstance(terminated, bool)
        assert isinstance(truncated, bool)
    else:
        assert len(step_returns) == 4
        _, _, done, _ = step_returns
        assert isinstance(done, bool)

    gym.envs.registry.pop("OldStepEnv-v0")


# ================================================
# FILE: tests/wrappers/test_time_aware_observation.py
# ================================================
import pytest

import gym
from gym import spaces
from gym.wrappers import TimeAwareObservation


@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"])
def test_time_aware_observation(env_id):
    """The wrapper must append the step counter as the final observation entry."""
    env = gym.make(env_id, disable_env_checker=True)
    wrapped_env = TimeAwareObservation(env)

    assert isinstance(env.observation_space, spaces.Box)
    assert isinstance(wrapped_env.observation_space, spaces.Box)
    assert wrapped_env.observation_space.shape[0] == env.observation_space.shape[0] + 1

    obs, info = env.reset()
    wrapped_obs, wrapped_obs_info = wrapped_env.reset()
    assert wrapped_env.t == 0.0
    assert wrapped_obs[-1] == 0.0
    assert wrapped_obs.shape[0] == obs.shape[0] + 1

    wrapped_obs, _, _, _, _ = wrapped_env.step(env.action_space.sample())
    assert wrapped_env.t == 1.0
    assert wrapped_obs[-1] == 1.0
    assert wrapped_obs.shape[0] == obs.shape[0] + 1

    wrapped_obs, _, _, _, _ = wrapped_env.step(env.action_space.sample())
    assert wrapped_env.t == 2.0
    assert wrapped_obs[-1] == 2.0
    assert wrapped_obs.shape[0] == obs.shape[0] + 1

    wrapped_obs, wrapped_obs_info = wrapped_env.reset()
    assert wrapped_env.t == 0.0
    assert wrapped_obs[-1] == 0.0
    assert wrapped_obs.shape[0] == obs.shape[0] + 1


# ================================================
# FILE: tests/wrappers/test_time_limit.py
# ================================================
import pytest

import gym
from gym.envs.classic_control.pendulum import PendulumEnv
from gym.wrappers import TimeLimit


def test_time_limit_reset_info():
    env = gym.make("CartPole-v1", disable_env_checker=True)
    env = TimeLimit(env)
    ob_space = env.observation_space
    obs, info = env.reset()
    assert ob_space.contains(obs)
    assert isinstance(info, dict)


@pytest.mark.parametrize("double_wrap", [False, True])
def test_time_limit_wrapper(double_wrap):
    # The pendulum env does not terminate by default
    # so we are sure termination is only due to timeout
    env = PendulumEnv()
    max_episode_length = 20
    env = TimeLimit(env, max_episode_length)
    if double_wrap:
        env = TimeLimit(env, max_episode_length)
    env.reset()
    terminated, truncated = False, False
    n_steps = 0
    info = {}
    while not (terminated or truncated):
        n_steps += 1
        _, _, terminated, truncated, info = env.step(env.action_space.sample())

    assert n_steps == max_episode_length
    assert truncated


@pytest.mark.parametrize("double_wrap", [False, True])
def test_termination_on_last_step(double_wrap):
    # Special case: termination at the last timestep
    # Truncation due to timeout also happens at the same step
    env = PendulumEnv()

    def patched_step(_action):
        return env.observation_space.sample(), 0.0, True, False, {}

    env.step = patched_step

    max_episode_length = 1
    env = TimeLimit(env, max_episode_length)
    if double_wrap:
        env = TimeLimit(env, max_episode_length)
    env.reset()
    _, _, terminated, truncated, _ = env.step(env.action_space.sample())
    assert terminated is True
    assert truncated is True


# ================================================
# FILE: tests/wrappers/test_transform_observation.py
# ================================================
import numpy as np
import pytest

import gym
from gym.wrappers import TransformObservation


@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"])
def test_transform_observation(env_id):
    """Wrapped observations must equal the transform of the raw observations."""

    def affine_transform(x):
        return 3 * x + 2

    env = gym.make(env_id, disable_env_checker=True)
    wrapped_env = TransformObservation(
        gym.make(env_id, disable_env_checker=True), lambda obs: affine_transform(obs)
    )

    obs, info = env.reset(seed=0)
    wrapped_obs, wrapped_obs_info = wrapped_env.reset(seed=0)
    assert np.allclose(wrapped_obs, affine_transform(obs))
    assert isinstance(wrapped_obs_info, dict)

    action = env.action_space.sample()
    obs, reward, terminated, truncated, _ = env.step(action)
    (
        wrapped_obs,
        wrapped_reward,
        wrapped_terminated,
        wrapped_truncated,
        _,
    ) = wrapped_env.step(action)
    assert np.allclose(wrapped_obs, affine_transform(obs))
    assert np.allclose(wrapped_reward, reward)
    assert wrapped_terminated == terminated
    assert wrapped_truncated == truncated


# ================================================
# FILE: tests/wrappers/test_transform_reward.py
# ================================================
import numpy as np
import pytest

import gym
from gym.wrappers import TransformReward
@pytest.mark.parametrize("env_id", ["CartPole-v1", "Pendulum-v1"])
def test_transform_reward(env_id):
    """Check TransformReward with scaling, clipping and sign transforms."""
    # use case #1: scale
    scales = [0.1, 200]
    for scale in scales:
        env = gym.make(env_id, disable_env_checker=True)
        wrapped_env = TransformReward(
            gym.make(env_id, disable_env_checker=True), lambda r: scale * r
        )
        action = env.action_space.sample()

        env.reset(seed=0)
        wrapped_env.reset(seed=0)
        _, reward, _, _, _ = env.step(action)
        _, wrapped_reward, _, _, _ = wrapped_env.step(action)

        assert wrapped_reward == scale * reward
    del env, wrapped_env

    # use case #2: clip
    min_r = -0.0005
    max_r = 0.0002
    env = gym.make(env_id, disable_env_checker=True)
    wrapped_env = TransformReward(
        gym.make(env_id, disable_env_checker=True), lambda r: np.clip(r, min_r, max_r)
    )
    action = env.action_space.sample()

    env.reset(seed=0)
    wrapped_env.reset(seed=0)
    _, reward, _, _, _ = env.step(action)
    _, wrapped_reward, _, _, _ = wrapped_env.step(action)

    assert abs(wrapped_reward) < abs(reward)
    assert wrapped_reward == -0.0005 or wrapped_reward == 0.0002
    del env, wrapped_env

    # use case #3: sign
    env = gym.make(env_id, disable_env_checker=True)
    wrapped_env = TransformReward(
        gym.make(env_id, disable_env_checker=True), lambda r: np.sign(r)
    )

    env.reset(seed=0)
    wrapped_env.reset(seed=0)
    for _ in range(1000):
        action = env.action_space.sample()
        _, wrapped_reward, terminated, truncated, _ = wrapped_env.step(action)
        assert wrapped_reward in [-1.0, 0.0, 1.0]
        if terminated or truncated:
            break
    del env, wrapped_env


# ================================================
# FILE: tests/wrappers/test_vector_list_info.py
# ================================================
import pytest

import gym
from gym.wrappers import RecordEpisodeStatistics, VectorListInfo

ENV_ID = "CartPole-v1"
NUM_ENVS = 3
ENV_STEPS = 50
SEED = 42


def test_usage_in_vector_env():
    """VectorListInfo must accept vector envs and reject single envs."""
    env = gym.make(ENV_ID, disable_env_checker=True)
    vector_env = gym.vector.make(ENV_ID, num_envs=NUM_ENVS, disable_env_checker=True)

    VectorListInfo(vector_env)

    with pytest.raises(AssertionError):
        VectorListInfo(env)


def test_info_to_list():
    """Step info must be a per-env list carrying final_observation on done."""
    env_to_wrap = gym.vector.make(ENV_ID, num_envs=NUM_ENVS, disable_env_checker=True)
    wrapped_env = VectorListInfo(env_to_wrap)
    wrapped_env.action_space.seed(SEED)
    _, info = wrapped_env.reset(seed=SEED)
    assert isinstance(info, list)
    assert len(info) == NUM_ENVS

    for _ in range(ENV_STEPS):
        action = wrapped_env.action_space.sample()
        _, _, terminateds, truncateds, list_info = wrapped_env.step(action)
        for i, (terminated, truncated) in enumerate(zip(terminateds, truncateds)):
            if terminated or truncated:
                assert "final_observation" in list_info[i]
            else:
                assert "final_observation" not in list_info[i]


def test_info_to_list_statistics():
    """Episode statistics must be converted to per-env float entries."""
    env_to_wrap = gym.vector.make(ENV_ID, num_envs=NUM_ENVS, disable_env_checker=True)
    wrapped_env = VectorListInfo(RecordEpisodeStatistics(env_to_wrap))
    _, info = wrapped_env.reset(seed=SEED)
    wrapped_env.action_space.seed(SEED)
    assert isinstance(info, list)
    assert len(info) == NUM_ENVS

    for _ in range(ENV_STEPS):
        action = wrapped_env.action_space.sample()
        _, _, terminateds, truncateds, list_info = wrapped_env.step(action)
        for i, (terminated, truncated) in enumerate(zip(terminateds, truncateds)):
            if terminated or truncated:
                assert "episode" in list_info[i]
                for stats in ["r", "l", "t"]:
                    assert stats in list_info[i]["episode"]
                    assert isinstance(list_info[i]["episode"][stats], float)
            else:
                assert "episode" not in list_info[i]


# ================================================
# FILE: tests/wrappers/test_video_recorder.py
# ================================================
import gc
import os
import re
import time

import pytest

import gym
from gym.wrappers.monitoring.video_recorder import VideoRecorder


class BrokenRecordableEnv(gym.Env):
    """Env advertising a recordable mode whose render() returns None."""

    metadata = {"render_modes": ["rgb_array_list"]}

    def __init__(self, render_mode="rgb_array_list"):
        self.render_mode = render_mode

    def render(self):
        pass


class UnrecordableEnv(gym.Env):
    """Env with no video-compatible render mode."""

    metadata = {"render_modes": [None]}

    def __init__(self, render_mode=None):
        self.render_mode = render_mode

    def render(self):
        pass


def test_record_simple():
    env = gym.make(
        "CartPole-v1", render_mode="rgb_array_list", disable_env_checker=True
    )
    rec = VideoRecorder(env)
    env.reset()
    rec.capture_frame()

    rec.close()
    assert not rec.broken
    assert os.path.exists(rec.path)
    # Fixed: use a context manager so the file handle is not leaked.
    with open(rec.path) as f:
        assert os.fstat(f.fileno()).st_size > 100


def test_autoclose():
    def record():
        env = gym.make(
            "CartPole-v1", render_mode="rgb_array_list", disable_env_checker=True
        )
        rec = VideoRecorder(env)
        env.reset()
        rec.capture_frame()

        rec_path = rec.path

        # The function ends without an explicit `rec.close()` call
        # The Python interpreter will implicitly do `del rec` on garbage cleaning
        return rec_path

    rec_path = record()

    gc.collect()  # do explicit garbage collection for test
    time.sleep(5)  # wait for subprocess exiting

    assert os.path.exists(rec_path)
    # Fixed: use a context manager so the file handle is not leaked.
    with open(rec_path) as f:
        assert os.fstat(f.fileno()).st_size > 100


def test_no_frames():
    env = BrokenRecordableEnv()
    rec = VideoRecorder(env)
    rec.close()
    assert rec.functional
    assert not os.path.exists(rec.path)


def test_record_unrecordable_method():
    with pytest.warns(
        UserWarning,
        match=re.escape(
            "\x1b[33mWARN: Disabling video recorder because environment was not initialized with any compatible video mode between `rgb_array` and `rgb_array_list`\x1b[0m"
        ),
    ):
        env = UnrecordableEnv()
        rec = VideoRecorder(env)
        assert not rec.enabled
        rec.close()


def test_record_breaking_render_method():
    with pytest.warns(
        UserWarning,
        match=re.escape(
            "Env returned None on `render()`. Disabling further rendering for video recorder by marking as disabled:"
        ),
    ):
        env = BrokenRecordableEnv()
        rec = VideoRecorder(env)
        rec.capture_frame()
        rec.close()
        assert rec.broken
        assert not os.path.exists(rec.path)


def test_text_envs():
    env = gym.make(
        "FrozenLake-v1", render_mode="rgb_array_list", disable_env_checker=True
    )
    video = VideoRecorder(env)
    try:
        env.reset()
        video.capture_frame()
        video.close()
    finally:
        os.remove(video.path)


# ================================================
# FILE: tests/wrappers/utils.py
# ================================================
import gym


def has_wrapper(wrapped_env: gym.Env, wrapper_type: type) -> bool:
    """Walk the wrapper chain and return True if any layer is `wrapper_type`."""
    while isinstance(wrapped_env, gym.Wrapper):
        if isinstance(wrapped_env, wrapper_type):
            return True
        wrapped_env = wrapped_env.env
    return False