Showing preview only (1,225K chars total). Download the full file or copy to clipboard to get everything.
Repository: openai/gym
Branch: master
Commit: dcd185843a62
Files: 219
Total size: 1.1 MB
Directory structure:
gitextract_3_1zpoik/
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug.md
│ │ ├── proposal.md
│ │ └── question.md
│ ├── PULL_REQUEST_TEMPLATE.md
│ ├── stale.yml
│ └── workflows/
│ ├── build.yml
│ └── pre-commit.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.rst
├── CONTRIBUTING.md
├── LICENSE.md
├── README.md
├── bin/
│ └── docker_entrypoint
├── gym/
│ ├── __init__.py
│ ├── core.py
│ ├── envs/
│ │ ├── __init__.py
│ │ ├── box2d/
│ │ │ ├── __init__.py
│ │ │ ├── bipedal_walker.py
│ │ │ ├── car_dynamics.py
│ │ │ ├── car_racing.py
│ │ │ └── lunar_lander.py
│ │ ├── classic_control/
│ │ │ ├── __init__.py
│ │ │ ├── acrobot.py
│ │ │ ├── cartpole.py
│ │ │ ├── continuous_mountain_car.py
│ │ │ ├── mountain_car.py
│ │ │ ├── pendulum.py
│ │ │ └── utils.py
│ │ ├── mujoco/
│ │ │ ├── __init__.py
│ │ │ ├── ant.py
│ │ │ ├── ant_v3.py
│ │ │ ├── ant_v4.py
│ │ │ ├── assets/
│ │ │ │ ├── ant.xml
│ │ │ │ ├── half_cheetah.xml
│ │ │ │ ├── hopper.xml
│ │ │ │ ├── humanoid.xml
│ │ │ │ ├── humanoidstandup.xml
│ │ │ │ ├── inverted_double_pendulum.xml
│ │ │ │ ├── inverted_pendulum.xml
│ │ │ │ ├── point.xml
│ │ │ │ ├── pusher.xml
│ │ │ │ ├── reacher.xml
│ │ │ │ ├── swimmer.xml
│ │ │ │ └── walker2d.xml
│ │ │ ├── half_cheetah.py
│ │ │ ├── half_cheetah_v3.py
│ │ │ ├── half_cheetah_v4.py
│ │ │ ├── hopper.py
│ │ │ ├── hopper_v3.py
│ │ │ ├── hopper_v4.py
│ │ │ ├── humanoid.py
│ │ │ ├── humanoid_v3.py
│ │ │ ├── humanoid_v4.py
│ │ │ ├── humanoidstandup.py
│ │ │ ├── humanoidstandup_v4.py
│ │ │ ├── inverted_double_pendulum.py
│ │ │ ├── inverted_double_pendulum_v4.py
│ │ │ ├── inverted_pendulum.py
│ │ │ ├── inverted_pendulum_v4.py
│ │ │ ├── mujoco_env.py
│ │ │ ├── mujoco_rendering.py
│ │ │ ├── pusher.py
│ │ │ ├── pusher_v4.py
│ │ │ ├── reacher.py
│ │ │ ├── reacher_v4.py
│ │ │ ├── swimmer.py
│ │ │ ├── swimmer_v3.py
│ │ │ ├── swimmer_v4.py
│ │ │ ├── walker2d.py
│ │ │ ├── walker2d_v3.py
│ │ │ └── walker2d_v4.py
│ │ ├── registration.py
│ │ └── toy_text/
│ │ ├── __init__.py
│ │ ├── blackjack.py
│ │ ├── cliffwalking.py
│ │ ├── frozen_lake.py
│ │ ├── taxi.py
│ │ └── utils.py
│ ├── error.py
│ ├── logger.py
│ ├── py.typed
│ ├── spaces/
│ │ ├── __init__.py
│ │ ├── box.py
│ │ ├── dict.py
│ │ ├── discrete.py
│ │ ├── graph.py
│ │ ├── multi_binary.py
│ │ ├── multi_discrete.py
│ │ ├── sequence.py
│ │ ├── space.py
│ │ ├── text.py
│ │ ├── tuple.py
│ │ └── utils.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── colorize.py
│ │ ├── env_checker.py
│ │ ├── ezpickle.py
│ │ ├── passive_env_checker.py
│ │ ├── play.py
│ │ ├── save_video.py
│ │ ├── seeding.py
│ │ └── step_api_compatibility.py
│ ├── vector/
│ │ ├── __init__.py
│ │ ├── async_vector_env.py
│ │ ├── sync_vector_env.py
│ │ ├── utils/
│ │ │ ├── __init__.py
│ │ │ ├── misc.py
│ │ │ ├── numpy_utils.py
│ │ │ ├── shared_memory.py
│ │ │ └── spaces.py
│ │ └── vector_env.py
│ ├── version.py
│ └── wrappers/
│ ├── README.md
│ ├── __init__.py
│ ├── atari_preprocessing.py
│ ├── autoreset.py
│ ├── clip_action.py
│ ├── compatibility.py
│ ├── env_checker.py
│ ├── filter_observation.py
│ ├── flatten_observation.py
│ ├── frame_stack.py
│ ├── gray_scale_observation.py
│ ├── human_rendering.py
│ ├── monitoring/
│ │ ├── __init__.py
│ │ └── video_recorder.py
│ ├── normalize.py
│ ├── order_enforcing.py
│ ├── pixel_observation.py
│ ├── record_episode_statistics.py
│ ├── record_video.py
│ ├── render_collection.py
│ ├── rescale_action.py
│ ├── resize_observation.py
│ ├── step_api_compatibility.py
│ ├── time_aware_observation.py
│ ├── time_limit.py
│ ├── transform_observation.py
│ ├── transform_reward.py
│ └── vector_list_info.py
├── py.Dockerfile
├── pyproject.toml
├── requirements.txt
├── setup.py
├── test_requirements.txt
└── tests/
├── __init__.py
├── envs/
│ ├── __init__.py
│ ├── test_action_dim_check.py
│ ├── test_compatibility.py
│ ├── test_env_implementation.py
│ ├── test_envs.py
│ ├── test_make.py
│ ├── test_mujoco.py
│ ├── test_register.py
│ ├── test_spec.py
│ ├── utils.py
│ └── utils_envs.py
├── spaces/
│ ├── __init__.py
│ ├── test_box.py
│ ├── test_dict.py
│ ├── test_discrete.py
│ ├── test_graph.py
│ ├── test_multibinary.py
│ ├── test_multidiscrete.py
│ ├── test_sequence.py
│ ├── test_space.py
│ ├── test_spaces.py
│ ├── test_text.py
│ ├── test_tuple.py
│ ├── test_utils.py
│ └── utils.py
├── test_core.py
├── testing_env.py
├── utils/
│ ├── __init__.py
│ ├── test_env_checker.py
│ ├── test_passive_env_checker.py
│ ├── test_play.py
│ ├── test_save_video.py
│ ├── test_seeding.py
│ └── test_step_api_compatibility.py
├── vector/
│ ├── __init__.py
│ ├── test_async_vector_env.py
│ ├── test_numpy_utils.py
│ ├── test_shared_memory.py
│ ├── test_spaces.py
│ ├── test_sync_vector_env.py
│ ├── test_vector_env.py
│ ├── test_vector_env_info.py
│ ├── test_vector_env_wrapper.py
│ ├── test_vector_make.py
│ └── utils.py
└── wrappers/
├── __init__.py
├── test_atari_preprocessing.py
├── test_autoreset.py
├── test_clip_action.py
├── test_filter_observation.py
├── test_flatten.py
├── test_flatten_observation.py
├── test_frame_stack.py
├── test_gray_scale_observation.py
├── test_human_rendering.py
├── test_nested_dict.py
├── test_normalize.py
├── test_order_enforcing.py
├── test_passive_env_checker.py
├── test_pixel_observation.py
├── test_record_episode_statistics.py
├── test_record_video.py
├── test_rescale_action.py
├── test_resize_observation.py
├── test_step_compatibility.py
├── test_time_aware_observation.py
├── test_time_limit.py
├── test_transform_observation.py
├── test_transform_reward.py
├── test_vector_list_info.py
├── test_video_recorder.py
└── utils.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/ISSUE_TEMPLATE/bug.md
================================================
---
name: Bug Report
about: Submit a bug report
title: "[Bug Report] Bug title"
---
If you are submitting a bug report, please fill in the following details and use the tag [bug].
**Describe the bug**
A clear and concise description of what the bug is.
**Code example**
Please try to provide a minimal example to reproduce the bug. Error messages and stack traces are also helpful.
**System Info**
Describe the characteristic of your environment:
* Describe how Gym was installed (pip, docker, source, ...)
* What OS/version of Linux you're using. Note that while we will accept PRs to improve Window's support, we do not officially support it.
* Python version
**Additional context**
Add any other context about the problem here.
### Checklist
- [ ] I have checked that there is no similar [issue](https://github.com/openai/gym/issues) in the repo (**required**)
================================================
FILE: .github/ISSUE_TEMPLATE/proposal.md
================================================
---
name: Proposal
about: Propose changes that are not bug fixes
title: "[Proposal] Proposal title"
---
### Proposal
A clear and concise description of the proposal.
### Motivation
Please outline the motivation for the proposal.
Is your feature request related to a problem? e.g.,"I'm always frustrated when [...]".
If this is related to another GitHub issue, please link here too.
### Pitch
A clear and concise description of what you want to happen.
### Alternatives
A clear and concise description of any alternative solutions or features you've considered, if any.
### Additional context
Add any other context or screenshots about the feature request here.
### Checklist
- [ ] I have checked that there is no similar [issue](https://github.com/openai/gym/issues) in the repo (**required**)
================================================
FILE: .github/ISSUE_TEMPLATE/question.md
================================================
---
name: Question
about: Ask a question
title: "[Question] Question title"
---
### Question
If you're a beginner and have basic questions, please ask on [r/reinforcementlearning](https://www.reddit.com/r/reinforcementlearning/) or in the [RL Discord](https://discord.com/invite/xhfNqQv) (if you're new please use the beginners channel). Basic questions that are not bugs or feature requests will be closed without reply, because GitHub issues are not an appropriate venue for these.
Advanced/nontrivial questions, especially in areas where documentation is lacking, are very much welcome.
================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
# Description
Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change.
Fixes # (issue)
## Type of change
Please delete options that are not relevant.
- [ ] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
- [ ] This change requires a documentation update
### Screenshots
Please attach before and after screenshots of the change if applicable.
<!--
Example:
| Before | After |
| ------ | ----- |
| _gif/png before_ | _gif/png after_ |
To upload images to a PR -- simply drag and drop an image while in edit mode and it should upload the image directly. You can then paste that source into the above before/after sections.
-->
# Checklist:
- [ ] I have run the [`pre-commit` checks](https://pre-commit.com/) with `pre-commit run --all-files` (see `CONTRIBUTING.md` instructions to set it up)
- [ ] I have commented my code, particularly in hard-to-understand areas
- [ ] I have made corresponding changes to the documentation
- [ ] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] New and existing unit tests pass locally with my changes
<!--
As you go through the checklist above, you can mark something as done by putting an x character in it
For example,
- [x] I have done this task
- [ ] I have not done this task
-->
================================================
FILE: .github/stale.yml
================================================
# Configuration for probot-stale - https://github.com/probot/stale
# Number of days of inactivity before an Issue or Pull Request becomes stale
daysUntilStale: 60
# Number of days of inactivity before an Issue or Pull Request with the stale label is closed.
# Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale.
daysUntilClose: 14
# Only issues or pull requests with all of these labels are check if stale. Defaults to `[]` (disabled)
onlyLabels:
- more-information-needed
# Issues or Pull Requests with these labels will never be considered stale. Set to `[]` to disable
exemptLabels:
- pinned
- security
- "[Status] Maybe Later"
# Set to true to ignore issues in a project (defaults to false)
exemptProjects: true
# Set to true to ignore issues in a milestone (defaults to false)
exemptMilestones: true
# Set to true to ignore issues with an assignee (defaults to false)
exemptAssignees: true
# Label to use when marking as stale
staleLabel: stale
# Comment to post when marking as stale. Set to `false` to disable
markComment: >
This issue has been automatically marked as stale because it has not had
recent activity. It will be closed if no further activity occurs. Thank you
for your contributions.
# Comment to post when removing the stale label.
# unmarkComment: >
# Your comment here.
# Comment to post when closing a stale Issue or Pull Request.
# closeComment: >
# Your comment here.
# Limit the number of actions per hour, from 1-30. Default is 30
limitPerRun: 30
# Limit to only `issues` or `pulls`
only: issues
# Optionally, specify configuration settings that are specific to just 'issues' or 'pulls':
# pulls:
# daysUntilStale: 30
# markComment: >
# This pull request has been automatically marked as stale because it has not had
# recent activity. It will be closed if no further activity occurs. Thank you
# for your contributions.
# issues:
# exemptLabels:
# - confirmed
================================================
FILE: .github/workflows/build.yml
================================================
name: build
on: [pull_request, push]
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.6', '3.7', '3.8', '3.9', '3.10']
steps:
- uses: actions/checkout@v2
- run: |
docker build -f py.Dockerfile \
--build-arg PYTHON_VERSION=${{ matrix.python-version }} \
--tag gym-docker .
- name: Run tests
run: docker run gym-docker pytest
================================================
FILE: .github/workflows/pre-commit.yml
================================================
# https://pre-commit.com
# This GitHub Action assumes that the repo contains a valid .pre-commit-config.yaml file.
name: pre-commit
on:
pull_request:
push:
branches: [master]
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
- run: pip install pre-commit
- run: pre-commit --version
- run: pre-commit install
- run: pre-commit run --all-files
================================================
FILE: .gitignore
================================================
*.swp
*.pyc
*.py~
.DS_Store
.cache
.pytest_cache/
# Setuptools distribution and build folders.
/dist/
/build
# Virtualenv
/env
# Python egg metadata, regenerated from source files by setuptools.
/*.egg-info
*.sublime-project
*.sublime-workspace
logs/
.ipynb_checkpoints
ghostdriver.log
junk
MUJOCO_LOG.txt
rllab_mujoco
tutorial/*.html
# IDE files
.eggs
.tox
# PyCharm project files
.idea
vizdoom.ini
================================================
FILE: .pre-commit-config.yaml
================================================
---
repos:
- repo: https://github.com/python/black
rev: 22.3.0
hooks:
- id: black
- repo: https://github.com/codespell-project/codespell
rev: v2.1.0
hooks:
- id: codespell
args:
- --ignore-words-list=nd,reacher,thist,ths, ure, referenc
- repo: https://gitlab.com/PyCQA/flake8
rev: 4.0.1
hooks:
- id: flake8
args:
- '--per-file-ignores=*/__init__.py:F401 gym/envs/registration.py:E704'
- --ignore=E203,W503,E741
- --max-complexity=30
- --max-line-length=456
- --show-source
- --statistics
- repo: https://github.com/PyCQA/isort
rev: 5.10.1
hooks:
- id: isort
args: ["--profile", "black"]
- repo: https://github.com/pycqa/pydocstyle
rev: 6.1.1 # pick a git hash / tag to point to
hooks:
- id: pydocstyle
exclude: ^(gym/version.py)|(gym/envs/)|(tests/)
args:
- --source
- --explain
- --convention=google
additional_dependencies: ["toml"]
- repo: https://github.com/asottile/pyupgrade
rev: v2.32.0
hooks:
- id: pyupgrade
# TODO: remove `--keep-runtime-typing` option
args: ["--py36-plus", "--keep-runtime-typing"]
- repo: local
hooks:
- id: pyright
name: pyright
entry: pyright
language: node
pass_filenames: false
types: [python]
additional_dependencies: ["pyright"]
args:
- --project=pyproject.toml
================================================
FILE: CODE_OF_CONDUCT.rst
================================================
OpenAI Gym is dedicated to providing a harassment-free experience for
everyone, regardless of gender, gender identity and expression, sexual
orientation, disability, physical appearance, body size, age, race, or
religion. We do not tolerate harassment of participants in any form.
This code of conduct applies to all OpenAI Gym spaces (including Gist
comments) both online and off. Anyone who violates this code of
conduct may be sanctioned or expelled from these spaces at the
discretion of the OpenAI team.
We may add additional rules over time, which will be made clearly
available to participants. Participants are responsible for knowing
and abiding by these rules.
================================================
FILE: CONTRIBUTING.md
================================================
# Gym Contribution Guidelines
At this time we are currently accepting the current forms of contributions:
- Bug reports (keep in mind that changing environment behavior should be minimized as that requires releasing a new version of the environment and makes results hard to compare across versions)
- Pull requests for bug fixes
- Documentation improvements
Notably, we are not accepting these forms of contributions:
- New environments
- New features
This may change in the future.
If you wish to make a Gym environment, follow the instructions in [Creating Environments](https://github.com/openai/gym/blob/master/docs/creating_environments.md). When your environment works, you can make a PR to add it to the bottom of the [List of Environments](https://github.com/openai/gym/blob/master/docs/third_party_environments.md).
Edit July 27, 2021: Please see https://github.com/openai/gym/issues/2259 for new contributing standards
# Development
This section contains technical instructions & hints for the contributors.
## Type checking
The project uses `pyright` to check types.
To type check locally, install `pyright` per official [instructions](https://github.com/microsoft/pyright#command-line).
Its configuration lives within `pyproject.toml`. It includes the list of included and excluded files currently supporting type checks.
To run `pyright` for the project, run the pre-commit process (`pre-commit run --all-files`) or `pyright --project=pyproject.toml`
Alternatively, pyright is a built-in feature of VSCode that will automatically provide type hinting.
### Adding typing to more modules and packages
If you would like to add typing to a module in the project,
the list of included, excluded and strict files can be found in pyproject.toml (pyproject.toml -> [tool.pyright]).
To run `pyright` for the project, run the pre-commit process (`pre-commit run --all-files`) or `pyright`
## Git hooks
The CI will run several checks on the new code pushed to the Gym repository. These checks can also be run locally without waiting for the CI by following the steps below:
1. [install `pre-commit`](https://pre-commit.com/#install),
2. Install the Git hooks by running `pre-commit install`.
Once those two steps are done, the Git hooks will be run automatically at every new commit.
The Git hooks can also be run manually with `pre-commit run --all-files`, and if needed they can be skipped (not recommended) with `git commit --no-verify`.
**Note:** you may have to run `pre-commit run --all-files` manually a couple of times to make it pass when you commit, as each formatting tool will first format the code and fail the first time but should pass the second time.
Additionally, for pull requests, the project runs a number of tests for the whole project using [pytest](https://docs.pytest.org/en/latest/getting-started.html#install-pytest).
These tests can be run locally with `pytest` in the root folder.
## Docstrings
Pydocstyle has been added to the pre-commit process such that all new functions follow the [google docstring style](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html).
All new functions require either a short docstring, a single line explaining the purpose of a function
or a multiline docstring that documents each argument and the return type (if there is one) of the function.
In addition, new file and class require top docstrings that should outline the purpose of the file/class.
For classes, code block examples can be provided in the top docstring and not the constructor arguments.
To check your docstrings are correct, run `pre-commit run --all-files` or `pydocstyle --source --explain --convention=google`.
For all docstrings that fail, the source and reason for the failure are provided.
================================================
FILE: LICENSE.md
================================================
The MIT License
Copyright (c) 2016 OpenAI (https://openai.com)
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
# Mujoco models
This work is derived from [MuJoCo models](http://www.mujoco.org/forum/index.php?resources/) used under the following license:
```
This file is part of MuJoCo.
Copyright 2009-2015 Roboti LLC.
Mujoco :: Advanced physics simulation engine
Source : www.roboti.us
Version : 1.31
Released : 23Apr16
Author :: Vikash Kumar
Contacts : kumar@roboti.us
```
================================================
FILE: README.md
================================================
[](https://pre-commit.com/) [](https://github.com/psf/black)
## Important Notice
### The team that has been maintaining Gym since 2021 has moved all future development to [Gymnasium](https://github.com/Farama-Foundation/Gymnasium), a drop in replacement for Gym (import gymnasium as gym), and Gym will not be receiving any future updates. Please switch over to Gymnasium as soon as you're able to do so. If you'd like to read more about the story behind this switch, please check out [this blog post](https://farama.org/Announcing-The-Farama-Foundation).
## Gym
Gym is an open source Python library for developing and comparing reinforcement learning algorithms by providing a standard API to communicate between learning algorithms and environments, as well as a standard set of environments compliant with that API. Since its release, Gym's API has become the field standard for doing this.
Gym documentation website is at [https://www.gymlibrary.dev/](https://www.gymlibrary.dev/), and you can propose fixes and changes to it [here](https://github.com/Farama-Foundation/gym-docs).
Gym also has a discord server for development purposes that you can join here: https://discord.gg/nHg2JRN489
## Installation
To install the base Gym library, use `pip install gym`.
This does not include dependencies for all families of environments (there's a massive number, and some can be problematic to install on certain systems). You can install these dependencies for one family like `pip install gym[atari]` or use `pip install gym[all]` to install all dependencies.
We support Python 3.7, 3.8, 3.9 and 3.10 on Linux and macOS. We will accept PRs related to Windows, but do not officially support it.
## API
The Gym API models environments as simple Python `env` classes. Creating environment instances and interacting with them is very simple- here's an example using the "CartPole-v1" environment:
```python
import gym
env = gym.make("CartPole-v1")
observation, info = env.reset(seed=42)
for _ in range(1000):
action = env.action_space.sample()
observation, reward, terminated, truncated, info = env.step(action)
if terminated or truncated:
observation, info = env.reset()
env.close()
```
## Notable Related Libraries
Please note that this is an incomplete list, and just includes libraries that the maintainers most commonly point newcomers to when asked for recommendations.
* [CleanRL](https://github.com/vwxyzjn/cleanrl) is a learning library based on the Gym API. It is designed to cater to newer people in the field and provides very good reference implementations.
* [Tianshou](https://github.com/thu-ml/tianshou) is a learning library that's geared towards very experienced users and is designed to allow for ease in complex algorithm modifications.
* [RLlib](https://docs.ray.io/en/latest/rllib/index.html) is a learning library that allows for distributed training and inferencing and supports an extraordinarily large number of features throughout the reinforcement learning space.
* [PettingZoo](https://github.com/Farama-Foundation/PettingZoo) is like Gym, but for environments with multiple agents.
## Environment Versioning
Gym keeps strict versioning for reproducibility reasons. All environments end in a suffix like "\_v0". When changes are made to environments that might impact learning results, the number is increased by one to prevent potential confusion.
## MuJoCo Environments
The latest "\_v4" and future versions of the MuJoCo environments will no longer depend on `mujoco-py`. Instead `mujoco` will be the required dependency for future gym MuJoCo environment versions. Old gym MuJoCo environment versions that depend on `mujoco-py` will still be kept but unmaintained.
To install the dependencies for the latest gym MuJoCo environments use `pip install gym[mujoco]`. Dependencies for old MuJoCo environments can still be installed by `pip install gym[mujoco_py]`.
## Citation
A whitepaper from when Gym just came out is available https://arxiv.org/pdf/1606.01540, and can be cited with the following bibtex entry:
```
@misc{1606.01540,
Author = {Greg Brockman and Vicki Cheung and Ludwig Pettersson and Jonas Schneider and John Schulman and Jie Tang and Wojciech Zaremba},
Title = {OpenAI Gym},
Year = {2016},
Eprint = {arXiv:1606.01540},
}
```
## Release Notes
There used to be release notes for all the new Gym versions here. New release notes are being moved to [releases page](https://github.com/openai/gym/releases) on GitHub, like most other libraries do. Old notes can be viewed [here](https://github.com/openai/gym/blob/31be35ecd460f670f0c4b653a14c9996b7facc6c/README.rst).
================================================
FILE: bin/docker_entrypoint
================================================
#!/bin/bash
# This script is the entrypoint for our Docker image.
set -ex
# Set up display; otherwise rendering will fail
Xvfb -screen 0 1024x768x24 &
export DISPLAY=:0
# Wait for the file to come up
display=0
file="/tmp/.X11-unix/X$display"
for i in $(seq 1 10); do
if [ -e "$file" ]; then
break
fi
echo "Waiting for $file to be created (try $i/10)"
sleep "$i"
done
if ! [ -e "$file" ]; then
echo "Timing out: $file was not created"
exit 1
fi
exec "$@"
================================================
FILE: gym/__init__.py
================================================
"""Root __init__ of the gym module setting the __all__ of gym modules."""
# isort: skip_file
from gym import error
from gym.version import VERSION as __version__
from gym.core import (
Env,
Wrapper,
ObservationWrapper,
ActionWrapper,
RewardWrapper,
)
from gym.spaces import Space
from gym.envs import make, spec, register
from gym import logger
from gym import vector
from gym import wrappers
import os
import sys
# Public API of the top-level ``gym`` namespace.
__all__ = ["Env", "Space", "Wrapper", "make", "spec", "register"]

# Initializing pygame initializes audio connections through SDL. SDL uses alsa by default on all Linux systems
# SDL connecting to alsa frequently create these giant lists of warnings every time you import an environment using
# pygame
# DSP is far more benign (and should probably be the default in SDL anyways)
if sys.platform.startswith("linux"):
    os.environ["SDL_AUDIODRIVER"] = "dsp"

# Suppress pygame's "Hello from the pygame community" banner on import.
os.environ["PYGAME_HIDE_SUPPORT_PROMPT"] = "hide"

# gym_notices is an optional package carrying per-version user notices;
# the lookup is deliberately best-effort and must never break ``import gym``.
try:
    import gym_notices.notices as notices

    # print version warning if necessary
    notice = notices.notices.get(__version__)
    if notice:
        print(notice, file=sys.stderr)
except Exception:  # nosec
    pass
================================================
FILE: gym/core.py
================================================
"""Core API for Environment, Wrapper, ActionWrapper, RewardWrapper and ObservationWrapper."""
import sys
from typing import (
TYPE_CHECKING,
Any,
Dict,
Generic,
List,
Optional,
SupportsFloat,
Tuple,
TypeVar,
Union,
)
import numpy as np
from gym import spaces
from gym.logger import warn
from gym.utils import seeding
# Import EnvSpec only for static type checkers, avoiding a runtime import cycle
# with gym.envs.registration.
if TYPE_CHECKING:
    from gym.envs.registration import EnvSpec

# Python 3.6 reached end-of-life; warn users still running it.
# Fix: the warning message previously read "not longer supports".
if sys.version_info[0:2] == (3, 6):
    warn(
        "Gym minimally supports python 3.6 as the python foundation no longer supports the version, please update your version to 3.7+"
    )

# Generic type variables describing an environment's observation, action and
# render-frame types; concrete envs bind these via Env[ObsType, ActType].
ObsType = TypeVar("ObsType")
ActType = TypeVar("ActType")
RenderFrame = TypeVar("RenderFrame")
class Env(Generic[ObsType, ActType]):
r"""The main OpenAI Gym class.
It encapsulates an environment with arbitrary behind-the-scenes dynamics.
An environment can be partially or fully observed.
The main API methods that users of this class need to know are:
- :meth:`step` - Takes a step in the environment using an action returning the next observation, reward,
if the environment terminated and observation information.
- :meth:`reset` - Resets the environment to an initial state, returning the initial observation and observation information.
- :meth:`render` - Renders the environment observation with modes depending on the output
- :meth:`close` - Closes the environment, important for rendering where pygame is imported
And set the following attributes:
- :attr:`action_space` - The Space object corresponding to valid actions
- :attr:`observation_space` - The Space object corresponding to valid observations
- :attr:`reward_range` - A tuple corresponding to the minimum and maximum possible rewards
- :attr:`spec` - An environment spec that contains the information used to initialise the environment from `gym.make`
- :attr:`metadata` - The metadata of the environment, i.e. render modes
- :attr:`np_random` - The random number generator for the environment
Note: a default reward range set to :math:`(-\infty,+\infty)` already exists. Set it if you want a narrower range.
"""
# Set this in SOME subclasses
metadata: Dict[str, Any] = {"render_modes": []}
# define render_mode if your environment supports rendering
render_mode: Optional[str] = None
reward_range = (-float("inf"), float("inf"))
spec: "EnvSpec" = None
# Set these in ALL subclasses
action_space: spaces.Space[ActType]
observation_space: spaces.Space[ObsType]
# Created
_np_random: Optional[np.random.Generator] = None
@property
def np_random(self) -> np.random.Generator:
"""Returns the environment's internal :attr:`_np_random` that if not set will initialise with a random seed."""
if self._np_random is None:
self._np_random, seed = seeding.np_random()
return self._np_random
@np_random.setter
def np_random(self, value: np.random.Generator):
self._np_random = value
def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, dict]:
"""Run one timestep of the environment's dynamics.
When end of episode is reached, you are responsible for calling :meth:`reset` to reset this environment's state.
Accepts an action and returns either a tuple `(observation, reward, terminated, truncated, info)`.
Args:
action (ActType): an action provided by the agent
Returns:
observation (object): this will be an element of the environment's :attr:`observation_space`.
This may, for instance, be a numpy array containing the positions and velocities of certain objects.
reward (float): The amount of reward returned as a result of taking the action.
terminated (bool): whether a `terminal state` (as defined under the MDP of the task) is reached.
In this case further step() calls could return undefined results.
truncated (bool): whether a truncation condition outside the scope of the MDP is satisfied.
Typically a timelimit, but could also be used to indicate agent physically going out of bounds.
Can be used to end the episode prematurely before a `terminal state` is reached.
info (dictionary): `info` contains auxiliary diagnostic information (helpful for debugging, learning, and logging).
This might, for instance, contain: metrics that describe the agent's performance state, variables that are
hidden from observations, or individual reward terms that are combined to produce the total reward.
It also can contain information that distinguishes truncation and termination, however this is deprecated in favour
of returning two booleans, and will be removed in a future version.
(deprecated)
done (bool): A boolean value for if the episode has ended, in which case further :meth:`step` calls will return undefined results.
A done signal may be emitted for different reasons: Maybe the task underlying the environment was solved successfully,
a certain timelimit was exceeded, or the physics simulation has entered an invalid state.
"""
raise NotImplementedError
def reset(
    self,
    *,
    seed: Optional[int] = None,
    options: Optional[dict] = None,
) -> Tuple[ObsType, dict]:
    """Resets the environment to an initial state and returns the initial observation.

    This method can reset the environment's random number generator(s) if ``seed`` is an integer or
    if the environment has not yet initialized a random number generator.
    If the environment already has a random number generator and :meth:`reset` is called with ``seed=None``,
    the RNG should not be reset. Moreover, :meth:`reset` should (in the typical use case) be called with an
    integer seed right after initialization and then never again.

    Args:
        seed (optional int): The seed that is used to initialize the environment's PRNG.
            If the environment does not already have a PRNG and ``seed=None`` (the default option) is passed,
            a seed will be chosen from some source of entropy (e.g. timestamp or /dev/urandom).
            However, if the environment already has a PRNG and ``seed=None`` is passed, the PRNG will *not* be reset.
            If you pass an integer, the PRNG will be reset even if it already exists.
            Usually, you want to pass an integer *right after the environment has been initialized and then never again*.
        options (optional dict): Additional information to specify how the environment is reset (optional,
            depending on the specific environment)

    Returns:
        observation (object): Observation of the initial state. This will be an element of :attr:`observation_space`
            (typically a numpy array) and is analogous to the observation returned by :meth:`step`.
        info (dictionary): This dictionary contains auxiliary information complementing ``observation``. It should be analogous to
            the ``info`` returned by :meth:`step`.
    """
    # Initialize the RNG if the seed is manually passed. Subclasses are
    # expected to call ``super().reset(seed=seed)`` first so the PRNG is
    # (re)seeded, and then return their own ``(observation, info)`` tuple.
    if seed is not None:
        self._np_random, seed = seeding.np_random(seed)
def render(self) -> Optional[Union[RenderFrame, List[RenderFrame]]]:
    """Compute the render frames as specified by render_mode attribute during initialization of the environment.

    The set of supported modes varies per environment. (And some
    third-party environments may not support rendering at all.)

    By convention, if render_mode is:

    - None (default): no render is computed.
    - human: render return None.
      The environment is continuously rendered in the current display or terminal. Usually for human consumption.
    - rgb_array: return a single frame representing the current state of the environment.
      A frame is a numpy.ndarray with shape (x, y, 3) representing RGB values for an x-by-y pixel image.
    - rgb_array_list: return a list of frames representing the states of the environment since the last reset.
      Each frame is a numpy.ndarray with shape (x, y, 3), as with `rgb_array`.
    - ansi: Return a strings (str) or StringIO.StringIO containing a
      terminal-style text representation for each time step.
      The text can include newlines and ANSI escape sequences (e.g. for colors).

    Note:
        Make sure that your class's metadata 'render_modes' key includes
        the list of supported modes. It's recommended to call super()
        in implementations to use the functionality of this method.
    """
    # Abstract: concrete environments implement their own rendering.
    raise NotImplementedError
def close(self):
    """Override close in your subclass to perform any necessary cleanup.

    Environments will automatically :meth:`close()` themselves when
    garbage collected or when the program exits.
    """
    # Default implementation: nothing to clean up.
    pass
@property
def unwrapped(self) -> "Env":
"""Returns the base non-wrapped environment.
Returns:
Env: The base non-wrapped gym.Env instance
"""
return self
def __str__(self):
"""Returns a string of the environment with the spec id if specified."""
if self.spec is None:
return f"<{type(self).__name__} instance>"
else:
return f"<{type(self).__name__}<{self.spec.id}>>"
def __enter__(self):
"""Support with-statement for the environment."""
return self
def __exit__(self, *args):
"""Support with-statement for the environment."""
self.close()
# propagate exception
return False
class Wrapper(Env[ObsType, ActType]):
    """Base class for all environment wrappers.

    A wrapper delegates every part of the :class:`Env` API to the wrapped
    ``env`` by default; subclasses override selected methods (typically
    :meth:`step` and :meth:`reset`) to modify behavior without touching the
    original environment's code.

    Note:
        Don't forget to call ``super().__init__(env)`` if the subclass overrides :meth:`__init__`.
    """

    def __init__(self, env: Env):
        """Store the wrapped environment and clear all local overrides.

        Args:
            env: The environment to wrap
        """
        self.env = env
        # ``None`` means "no override": the matching property falls back to
        # the wrapped environment's value.
        self._action_space: Optional[spaces.Space] = None
        self._observation_space: Optional[spaces.Space] = None
        self._reward_range: Optional[Tuple[SupportsFloat, SupportsFloat]] = None
        self._metadata: Optional[dict] = None

    def __getattr__(self, name):
        """Forward unknown attribute lookups to the wrapped environment.

        Names with a leading underscore are never forwarded.
        """
        if name.startswith("_"):
            raise AttributeError(f"accessing private attribute '{name}' is prohibited")
        return getattr(self.env, name)

    @property
    def spec(self):
        """The environment specification of the wrapped environment."""
        return self.env.spec

    @classmethod
    def class_name(cls):
        """Name of this wrapper class."""
        return cls.__name__

    @property
    def action_space(self) -> spaces.Space[ActType]:
        """The action space: the local override if set, else the wrapped env's."""
        return self.env.action_space if self._action_space is None else self._action_space

    @action_space.setter
    def action_space(self, space: spaces.Space):
        self._action_space = space

    @property
    def observation_space(self) -> spaces.Space:
        """The observation space: the local override if set, else the wrapped env's."""
        return (
            self.env.observation_space
            if self._observation_space is None
            else self._observation_space
        )

    @observation_space.setter
    def observation_space(self, space: spaces.Space):
        self._observation_space = space

    @property
    def reward_range(self) -> Tuple[SupportsFloat, SupportsFloat]:
        """The reward range: the local override if set, else the wrapped env's."""
        return self.env.reward_range if self._reward_range is None else self._reward_range

    @reward_range.setter
    def reward_range(self, value: Tuple[SupportsFloat, SupportsFloat]):
        self._reward_range = value

    @property
    def metadata(self) -> dict:
        """The metadata dict: the local override if set, else the wrapped env's."""
        return self.env.metadata if self._metadata is None else self._metadata

    @metadata.setter
    def metadata(self, value):
        self._metadata = value

    @property
    def render_mode(self) -> Optional[str]:
        """The render mode of the wrapped environment (not overridable here)."""
        return self.env.render_mode

    @property
    def np_random(self) -> np.random.Generator:
        """The PRNG of the wrapped environment."""
        return self.env.np_random

    @np_random.setter
    def np_random(self, value):
        self.env.np_random = value

    @property
    def _np_random(self):
        # Deliberately inaccessible through a wrapper; see error message.
        raise AttributeError(
            "Can't access `_np_random` of a wrapper, use `.unwrapped._np_random` or `.np_random`."
        )

    def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, dict]:
        """Delegate :meth:`step` to the wrapped environment."""
        return self.env.step(action)

    def reset(self, **kwargs) -> Tuple[ObsType, dict]:
        """Delegate :meth:`reset` to the wrapped environment."""
        return self.env.reset(**kwargs)

    def render(
        self, *args, **kwargs
    ) -> Optional[Union[RenderFrame, List[RenderFrame]]]:
        """Delegate :meth:`render` to the wrapped environment."""
        return self.env.render(*args, **kwargs)

    def close(self):
        """Delegate :meth:`close` to the wrapped environment."""
        return self.env.close()

    def __str__(self):
        """Wrapper name followed by the wrapped environment's string form."""
        return f"<{type(self).__name__}{self.env}>"

    def __repr__(self):
        """Same as :meth:`__str__`."""
        return str(self)

    @property
    def unwrapped(self) -> Env:
        """The innermost (non-wrapper) environment."""
        return self.env.unwrapped
class ObservationWrapper(Wrapper):
    """Superclass of wrappers that transform observations from :meth:`reset` and :meth:`step`.

    Subclasses implement :meth:`observation`, which must be defined on the base
    environment's observation space but may map into a different space; in that
    case, set :attr:`self.observation_space` in the wrapper's :meth:`__init__`.

    Example -- reducing a dict observation to a relative position::

        class RelativePosition(gym.ObservationWrapper):
            def __init__(self, env):
                super().__init__(env)
                self.observation_space = Box(shape=(2,), low=-np.inf, high=np.inf)

            def observation(self, obs):
                return obs["target"] - obs["agent"]

    Among others, Gym provides the observation wrapper :class:`TimeAwareObservation`,
    which adds information about the index of the timestep to the observation.
    """

    def reset(self, **kwargs):
        """Reset the wrapped environment and transform the initial observation."""
        obs, info = self.env.reset(**kwargs)
        return self.observation(obs), info

    def step(self, action):
        """Step the wrapped environment and transform the resulting observation."""
        obs, reward, terminated, truncated, info = self.env.step(action)
        return self.observation(obs), reward, terminated, truncated, info

    def observation(self, observation):
        """Transform the observation (must be overridden by subclasses)."""
        raise NotImplementedError
class RewardWrapper(Wrapper):
    """Superclass of wrappers that transform the reward returned by :meth:`step`.

    Subclasses implement :meth:`reward` to map the raw reward to a new value.
    If the transformation changes the reward range, set :attr:`self.reward_range`
    in the wrapper's :meth:`__init__`.

    Example -- clipping rewards for numerical stability::

        class ClipReward(gym.RewardWrapper):
            def __init__(self, env, min_reward, max_reward):
                super().__init__(env)
                self.min_reward = min_reward
                self.max_reward = max_reward
                self.reward_range = (min_reward, max_reward)

            def reward(self, reward):
                return np.clip(reward, self.min_reward, self.max_reward)
    """

    def step(self, action):
        """Step the wrapped environment, passing the reward through :meth:`self.reward`."""
        obs, rew, terminated, truncated, info = self.env.step(action)
        return obs, self.reward(rew), terminated, truncated, info

    def reward(self, reward):
        """Transform the reward (must be overridden by subclasses)."""
        raise NotImplementedError
class ActionWrapper(Wrapper):
    """Superclass of wrappers that transform actions before :meth:`env.step`.

    Subclasses implement :meth:`action` to map an action from the wrapper's
    action space into the base environment's action space. If the wrapper's
    domain differs from the original action space, set :attr:`self.action_space`
    in the wrapper's :meth:`__init__`.

    Example -- exposing a discrete subset of a continuous action space::

        class DiscreteActions(gym.ActionWrapper):
            def __init__(self, env, disc_to_cont):
                super().__init__(env)
                self.disc_to_cont = disc_to_cont
                self.action_space = Discrete(len(disc_to_cont))

            def action(self, act):
                return self.disc_to_cont[act]

        if __name__ == "__main__":
            env = gym.make("LunarLanderContinuous-v2")
            wrapped_env = DiscreteActions(env, [np.array([1,0]), np.array([-1,0]),
                                                np.array([0,1]), np.array([0,-1])])
            print(wrapped_env.action_space)  # Discrete(4)

    Among others, Gym provides the action wrappers :class:`ClipAction` and :class:`RescaleAction`.
    """

    def step(self, action):
        """Transform ``action`` via :meth:`self.action`, then step the wrapped env."""
        return self.env.step(self.action(action))

    def action(self, action):
        """Map a wrapper-space action to a base-env action (override in subclasses)."""
        raise NotImplementedError

    def reverse_action(self, action):
        """Map a base-env action back to a wrapper-space action (optional override)."""
        raise NotImplementedError
================================================
FILE: gym/envs/__init__.py
================================================
from gym.envs.registration import load_env_plugins as _load_env_plugins
from gym.envs.registration import make, register, registry, spec

# Hook to load plugins from entry points
_load_env_plugins()

# Registry of all built-in Gym environments. Each ``register`` call binds an
# environment id (``"Name-vN"``) to the entry point of its implementing class,
# optionally with default constructor kwargs, an episode step limit, and the
# mean-return threshold at which the task is considered solved.

# Classic
# ----------------------------------------

register(
    id="CartPole-v0",
    entry_point="gym.envs.classic_control.cartpole:CartPoleEnv",
    max_episode_steps=200,
    reward_threshold=195.0,
)

# v1 extends v0's episode length (500 steps) and raises the solve threshold.
register(
    id="CartPole-v1",
    entry_point="gym.envs.classic_control.cartpole:CartPoleEnv",
    max_episode_steps=500,
    reward_threshold=475.0,
)

register(
    id="MountainCar-v0",
    entry_point="gym.envs.classic_control.mountain_car:MountainCarEnv",
    max_episode_steps=200,
    reward_threshold=-110.0,
)

register(
    id="MountainCarContinuous-v0",
    entry_point="gym.envs.classic_control.continuous_mountain_car:Continuous_MountainCarEnv",
    max_episode_steps=999,
    reward_threshold=90.0,
)

register(
    id="Pendulum-v1",
    entry_point="gym.envs.classic_control.pendulum:PendulumEnv",
    max_episode_steps=200,
)

register(
    id="Acrobot-v1",
    entry_point="gym.envs.classic_control.acrobot:AcrobotEnv",
    reward_threshold=-100.0,
    max_episode_steps=500,
)

# Box2d
# ----------------------------------------

register(
    id="LunarLander-v2",
    entry_point="gym.envs.box2d.lunar_lander:LunarLander",
    max_episode_steps=1000,
    reward_threshold=200,
)

# Same class as LunarLander-v2, switched to continuous actions via kwargs.
register(
    id="LunarLanderContinuous-v2",
    entry_point="gym.envs.box2d.lunar_lander:LunarLander",
    kwargs={"continuous": True},
    max_episode_steps=1000,
    reward_threshold=200,
)

register(
    id="BipedalWalker-v3",
    entry_point="gym.envs.box2d.bipedal_walker:BipedalWalker",
    max_episode_steps=1600,
    reward_threshold=300,
)

# Same class as BipedalWalker-v3 with obstacle terrain and a longer episode.
register(
    id="BipedalWalkerHardcore-v3",
    entry_point="gym.envs.box2d.bipedal_walker:BipedalWalker",
    kwargs={"hardcore": True},
    max_episode_steps=2000,
    reward_threshold=300,
)

register(
    id="CarRacing-v2",
    entry_point="gym.envs.box2d.car_racing:CarRacing",
    max_episode_steps=1000,
    reward_threshold=900,
)

# Toy Text
# ----------------------------------------

register(
    id="Blackjack-v1",
    entry_point="gym.envs.toy_text.blackjack:BlackjackEnv",
    kwargs={"sab": True, "natural": False},
)

register(
    id="FrozenLake-v1",
    entry_point="gym.envs.toy_text.frozen_lake:FrozenLakeEnv",
    kwargs={"map_name": "4x4"},
    max_episode_steps=100,
    reward_threshold=0.70,  # optimum = 0.74
)

register(
    id="FrozenLake8x8-v1",
    entry_point="gym.envs.toy_text.frozen_lake:FrozenLakeEnv",
    kwargs={"map_name": "8x8"},
    max_episode_steps=200,
    reward_threshold=0.85,  # optimum = 0.91
)

register(
    id="CliffWalking-v0",
    entry_point="gym.envs.toy_text.cliffwalking:CliffWalkingEnv",
)

register(
    id="Taxi-v3",
    entry_point="gym.envs.toy_text.taxi:TaxiEnv",
    reward_threshold=8,  # optimum = 8.46
    max_episode_steps=200,
)

# Mujoco
# ----------------------------------------

# v2 entries use the original mujoco-py based classes exported from
# gym.envs.mujoco; v3/v4 entries point at their versioned modules.

# 2D
register(
    id="Reacher-v2",
    entry_point="gym.envs.mujoco:ReacherEnv",
    max_episode_steps=50,
    reward_threshold=-3.75,
)

register(
    id="Reacher-v4",
    entry_point="gym.envs.mujoco.reacher_v4:ReacherEnv",
    max_episode_steps=50,
    reward_threshold=-3.75,
)

register(
    id="Pusher-v2",
    entry_point="gym.envs.mujoco:PusherEnv",
    max_episode_steps=100,
    reward_threshold=0.0,
)

register(
    id="Pusher-v4",
    entry_point="gym.envs.mujoco.pusher_v4:PusherEnv",
    max_episode_steps=100,
    reward_threshold=0.0,
)

register(
    id="InvertedPendulum-v2",
    entry_point="gym.envs.mujoco:InvertedPendulumEnv",
    max_episode_steps=1000,
    reward_threshold=950.0,
)

register(
    id="InvertedPendulum-v4",
    entry_point="gym.envs.mujoco.inverted_pendulum_v4:InvertedPendulumEnv",
    max_episode_steps=1000,
    reward_threshold=950.0,
)

register(
    id="InvertedDoublePendulum-v2",
    entry_point="gym.envs.mujoco:InvertedDoublePendulumEnv",
    max_episode_steps=1000,
    reward_threshold=9100.0,
)

register(
    id="InvertedDoublePendulum-v4",
    entry_point="gym.envs.mujoco.inverted_double_pendulum_v4:InvertedDoublePendulumEnv",
    max_episode_steps=1000,
    reward_threshold=9100.0,
)

register(
    id="HalfCheetah-v2",
    entry_point="gym.envs.mujoco:HalfCheetahEnv",
    max_episode_steps=1000,
    reward_threshold=4800.0,
)

register(
    id="HalfCheetah-v3",
    entry_point="gym.envs.mujoco.half_cheetah_v3:HalfCheetahEnv",
    max_episode_steps=1000,
    reward_threshold=4800.0,
)

register(
    id="HalfCheetah-v4",
    entry_point="gym.envs.mujoco.half_cheetah_v4:HalfCheetahEnv",
    max_episode_steps=1000,
    reward_threshold=4800.0,
)

register(
    id="Hopper-v2",
    entry_point="gym.envs.mujoco:HopperEnv",
    max_episode_steps=1000,
    reward_threshold=3800.0,
)

register(
    id="Hopper-v3",
    entry_point="gym.envs.mujoco.hopper_v3:HopperEnv",
    max_episode_steps=1000,
    reward_threshold=3800.0,
)

register(
    id="Hopper-v4",
    entry_point="gym.envs.mujoco.hopper_v4:HopperEnv",
    max_episode_steps=1000,
    reward_threshold=3800.0,
)

register(
    id="Swimmer-v2",
    entry_point="gym.envs.mujoco:SwimmerEnv",
    max_episode_steps=1000,
    reward_threshold=360.0,
)

register(
    id="Swimmer-v3",
    entry_point="gym.envs.mujoco.swimmer_v3:SwimmerEnv",
    max_episode_steps=1000,
    reward_threshold=360.0,
)

register(
    id="Swimmer-v4",
    entry_point="gym.envs.mujoco.swimmer_v4:SwimmerEnv",
    max_episode_steps=1000,
    reward_threshold=360.0,
)

register(
    id="Walker2d-v2",
    max_episode_steps=1000,
    entry_point="gym.envs.mujoco:Walker2dEnv",
)

register(
    id="Walker2d-v3",
    max_episode_steps=1000,
    entry_point="gym.envs.mujoco.walker2d_v3:Walker2dEnv",
)

register(
    id="Walker2d-v4",
    max_episode_steps=1000,
    entry_point="gym.envs.mujoco.walker2d_v4:Walker2dEnv",
)

register(
    id="Ant-v2",
    entry_point="gym.envs.mujoco:AntEnv",
    max_episode_steps=1000,
    reward_threshold=6000.0,
)

register(
    id="Ant-v3",
    entry_point="gym.envs.mujoco.ant_v3:AntEnv",
    max_episode_steps=1000,
    reward_threshold=6000.0,
)

register(
    id="Ant-v4",
    entry_point="gym.envs.mujoco.ant_v4:AntEnv",
    max_episode_steps=1000,
    reward_threshold=6000.0,
)

register(
    id="Humanoid-v2",
    entry_point="gym.envs.mujoco:HumanoidEnv",
    max_episode_steps=1000,
)

register(
    id="Humanoid-v3",
    entry_point="gym.envs.mujoco.humanoid_v3:HumanoidEnv",
    max_episode_steps=1000,
)

register(
    id="Humanoid-v4",
    entry_point="gym.envs.mujoco.humanoid_v4:HumanoidEnv",
    max_episode_steps=1000,
)

register(
    id="HumanoidStandup-v2",
    entry_point="gym.envs.mujoco:HumanoidStandupEnv",
    max_episode_steps=1000,
)

register(
    id="HumanoidStandup-v4",
    entry_point="gym.envs.mujoco.humanoidstandup_v4:HumanoidStandupEnv",
    max_episode_steps=1000,
)
================================================
FILE: gym/envs/box2d/__init__.py
================================================
from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore
from gym.envs.box2d.car_racing import CarRacing
from gym.envs.box2d.lunar_lander import LunarLander, LunarLanderContinuous
================================================
FILE: gym/envs/box2d/bipedal_walker.py
================================================
__credits__ = ["Andrea PIERRÉ"]
import math
from typing import TYPE_CHECKING, List, Optional
import numpy as np
import gym
from gym import error, spaces
from gym.error import DependencyNotInstalled
from gym.utils import EzPickle
# Box2D is an optional dependency (installed via `pip install gym[box2d]`).
# Fail fast with a clear, actionable error when it is missing, chaining the
# original ImportError so the underlying cause stays visible in tracebacks.
try:
    import Box2D
    from Box2D.b2 import (
        circleShape,
        contactListener,
        edgeShape,
        fixtureDef,
        polygonShape,
        revoluteJointDef,
    )
except ImportError as e:
    raise DependencyNotInstalled(
        "box2D is not installed, run `pip install gym[box2d]`"
    ) from e

if TYPE_CHECKING:
    # pygame is only needed for rendering; import it for type checkers only.
    import pygame
# Simulation and physical tuning constants for BipedalWalker.
FPS = 50
SCALE = 30.0  # affects how fast-paced the game is, forces should be adjusted as well

MOTORS_TORQUE = 80  # max motor torque applied at the joints
SPEED_HIP = 4  # target angular speed of the hip joints
SPEED_KNEE = 6  # target angular speed of the knee joints
LIDAR_RANGE = 160 / SCALE  # lidar reach, in world units

INITIAL_RANDOM = 5  # magnitude of the random force applied to the hull at reset

# Hull polygon in pixels; divided by SCALE when the fixture is built.
HULL_POLY = [(-30, +9), (+6, +9), (+34, +1), (+34, -8), (-30, -8)]
LEG_DOWN = -8 / SCALE  # vertical offset of the hip anchor below the hull
LEG_W, LEG_H = 8 / SCALE, 34 / SCALE

VIEWPORT_W = 600
VIEWPORT_H = 400

TERRAIN_STEP = 14 / SCALE  # horizontal size of one terrain segment
TERRAIN_LENGTH = 200  # in steps
TERRAIN_HEIGHT = VIEWPORT_H / SCALE / 4  # baseline terrain height
TERRAIN_GRASS = 10  # how long grass spots are, in steps
TERRAIN_STARTPAD = 20  # in steps
FRICTION = 2.5

# Fixture templates for the hull and both leg segments. categoryBits/maskBits
# are set so body parts collide only with the ground, never with each other.
HULL_FD = fixtureDef(
    shape=polygonShape(vertices=[(x / SCALE, y / SCALE) for x, y in HULL_POLY]),
    density=5.0,
    friction=0.1,
    categoryBits=0x0020,
    maskBits=0x001,  # collide only with ground
    restitution=0.0,  # no bounce (0.99 would be very bouncy)
)

LEG_FD = fixtureDef(
    shape=polygonShape(box=(LEG_W / 2, LEG_H / 2)),
    density=1.0,
    restitution=0.0,
    categoryBits=0x0020,
    maskBits=0x001,
)

# Lower leg segment: slightly narrower than the upper leg.
LOWER_FD = fixtureDef(
    shape=polygonShape(box=(0.8 * LEG_W / 2, LEG_H / 2)),
    density=1.0,
    restitution=0.0,
    categoryBits=0x0020,
    maskBits=0x001,
)
class ContactDetector(contactListener):
    """Box2D contact listener: ends the episode on hull contact and tracks
    ground contact of the two lower legs (``env.legs[1]`` and ``env.legs[3]``)."""

    def __init__(self, env):
        contactListener.__init__(self)
        self.env = env

    def BeginContact(self, contact):
        bodies = (contact.fixtureA.body, contact.fixtureB.body)
        # Any contact involving the hull is a fall -> game over.
        if self.env.hull in bodies:
            self.env.game_over = True
        for leg in (self.env.legs[1], self.env.legs[3]):
            if leg in bodies:
                leg.ground_contact = True

    def EndContact(self, contact):
        bodies = (contact.fixtureA.body, contact.fixtureB.body)
        for leg in (self.env.legs[1], self.env.legs[3]):
            if leg in bodies:
                leg.ground_contact = False
class BipedalWalker(gym.Env, EzPickle):
"""
### Description
This is a simple 4-joint walker robot environment.
There are two versions:
- Normal, with slightly uneven terrain.
- Hardcore, with ladders, stumps, pitfalls.
To solve the normal version, you need to get 300 points in 1600 time steps.
To solve the hardcore version, you need 300 points in 2000 time steps.
A heuristic is provided for testing. It's also useful to get demonstrations
to learn from. To run the heuristic:
```
python gym/envs/box2d/bipedal_walker.py
```
### Action Space
Actions are motor speed values in the [-1, 1] range for each of the
4 joints at both hips and knees.
### Observation Space
State consists of hull angle speed, angular velocity, horizontal speed,
vertical speed, position of joints and joints angular speed, legs contact
with ground, and 10 lidar rangefinder measurements. There are no coordinates
in the state vector.
### Rewards
Reward is given for moving forward, totaling 300+ points up to the far end.
If the robot falls, it gets -100. Applying motor torque costs a small
amount of points. A more optimal agent will get a better score.
### Starting State
The walker starts standing at the left end of the terrain with the hull
horizontal, and both legs in the same position with a slight knee angle.
### Episode Termination
The episode will terminate if the hull gets in contact with the ground or
if the walker exceeds the right end of the terrain length.
### Arguments
To use the _hardcore_ environment, you need to specify the
`hardcore=True` argument like below:
```python
import gym
env = gym.make("BipedalWalker-v3", hardcore=True)
```
### Version History
- v3: returns closest lidar trace instead of furthest;
faster video recording
- v2: Count energy spent
- v1: Legs now report contact with ground; motors have higher torque and
speed; ground has higher friction; lidar rendered less nervously.
- v0: Initial version
<!-- ### References -->
### Credits
Created by Oleg Klimov
"""
metadata = {
"render_modes": ["human", "rgb_array"],
"render_fps": FPS,
}
def __init__(self, render_mode: Optional[str] = None, hardcore: bool = False):
    """Create the Box2D world, fixture templates, spaces and render state.

    Args:
        render_mode: one of the modes in ``metadata["render_modes"]``, or
            ``None`` for no rendering.
        hardcore: if ``True``, terrain generation inserts stumps, stairs
            and pits between grass segments.
    """
    EzPickle.__init__(self, render_mode, hardcore)
    self.isopen = True
    self.world = Box2D.b2World()
    self.terrain: List[Box2D.b2Body] = []
    self.hull: Optional[Box2D.b2Body] = None
    self.prev_shaping = None  # shaping value from the previous step (set in step/reset)
    self.hardcore = hardcore

    # Reusable fixture templates; their shape vertices are overwritten in
    # _generate_terrain before each CreateStaticBody call.
    self.fd_polygon = fixtureDef(
        shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)]),
        friction=FRICTION,
    )

    self.fd_edge = fixtureDef(
        shape=edgeShape(vertices=[(0, 0), (1, 1)]),
        friction=FRICTION,
        categoryBits=0x0001,
    )

    # we use 5.0 to represent the joints moving at maximum
    # 5 x the rated speed due to impulses from ground contact etc.
    low = np.array(
        [
            -math.pi,
            -5.0,
            -5.0,
            -5.0,
            -math.pi,
            -5.0,
            -math.pi,
            -5.0,
            -0.0,
            -math.pi,
            -5.0,
            -math.pi,
            -5.0,
            -0.0,
        ]
        + [-1.0] * 10  # 10 lidar fractions, each in [-1, 1]
    ).astype(np.float32)
    high = np.array(
        [
            math.pi,
            5.0,
            5.0,
            5.0,
            math.pi,
            5.0,
            math.pi,
            5.0,
            5.0,
            math.pi,
            5.0,
            math.pi,
            5.0,
            5.0,
        ]
        + [1.0] * 10
    ).astype(np.float32)
    # Motor control values for hip/knee of both legs, each in [-1, 1].
    self.action_space = spaces.Box(
        np.array([-1, -1, -1, -1]).astype(np.float32),
        np.array([1, 1, 1, 1]).astype(np.float32),
    )
    self.observation_space = spaces.Box(low, high)

    # Layout of the observation vector (computed in step()):
    # state = [
    #     self.hull.angle,  # Normal angles up to 0.5 here, but sure more is possible.
    #     2.0 * self.hull.angularVelocity / FPS,
    #     0.3 * vel.x * (VIEWPORT_W / SCALE) / FPS,  # Normalized to get -1..1 range
    #     0.3 * vel.y * (VIEWPORT_H / SCALE) / FPS,
    #     self.joints[
    #         0
    #     ].angle,  # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
    #     self.joints[0].speed / SPEED_HIP,
    #     self.joints[1].angle + 1.0,
    #     self.joints[1].speed / SPEED_KNEE,
    #     1.0 if self.legs[1].ground_contact else 0.0,
    #     self.joints[2].angle,
    #     self.joints[2].speed / SPEED_HIP,
    #     self.joints[3].angle + 1.0,
    #     self.joints[3].speed / SPEED_KNEE,
    #     1.0 if self.legs[3].ground_contact else 0.0,
    # ]
    # state += [l.fraction for l in self.lidar]

    self.render_mode = render_mode
    self.screen: Optional[pygame.Surface] = None
    self.clock = None
def _destroy(self):
    """Tear down all Box2D bodies from the previous episode.

    No-op when called before the first reset (``self.terrain`` is empty).
    """
    if not self.terrain:
        return
    # Detach the contact listener before destroying bodies.
    self.world.contactListener = None
    for terrain_body in self.terrain:
        self.world.DestroyBody(terrain_body)
    self.terrain = []
    self.world.DestroyBody(self.hull)
    self.hull = None
    for leg_body in self.legs:
        self.world.DestroyBody(leg_body)
    self.legs = []
    self.joints = []
def _generate_terrain(self, hardcore):
    """Procedurally build the terrain as Box2D static bodies.

    Walks TERRAIN_LENGTH steps of a small state machine. GRASS segments
    always appear between obstacles; when ``hardcore`` is True, a random
    obstacle state (STUMP, STAIRS or PIT) is entered after each grass run.
    ``oneshot`` is True exactly on the first step of a new state, which is
    when the obstacle's bodies are created; ``counter`` counts down the
    remaining steps in the current state.

    Args:
        hardcore: whether to insert obstacles between grass segments.
    """
    GRASS, STUMP, STAIRS, PIT, _STATES_ = range(5)
    state = GRASS
    velocity = 0.0
    y = TERRAIN_HEIGHT
    counter = TERRAIN_STARTPAD
    oneshot = False
    self.terrain = []
    self.terrain_x = []
    self.terrain_y = []

    stair_steps, stair_width, stair_height = 0, 0, 0
    original_y = 0
    for i in range(TERRAIN_LENGTH):
        x = i * TERRAIN_STEP
        self.terrain_x.append(x)

        if state == GRASS and not oneshot:
            # Smooth random height drift, pulled back towards TERRAIN_HEIGHT.
            velocity = 0.8 * velocity + 0.01 * np.sign(TERRAIN_HEIGHT - y)
            if i > TERRAIN_STARTPAD:
                velocity += self.np_random.uniform(-1, 1) / SCALE  # 1
            y += velocity

        elif state == PIT and oneshot:
            # Create the two vertical walls of a pit; the pit width (in steps)
            # is drawn into ``counter``.
            counter = self.np_random.integers(3, 5)
            poly = [
                (x, y),
                (x + TERRAIN_STEP, y),
                (x + TERRAIN_STEP, y - 4 * TERRAIN_STEP),
                (x, y - 4 * TERRAIN_STEP),
            ]
            self.fd_polygon.shape.vertices = poly
            t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
            t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
            self.terrain.append(t)

            # Far wall, shifted right by the pit width.
            self.fd_polygon.shape.vertices = [
                (p[0] + TERRAIN_STEP * counter, p[1]) for p in poly
            ]
            t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
            t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
            self.terrain.append(t)
            counter += 2
            original_y = y

        elif state == PIT and not oneshot:
            # Inside the pit the walkable line drops 4 steps down.
            y = original_y
            if counter > 1:
                y -= 4 * TERRAIN_STEP

        elif state == STUMP and oneshot:
            # Square stump, 1-2 steps wide and equally tall.
            counter = self.np_random.integers(1, 3)
            poly = [
                (x, y),
                (x + counter * TERRAIN_STEP, y),
                (x + counter * TERRAIN_STEP, y + counter * TERRAIN_STEP),
                (x, y + counter * TERRAIN_STEP),
            ]
            self.fd_polygon.shape.vertices = poly
            t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
            t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
            self.terrain.append(t)

        elif state == STAIRS and oneshot:
            # Staircase going up or down: 3-4 steps, each stair_width wide.
            stair_height = +1 if self.np_random.random() > 0.5 else -1
            stair_width = self.np_random.integers(4, 5)
            stair_steps = self.np_random.integers(3, 5)
            original_y = y
            for s in range(stair_steps):
                poly = [
                    (
                        x + (s * stair_width) * TERRAIN_STEP,
                        y + (s * stair_height) * TERRAIN_STEP,
                    ),
                    (
                        x + ((1 + s) * stair_width) * TERRAIN_STEP,
                        y + (s * stair_height) * TERRAIN_STEP,
                    ),
                    (
                        x + ((1 + s) * stair_width) * TERRAIN_STEP,
                        y + (-1 + s * stair_height) * TERRAIN_STEP,
                    ),
                    (
                        x + (s * stair_width) * TERRAIN_STEP,
                        y + (-1 + s * stair_height) * TERRAIN_STEP,
                    ),
                ]
                self.fd_polygon.shape.vertices = poly
                t = self.world.CreateStaticBody(fixtures=self.fd_polygon)
                t.color1, t.color2 = (255, 255, 255), (153, 153, 153)
                self.terrain.append(t)
            counter = stair_steps * stair_width

        elif state == STAIRS and not oneshot:
            # Track the walkable height along the staircase.
            s = stair_steps * stair_width - counter - stair_height
            n = s / stair_width
            y = original_y + (n * stair_height) * TERRAIN_STEP

        oneshot = False
        self.terrain_y.append(y)
        counter -= 1
        if counter == 0:
            # Current state exhausted: draw the next grass run length and,
            # in hardcore mode, pick the next obstacle at random.
            counter = self.np_random.integers(TERRAIN_GRASS / 2, TERRAIN_GRASS)
            if state == GRASS and hardcore:
                state = self.np_random.integers(1, _STATES_)
                oneshot = True
            else:
                state = GRASS
                oneshot = True

    # Build the visible grass edge bodies along the generated height profile.
    self.terrain_poly = []
    for i in range(TERRAIN_LENGTH - 1):
        poly = [
            (self.terrain_x[i], self.terrain_y[i]),
            (self.terrain_x[i + 1], self.terrain_y[i + 1]),
        ]
        self.fd_edge.shape.vertices = poly
        t = self.world.CreateStaticBody(fixtures=self.fd_edge)
        color = (76, 255 if i % 2 == 0 else 204, 76)  # alternating grass shades
        t.color1 = color
        t.color2 = color
        self.terrain.append(t)
        color = (102, 153, 76)
        # Close the polygon down to y=0 for filled rendering.
        poly += [(poly[1][0], 0), (poly[0][0], 0)]
        self.terrain_poly.append((poly, color))
    self.terrain.reverse()
def _generate_clouds(self):
    """Generate decorative cloud polygons spread along the terrain.

    Purely cosmetic: the result is only stored in ``self.cloud_poly`` as
    ``(polygon, left_x, right_x)`` tuples for the renderer.
    """
    # Sorry for the clouds, couldn't resist
    self.cloud_poly = []
    for _ in range(TERRAIN_LENGTH // 20):
        x = self.np_random.uniform(0, TERRAIN_LENGTH) * TERRAIN_STEP
        y = VIEWPORT_H / SCALE * 3 / 4
        poly = []
        # Five jittered points around a rough circle. The two uniform draws
        # per point happen in the same order as the original implementation,
        # so seeded runs produce identical clouds.
        for a in range(5):
            px = (
                x
                + 15 * TERRAIN_STEP * math.sin(3.14 * 2 * a / 5)
                + self.np_random.uniform(0, 5 * TERRAIN_STEP)
            )
            py = (
                y
                + 5 * TERRAIN_STEP * math.cos(3.14 * 2 * a / 5)
                + self.np_random.uniform(0, 5 * TERRAIN_STEP)
            )
            poly.append((px, py))
        x1 = min(p[0] for p in poly)
        x2 = max(p[0] for p in poly)
        self.cloud_poly.append((poly, x1, x2))
def reset(
    self,
    *,
    seed: Optional[int] = None,
    options: Optional[dict] = None,
):
    """Rebuild the world and return the initial ``(observation, info)``.

    Destroys all bodies from the previous episode, regenerates terrain and
    clouds, recreates the hull and both two-segment legs with motorized
    revolute joints, then takes one zero-action step to produce the first
    observation.

    Args:
        seed: optional PRNG seed, forwarded to :meth:`gym.Env.reset`.
        options: unused here; accepted for API compatibility.
    """
    super().reset(seed=seed)
    self._destroy()
    # Box2D workaround: keep a Python-side reference so the listener is not
    # garbage collected while the world still points at it.
    self.world.contactListener_bug_workaround = ContactDetector(self)
    self.world.contactListener = self.world.contactListener_bug_workaround
    self.game_over = False
    self.prev_shaping = None
    self.scroll = 0.0
    self.lidar_render = 0

    self._generate_terrain(self.hardcore)
    self._generate_clouds()

    # Spawn the hull above the middle of the flat start pad.
    init_x = TERRAIN_STEP * TERRAIN_STARTPAD / 2
    init_y = TERRAIN_HEIGHT + 2 * LEG_H
    self.hull = self.world.CreateDynamicBody(
        position=(init_x, init_y), fixtures=HULL_FD
    )
    self.hull.color1 = (127, 51, 229)
    self.hull.color2 = (76, 76, 127)
    # Random horizontal nudge so episodes do not start identically.
    self.hull.ApplyForceToCenter(
        (self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True
    )

    # Two legs (i = -1 left, +1 right), each with an upper and lower segment.
    # legs/joints are appended in order: [upper, lower] per leg, so the lower
    # segments (with ground_contact flags) end up at indices 1 and 3.
    self.legs: List[Box2D.b2Body] = []
    self.joints: List[Box2D.b2RevoluteJoint] = []
    for i in [-1, +1]:
        leg = self.world.CreateDynamicBody(
            position=(init_x, init_y - LEG_H / 2 - LEG_DOWN),
            angle=(i * 0.05),
            fixtures=LEG_FD,
        )
        leg.color1 = (153 - i * 25, 76 - i * 25, 127 - i * 25)
        leg.color2 = (102 - i * 25, 51 - i * 25, 76 - i * 25)
        # Hip joint: hull <-> upper leg.
        rjd = revoluteJointDef(
            bodyA=self.hull,
            bodyB=leg,
            localAnchorA=(0, LEG_DOWN),
            localAnchorB=(0, LEG_H / 2),
            enableMotor=True,
            enableLimit=True,
            maxMotorTorque=MOTORS_TORQUE,
            motorSpeed=i,
            lowerAngle=-0.8,
            upperAngle=1.1,
        )
        self.legs.append(leg)
        self.joints.append(self.world.CreateJoint(rjd))

        lower = self.world.CreateDynamicBody(
            position=(init_x, init_y - LEG_H * 3 / 2 - LEG_DOWN),
            angle=(i * 0.05),
            fixtures=LOWER_FD,
        )
        lower.color1 = (153 - i * 25, 76 - i * 25, 127 - i * 25)
        lower.color2 = (102 - i * 25, 51 - i * 25, 76 - i * 25)
        # Knee joint: upper leg <-> lower leg.
        rjd = revoluteJointDef(
            bodyA=leg,
            bodyB=lower,
            localAnchorA=(0, -LEG_H / 2),
            localAnchorB=(0, LEG_H / 2),
            enableMotor=True,
            enableLimit=True,
            maxMotorTorque=MOTORS_TORQUE,
            motorSpeed=1,
            lowerAngle=-1.6,
            upperAngle=-0.1,
        )
        lower.ground_contact = False
        self.legs.append(lower)
        self.joints.append(self.world.CreateJoint(rjd))

    self.drawlist = self.terrain + self.legs + [self.hull]

    class LidarCallback(Box2D.b2.rayCastCallback):
        # Ray-cast callback: record the closest hit on a ground fixture
        # (categoryBits bit 0 set); returning ``fraction`` clips later
        # reports to nearer hits, returning -1 ignores non-ground fixtures.
        def ReportFixture(self, fixture, point, normal, fraction):
            if (fixture.filterData.categoryBits & 1) == 0:
                return -1
            self.p2 = point
            self.fraction = fraction
            return fraction

    self.lidar = [LidarCallback() for _ in range(10)]
    if self.render_mode == "human":
        self.render()
    # First observation comes from one zero-action step.
    return self.step(np.array([0, 0, 0, 0]))[0], {}
def step(self, action: np.ndarray):
    """Apply joint torques, advance the physics one frame and observe.

    Args:
        action: 4 values in [-1, 1] — hip1, knee1, hip2, knee2.

    Returns:
        (obs, reward, terminated, truncated, info). The 24-dim observation
        holds hull angle/velocities, joint angles/speeds, leg contact
        flags and 10 lidar fractions; truncated is always False here.
    """
    assert self.hull is not None
    # self.hull.ApplyForceToCenter((0, 20), True) -- Uncomment this to receive a bit of stability help
    control_speed = False  # Should be easier as well
    if control_speed:
        # Alternative scheme: action directly sets the joint motor speed.
        self.joints[0].motorSpeed = float(SPEED_HIP * np.clip(action[0], -1, 1))
        self.joints[1].motorSpeed = float(SPEED_KNEE * np.clip(action[1], -1, 1))
        self.joints[2].motorSpeed = float(SPEED_HIP * np.clip(action[2], -1, 1))
        self.joints[3].motorSpeed = float(SPEED_KNEE * np.clip(action[3], -1, 1))
    else:
        # Default scheme: the action's sign picks the motor direction and
        # its magnitude scales the available torque.
        self.joints[0].motorSpeed = float(SPEED_HIP * np.sign(action[0]))
        self.joints[0].maxMotorTorque = float(
            MOTORS_TORQUE * np.clip(np.abs(action[0]), 0, 1)
        )
        self.joints[1].motorSpeed = float(SPEED_KNEE * np.sign(action[1]))
        self.joints[1].maxMotorTorque = float(
            MOTORS_TORQUE * np.clip(np.abs(action[1]), 0, 1)
        )
        self.joints[2].motorSpeed = float(SPEED_HIP * np.sign(action[2]))
        self.joints[2].maxMotorTorque = float(
            MOTORS_TORQUE * np.clip(np.abs(action[2]), 0, 1)
        )
        self.joints[3].motorSpeed = float(SPEED_KNEE * np.sign(action[3]))
        self.joints[3].maxMotorTorque = float(
            MOTORS_TORQUE * np.clip(np.abs(action[3]), 0, 1)
        )
    self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
    pos = self.hull.position
    vel = self.hull.linearVelocity
    # Cast 10 lidar rays fanning from straight down to ahead of the hull.
    for i in range(10):
        self.lidar[i].fraction = 1.0
        self.lidar[i].p1 = pos
        self.lidar[i].p2 = (
            pos[0] + math.sin(1.5 * i / 10.0) * LIDAR_RANGE,
            pos[1] - math.cos(1.5 * i / 10.0) * LIDAR_RANGE,
        )
        self.world.RayCast(self.lidar[i], self.lidar[i].p1, self.lidar[i].p2)
    state = [
        self.hull.angle,  # Normal angles up to 0.5 here, but sure more is possible.
        2.0 * self.hull.angularVelocity / FPS,
        0.3 * vel.x * (VIEWPORT_W / SCALE) / FPS,  # Normalized to get -1..1 range
        0.3 * vel.y * (VIEWPORT_H / SCALE) / FPS,
        self.joints[0].angle,
        # This will give 1.1 on high up, but it's still OK (and there should be spikes on hiting the ground, that's normal too)
        self.joints[0].speed / SPEED_HIP,
        self.joints[1].angle + 1.0,
        self.joints[1].speed / SPEED_KNEE,
        1.0 if self.legs[1].ground_contact else 0.0,
        self.joints[2].angle,
        self.joints[2].speed / SPEED_HIP,
        self.joints[3].angle + 1.0,
        self.joints[3].speed / SPEED_KNEE,
        1.0 if self.legs[3].ground_contact else 0.0,
    ]
    state += [l.fraction for l in self.lidar]
    assert len(state) == 24
    self.scroll = pos.x - VIEWPORT_W / SCALE / 5
    # Potential-based shaping: reward forward progress, penalize hull tilt.
    shaping = (
        130 * pos[0] / SCALE
    )  # moving forward is a way to receive reward (normalized to get 300 on completion)
    shaping -= 5.0 * abs(
        state[0]
    )  # keep head straight, other than that and falling, any behavior is unpunished
    reward = 0
    if self.prev_shaping is not None:
        reward = shaping - self.prev_shaping
    self.prev_shaping = shaping
    # Small per-joint torque cost discourages wasteful actuation.
    for a in action:
        reward -= 0.00035 * MOTORS_TORQUE * np.clip(np.abs(a), 0, 1)
        # normalized to about -50.0 using heuristic, more optimal agent should spend less
    terminated = False
    # Falling (hull contact sets game_over) or going behind start: -100.
    if self.game_over or pos[0] < 0:
        reward = -100
        terminated = True
    # Reaching the end of the terrain ends the episode without penalty.
    if pos[0] > (TERRAIN_LENGTH - TERRAIN_GRASS) * TERRAIN_STEP:
        terminated = True
    if self.render_mode == "human":
        self.render()
    return np.array(state, dtype=np.float32), reward, terminated, False, {}
def render(self):
    """Draw the scene: sky, clouds, terrain, lidar trace, bodies, flag.

    Returns an (H, W, 3) uint8 array in "rgb_array" mode, draws to the
    window in "human" mode, and warns (returning None) when no
    render_mode was set at construction time.
    """
    if self.render_mode is None:
        gym.logger.warn(
            "You are calling render method without specifying any render mode. "
            "You can specify the render_mode at initialization, "
            # Bug fix: the suggested snippet previously read gym(...) which
            # is not callable; the correct entry point is gym.make(...).
            f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
        )
        return
    try:
        import pygame
        from pygame import gfxdraw
    except ImportError:
        raise DependencyNotInstalled(
            "pygame is not installed, run `pip install gym[box2d]`"
        )
    if self.screen is None and self.render_mode == "human":
        pygame.init()
        pygame.display.init()
        self.screen = pygame.display.set_mode((VIEWPORT_W, VIEWPORT_H))
    if self.clock is None:
        self.clock = pygame.time.Clock()
    # The surface is widened by the scroll so the world can be blitted
    # shifted left by the camera offset.
    self.surf = pygame.Surface(
        (VIEWPORT_W + max(0.0, self.scroll) * SCALE, VIEWPORT_H)
    )
    # NOTE(review): the scaled surface returned here is discarded, so this
    # call appears to be a no-op; confirm intent before removing.
    pygame.transform.scale(self.surf, (SCALE, SCALE))
    # Sky backdrop covering the visible window.
    pygame.draw.polygon(
        self.surf,
        color=(215, 215, 255),
        points=[
            (self.scroll * SCALE, 0),
            (self.scroll * SCALE + VIEWPORT_W, 0),
            (self.scroll * SCALE + VIEWPORT_W, VIEWPORT_H),
            (self.scroll * SCALE, VIEWPORT_H),
        ],
    )
    # Clouds move at half the scroll speed for a parallax effect; off-screen
    # clouds are culled.
    for poly, x1, x2 in self.cloud_poly:
        if x2 < self.scroll / 2:
            continue
        if x1 > self.scroll / 2 + VIEWPORT_W / SCALE:
            continue
        pygame.draw.polygon(
            self.surf,
            color=(255, 255, 255),
            points=[
                (p[0] * SCALE + self.scroll * SCALE / 2, p[1] * SCALE) for p in poly
            ],
        )
        gfxdraw.aapolygon(
            self.surf,
            [(p[0] * SCALE + self.scroll * SCALE / 2, p[1] * SCALE) for p in poly],
            (255, 255, 255),
        )
    # Terrain polygons, culled to the visible range.
    for poly, color in self.terrain_poly:
        if poly[1][0] < self.scroll:
            continue
        if poly[0][0] > self.scroll + VIEWPORT_W / SCALE:
            continue
        scaled_poly = []
        for coord in poly:
            scaled_poly.append([coord[0] * SCALE, coord[1] * SCALE])
        pygame.draw.polygon(self.surf, color=color, points=scaled_poly)
        gfxdraw.aapolygon(self.surf, scaled_poly, color)
    # Animate a single lidar ray per frame, sweeping forward then back.
    self.lidar_render = (self.lidar_render + 1) % 100
    i = self.lidar_render
    if i < 2 * len(self.lidar):
        single_lidar = (
            self.lidar[i]
            if i < len(self.lidar)
            else self.lidar[len(self.lidar) - i - 1]
        )
        if hasattr(single_lidar, "p1") and hasattr(single_lidar, "p2"):
            pygame.draw.line(
                self.surf,
                color=(255, 0, 0),
                start_pos=(single_lidar.p1[0] * SCALE, single_lidar.p1[1] * SCALE),
                end_pos=(single_lidar.p2[0] * SCALE, single_lidar.p2[1] * SCALE),
                width=1,
            )
    # Walker bodies (hull + legs) and any other drawlist fixtures.
    for obj in self.drawlist:
        for f in obj.fixtures:
            trans = f.body.transform
            if type(f.shape) is circleShape:
                pygame.draw.circle(
                    self.surf,
                    color=obj.color1,
                    center=trans * f.shape.pos * SCALE,
                    radius=f.shape.radius * SCALE,
                )
                pygame.draw.circle(
                    self.surf,
                    color=obj.color2,
                    center=trans * f.shape.pos * SCALE,
                    radius=f.shape.radius * SCALE,
                )
            else:
                path = [trans * v * SCALE for v in f.shape.vertices]
                if len(path) > 2:
                    # Filled polygon in color1 with a color2 outline.
                    pygame.draw.polygon(self.surf, color=obj.color1, points=path)
                    gfxdraw.aapolygon(self.surf, path, obj.color1)
                    path.append(path[0])
                    pygame.draw.polygon(
                        self.surf, color=obj.color2, points=path, width=1
                    )
                    gfxdraw.aapolygon(self.surf, path, obj.color2)
                else:
                    # Degenerate 2-vertex shape: draw as a line segment.
                    pygame.draw.aaline(
                        self.surf,
                        start_pos=path[0],
                        end_pos=path[1],
                        color=obj.color1,
                    )
    # Start flag near the beginning of the terrain.
    flagy1 = TERRAIN_HEIGHT * SCALE
    flagy2 = flagy1 + 50
    x = TERRAIN_STEP * 3 * SCALE
    pygame.draw.aaline(
        self.surf, color=(0, 0, 0), start_pos=(x, flagy1), end_pos=(x, flagy2)
    )
    f = [
        (x, flagy2),
        (x, flagy2 - 10),
        (x + 25, flagy2 - 5),
    ]
    pygame.draw.polygon(self.surf, color=(230, 51, 0), points=f)
    pygame.draw.lines(
        self.surf, color=(0, 0, 0), points=f + [f[0]], width=1, closed=False
    )
    # Flip vertically: Box2D's y axis points up, pygame's points down.
    self.surf = pygame.transform.flip(self.surf, False, True)
    if self.render_mode == "human":
        assert self.screen is not None
        self.screen.blit(self.surf, (-self.scroll * SCALE, 0))
        pygame.event.pump()
        self.clock.tick(self.metadata["render_fps"])
        pygame.display.flip()
    elif self.render_mode == "rgb_array":
        return np.transpose(
            np.array(pygame.surfarray.pixels3d(self.surf)), axes=(1, 0, 2)
        )[:, -VIEWPORT_W:]
def close(self):
    """Shut down the pygame window, if one was ever opened."""
    if self.screen is None:
        # Nothing was rendered in "human" mode; there is nothing to close.
        return
    import pygame

    pygame.display.quit()
    pygame.quit()
    self.isopen = False
class BipedalWalkerHardcore:
    """Placeholder that rejects direct construction of the hardcore variant.

    The hardcore environment is only available through ``gym.make`` with
    ``hardcore=True``; instantiating this class always raises.
    """

    def __init__(self):
        message = (
            "Error initializing BipedalWalkerHardcore Environment.\n"
            "Currently, we do not support initializing this mode of environment by calling the class directly.\n"
            "To use this environment, instead create it by specifying the hardcore keyword in gym.make, i.e.\n"
            'gym.make("BipedalWalker-v3", hardcore=True)'
        )
        raise error.Error(message)
if __name__ == "__main__":
    # Heuristic walking demo: suboptimal, has no notion of balance.
    # A three-state machine swings one leg forward while the other supports,
    # with PD control driving each joint towards its current target.
    env = BipedalWalker()
    env.reset()
    steps = 0
    total_reward = 0
    a = np.array([0.0, 0.0, 0.0, 0.0])
    STAY_ON_ONE_LEG, PUT_OTHER_DOWN, PUSH_OFF = 1, 2, 3
    SPEED = 0.29  # Will fall forward on higher speed
    state = STAY_ON_ONE_LEG
    moving_leg = 0
    supporting_leg = 1 - moving_leg
    SUPPORT_KNEE_ANGLE = +0.1
    supporting_knee_angle = SUPPORT_KNEE_ANGLE
    while True:
        s, r, terminated, truncated, info = env.step(a)
        total_reward += r
        if steps % 20 == 0 or terminated or truncated:
            print("\naction " + str([f"{x:+0.2f}" for x in a]))
            print(f"step {steps} total_reward {total_reward:+0.2f}")
            print("hull " + str([f"{x:+0.2f}" for x in s[0:4]]))
            print("leg0 " + str([f"{x:+0.2f}" for x in s[4:9]]))
            print("leg1 " + str([f"{x:+0.2f}" for x in s[9:14]]))
        steps += 1
        # Observation offsets of the moving/supporting leg's 5-value slice.
        moving_s_base = 4 + 5 * moving_leg
        supporting_s_base = 4 + 5 * supporting_leg
        hip_targ = [None, None]  # -0.8 .. +1.1
        knee_targ = [None, None]  # -0.6 .. +0.9
        hip_todo = [0.0, 0.0]
        knee_todo = [0.0, 0.0]
        if state == STAY_ON_ONE_LEG:
            hip_targ[moving_leg] = 1.1
            knee_targ[moving_leg] = -0.6
            supporting_knee_angle += 0.03
            if s[2] > SPEED:
                supporting_knee_angle += 0.03
            supporting_knee_angle = min(supporting_knee_angle, SUPPORT_KNEE_ANGLE)
            knee_targ[supporting_leg] = supporting_knee_angle
            if s[supporting_s_base + 0] < 0.10:  # supporting leg is behind
                state = PUT_OTHER_DOWN
        if state == PUT_OTHER_DOWN:
            hip_targ[moving_leg] = +0.1
            knee_targ[moving_leg] = SUPPORT_KNEE_ANGLE
            knee_targ[supporting_leg] = supporting_knee_angle
            if s[moving_s_base + 4]:  # moving leg touched the ground
                state = PUSH_OFF
                supporting_knee_angle = min(s[moving_s_base + 2], SUPPORT_KNEE_ANGLE)
        if state == PUSH_OFF:
            knee_targ[moving_leg] = supporting_knee_angle
            knee_targ[supporting_leg] = +1.0
            if s[supporting_s_base + 2] > 0.88 or s[2] > 1.2 * SPEED:
                state = STAY_ON_ONE_LEG
                moving_leg = 1 - moving_leg
                supporting_leg = 1 - moving_leg
        # Bug fix: compare against None explicitly. A target of exactly 0.0
        # is a valid setpoint (supporting_knee_angle can reach <= 0 via the
        # min() above) and must not be skipped as falsy.
        if hip_targ[0] is not None:
            hip_todo[0] = 0.9 * (hip_targ[0] - s[4]) - 0.25 * s[5]
        if hip_targ[1] is not None:
            hip_todo[1] = 0.9 * (hip_targ[1] - s[9]) - 0.25 * s[10]
        if knee_targ[0] is not None:
            knee_todo[0] = 4.0 * (knee_targ[0] - s[6]) - 0.25 * s[7]
        if knee_targ[1] is not None:
            knee_todo[1] = 4.0 * (knee_targ[1] - s[11]) - 0.25 * s[12]
        hip_todo[0] -= 0.9 * (0 - s[0]) - 1.5 * s[1]  # PID to keep head straight
        hip_todo[1] -= 0.9 * (0 - s[0]) - 1.5 * s[1]
        knee_todo[0] -= 15.0 * s[3]  # vertical speed, to damp oscillations
        knee_todo[1] -= 15.0 * s[3]
        a[0] = hip_todo[0]
        a[1] = knee_todo[0]
        a[2] = hip_todo[1]
        a[3] = knee_todo[1]
        a = np.clip(0.5 * a, -1.0, 1.0)
        if terminated or truncated:
            break
================================================
FILE: gym/envs/box2d/car_dynamics.py
================================================
"""
Top-down car dynamics simulation.
Some ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-down-car by Chris Campbell.
This simulation is a bit more detailed, with wheels rotation.
Created by Oleg Klimov
"""
import math
import Box2D
import numpy as np
from gym.error import DependencyNotInstalled
try:
from Box2D.b2 import fixtureDef, polygonShape, revoluteJointDef
except ImportError:
raise DependencyNotInstalled("box2D is not installed, run `pip install gym[box2d]`")
# Global scale applied to all car geometry; derived quantities below scale
# with SIZE**2 because mass comes implicitly from density * area.
SIZE = 0.02
ENGINE_POWER = 100000000 * SIZE * SIZE
WHEEL_MOMENT_OF_INERTIA = 4000 * SIZE * SIZE
FRICTION_LIMIT = (
    1000000 * SIZE * SIZE
)  # friction ~= mass ~= size^2 (calculated implicitly using density)
WHEEL_R = 27  # wheel radius, in unscaled polygon units
WHEEL_W = 14  # wheel half-width, in unscaled polygon units
# Wheel anchor points relative to the hull: y > 0 is the front pair,
# y < 0 the rear pair.
WHEELPOS = [(-55, +80), (+55, +80), (-55, -82), (+55, -82)]
# Hull fixture polygons (unscaled); y > 0 faces the front of the car.
HULL_POLY1 = [(-60, +130), (+60, +130), (+60, +110), (-60, +110)]
HULL_POLY2 = [(-15, +120), (+15, +120), (+20, +20), (-20, 20)]
HULL_POLY3 = [
    (+25, +20),
    (+50, -10),
    (+50, -40),
    (+20, -90),
    (-20, -90),
    (-50, -40),
    (-50, -10),
    (-25, +20),
]
HULL_POLY4 = [(-50, -120), (+50, -120), (+50, -90), (-50, -90)]
WHEEL_COLOR = (0, 0, 0)  # tire color
WHEEL_WHITE = (77, 77, 77)  # rotating "white stripe" marker on wheels
MUD_COLOR = (102, 102, 0)  # skid particle color on grass
class Car:
    """Top-down car: one hull body plus four wheel bodies, each wheel
    attached to the hull by a motorized revolute joint (used for steering).

    Engine, brake and tire-friction forces are integrated manually in
    :meth:`step`; Box2D only handles rigid-body motion and contacts.
    """

    def __init__(self, world, init_angle, init_x, init_y):
        """Create the hull and four wheels in ``world`` at the given pose."""
        self.world: Box2D.b2World = world
        # Hull is the union of four polygon fixtures.
        self.hull: Box2D.b2Body = self.world.CreateDynamicBody(
            position=(init_x, init_y),
            angle=init_angle,
            fixtures=[
                fixtureDef(
                    shape=polygonShape(
                        vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY1]
                    ),
                    density=1.0,
                ),
                fixtureDef(
                    shape=polygonShape(
                        vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY2]
                    ),
                    density=1.0,
                ),
                fixtureDef(
                    shape=polygonShape(
                        vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY3]
                    ),
                    density=1.0,
                ),
                fixtureDef(
                    shape=polygonShape(
                        vertices=[(x * SIZE, y * SIZE) for x, y in HULL_POLY4]
                    ),
                    density=1.0,
                ),
            ],
        )
        self.hull.color = (0.8, 0.0, 0.0)
        self.wheels = []
        self.fuel_spent = 0.0
        WHEEL_POLY = [
            (-WHEEL_W, +WHEEL_R),
            (+WHEEL_W, +WHEEL_R),
            (+WHEEL_W, -WHEEL_R),
            (-WHEEL_W, -WHEEL_R),
        ]
        for wx, wy in WHEELPOS:
            # NOTE(review): both branches evaluate to 1.0, so front/rear
            # wheel scaling is effectively disabled — confirm intent.
            front_k = 1.0 if wy > 0 else 1.0
            w = self.world.CreateDynamicBody(
                position=(init_x + wx * SIZE, init_y + wy * SIZE),
                angle=init_angle,
                fixtures=fixtureDef(
                    shape=polygonShape(
                        vertices=[
                            (x * front_k * SIZE, y * front_k * SIZE)
                            for x, y in WHEEL_POLY
                        ]
                    ),
                    density=0.1,
                    categoryBits=0x0020,
                    maskBits=0x001,
                    restitution=0.0,
                ),
            )
            w.wheel_rad = front_k * WHEEL_R * SIZE
            w.color = WHEEL_COLOR
            w.gas = 0.0
            w.brake = 0.0
            w.steer = 0.0
            w.phase = 0.0  # wheel angle
            w.omega = 0.0  # angular velocity
            w.skid_start = None
            w.skid_particle = None
            # Steering joint: the motor rotates the wheel relative to the
            # hull, limited to +-0.4 rad.
            rjd = revoluteJointDef(
                bodyA=self.hull,
                bodyB=w,
                localAnchorA=(wx * SIZE, wy * SIZE),
                localAnchorB=(0, 0),
                enableMotor=True,
                enableLimit=True,
                maxMotorTorque=180 * 900 * SIZE * SIZE,
                motorSpeed=0,
                lowerAngle=-0.4,
                upperAngle=+0.4,
            )
            w.joint = self.world.CreateJoint(rjd)
            # Road tiles currently under this wheel (maintained via contacts).
            w.tiles = set()
            w.userData = w
            self.wheels.append(w)
        self.drawlist = self.wheels + [self.hull]
        self.particles = []  # skid-mark particles, capped at 30 in _create_particle

    def gas(self, gas):
        """control: rear wheel drive

        Args:
            gas (float): How much gas gets applied. Gets clipped between 0 and 1.
        """
        gas = np.clip(gas, 0, 1)
        # Only the two rear wheels are driven.
        for w in self.wheels[2:4]:
            diff = gas - w.gas
            if diff > 0.1:
                diff = 0.1  # gradually increase, but stop immediately
            w.gas += diff

    def brake(self, b):
        """control: brake

        Args:
            b (0..1): Degree to which the brakes are applied. More than 0.9 blocks the wheels to zero rotation"""
        for w in self.wheels:
            w.brake = b

    def steer(self, s):
        """control: steer

        Args:
            s (-1..1): target position, it takes time to rotate steering wheel from side-to-side"""
        # Only the two front wheels steer.
        self.wheels[0].steer = s
        self.wheels[1].steer = s

    def step(self, dt):
        """Advance wheel physics by ``dt`` seconds.

        Per wheel: drive the steering servo towards its target, integrate
        engine and brake torque into the wheel's spin (omega), then apply
        friction-limited forward/side tire forces to the wheel body.
        """
        for w in self.wheels:
            # Steer each wheel
            dir = np.sign(w.steer - w.joint.angle)
            val = abs(w.steer - w.joint.angle)
            w.joint.motorSpeed = dir * min(50.0 * val, 3.0)
            # Position => friction_limit
            grass = True
            friction_limit = FRICTION_LIMIT * 0.6  # Grass friction if no tile
            for tile in w.tiles:
                friction_limit = max(
                    friction_limit, FRICTION_LIMIT * tile.road_friction
                )
                grass = False
            # Force
            forw = w.GetWorldVector((0, 1))
            side = w.GetWorldVector((1, 0))
            v = w.linearVelocity
            vf = forw[0] * v[0] + forw[1] * v[1]  # forward speed
            vs = side[0] * v[0] + side[1] * v[1]  # side speed
            # WHEEL_MOMENT_OF_INERTIA*np.square(w.omega)/2 = E -- energy
            # WHEEL_MOMENT_OF_INERTIA*w.omega * domega/dt = dE/dt = W -- power
            # domega = dt*W/WHEEL_MOMENT_OF_INERTIA/w.omega
            # add small coef not to divide by zero
            w.omega += (
                dt
                * ENGINE_POWER
                * w.gas
                / WHEEL_MOMENT_OF_INERTIA
                / (abs(w.omega) + 5.0)
            )
            self.fuel_spent += dt * ENGINE_POWER * w.gas
            if w.brake >= 0.9:
                # Hard braking locks the wheel completely.
                w.omega = 0
            elif w.brake > 0:
                BRAKE_FORCE = 15  # radians per second
                dir = -np.sign(w.omega)
                val = BRAKE_FORCE * w.brake
                if abs(val) > abs(w.omega):
                    val = abs(w.omega)  # low speed => same as = 0
                w.omega += dir * val
            w.phase += w.omega * dt
            vr = w.omega * w.wheel_rad  # rotating wheel speed
            f_force = -vf + vr  # force direction is direction of speed difference
            p_force = -vs
            # Physically correct is to always apply friction_limit until speed is equal.
            # But dt is finite, that will lead to oscillations if difference is already near zero.
            # Random coefficient to cut oscillations in few steps (have no effect on friction_limit)
            f_force *= 205000 * SIZE * SIZE
            p_force *= 205000 * SIZE * SIZE
            force = np.sqrt(np.square(f_force) + np.square(p_force))
            # Skid trace
            if abs(force) > 2.0 * friction_limit:
                if (
                    w.skid_particle
                    and w.skid_particle.grass == grass
                    and len(w.skid_particle.poly) < 30
                ):
                    # Extend the current skid mark.
                    w.skid_particle.poly.append((w.position[0], w.position[1]))
                elif w.skid_start is None:
                    w.skid_start = w.position
                else:
                    w.skid_particle = self._create_particle(
                        w.skid_start, w.position, grass
                    )
                    w.skid_start = None
            else:
                w.skid_start = None
                w.skid_particle = None
            # Clamp the combined tire force to the friction circle.
            if abs(force) > friction_limit:
                f_force /= force
                p_force /= force
                force = friction_limit  # Correct physics here
            f_force *= force
            p_force *= force
            # Reaction torque slows the wheel's spin.
            w.omega -= dt * f_force * w.wheel_rad / WHEEL_MOMENT_OF_INERTIA
            w.ApplyForceToCenter(
                (
                    p_force * side[0] + f_force * forw[0],
                    p_force * side[1] + f_force * forw[1],
                ),
                True,
            )

    def draw(self, surface, zoom, translation, angle, draw_particles=True):
        """Render the car (and optionally skid particles) onto ``surface``
        with the given zoom/translation/rotation camera transform."""
        import pygame.draw

        if draw_particles:
            for p in self.particles:
                poly = [pygame.math.Vector2(c).rotate_rad(angle) for c in p.poly]
                poly = [
                    (
                        coords[0] * zoom + translation[0],
                        coords[1] * zoom + translation[1],
                    )
                    for coords in poly
                ]
                pygame.draw.lines(
                    surface, color=p.color, points=poly, width=2, closed=False
                )
        for obj in self.drawlist:
            for f in obj.fixtures:
                trans = f.body.transform
                # world -> screen: body transform, rotate, then zoom+pan.
                path = [trans * v for v in f.shape.vertices]
                path = [(coords[0], coords[1]) for coords in path]
                path = [pygame.math.Vector2(c).rotate_rad(angle) for c in path]
                path = [
                    (
                        coords[0] * zoom + translation[0],
                        coords[1] * zoom + translation[1],
                    )
                    for coords in path
                ]
                color = [int(c * 255) for c in obj.color]
                pygame.draw.polygon(surface, color=color, points=path)
                # Only wheels carry a rotation phase; skip the hull.
                if "phase" not in obj.__dict__:
                    continue
                # Draw the rotating white stripe on the visible half of the
                # wheel (angles with sin <= 0).
                a1 = obj.phase
                a2 = obj.phase + 1.2  # radians
                s1 = math.sin(a1)
                s2 = math.sin(a2)
                c1 = math.cos(a1)
                c2 = math.cos(a2)
                if s1 > 0 and s2 > 0:
                    continue
                if s1 > 0:
                    c1 = np.sign(c1)
                if s2 > 0:
                    c2 = np.sign(c2)
                white_poly = [
                    (-WHEEL_W * SIZE, +WHEEL_R * c1 * SIZE),
                    (+WHEEL_W * SIZE, +WHEEL_R * c1 * SIZE),
                    (+WHEEL_W * SIZE, +WHEEL_R * c2 * SIZE),
                    (-WHEEL_W * SIZE, +WHEEL_R * c2 * SIZE),
                ]
                white_poly = [trans * v for v in white_poly]
                white_poly = [(coords[0], coords[1]) for coords in white_poly]
                white_poly = [
                    pygame.math.Vector2(c).rotate_rad(angle) for c in white_poly
                ]
                white_poly = [
                    (
                        coords[0] * zoom + translation[0],
                        coords[1] * zoom + translation[1],
                    )
                    for coords in white_poly
                ]
                pygame.draw.polygon(surface, color=WHEEL_WHITE, points=white_poly)

    def _create_particle(self, point1, point2, grass):
        """Create, register and return a new skid-mark particle spanning the
        segment point1-point2; keeps at most 30 particles alive."""

        class Particle:
            pass

        p = Particle()
        p.color = WHEEL_COLOR if not grass else MUD_COLOR
        p.ttl = 1
        p.poly = [(point1[0], point1[1]), (point2[0], point2[1])]
        p.grass = grass
        self.particles.append(p)
        while len(self.particles) > 30:
            self.particles.pop(0)
        return p

    def destroy(self):
        """Remove the hull and all wheel bodies from the Box2D world."""
        self.world.DestroyBody(self.hull)
        self.hull = None
        for w in self.wheels:
            self.world.DestroyBody(w)
        self.wheels = []
================================================
FILE: gym/envs/box2d/car_racing.py
================================================
__credits__ = ["Andrea PIERRÉ"]
import math
from typing import Optional, Union
import numpy as np
import gym
from gym import spaces
from gym.envs.box2d.car_dynamics import Car
from gym.error import DependencyNotInstalled, InvalidAction
from gym.utils import EzPickle
try:
import Box2D
from Box2D.b2 import contactListener, fixtureDef, polygonShape
except ImportError:
raise DependencyNotInstalled("box2D is not installed, run `pip install gym[box2d]`")
try:
# As pygame is necessary for using the environment (reset and step) even without a render mode
# therefore, pygame is a necessary import for the environment.
import pygame
from pygame import gfxdraw
except ImportError:
raise DependencyNotInstalled(
"pygame is not installed, run `pip install gym[box2d]`"
)
STATE_W = 96  # less than Atari 160x192
STATE_H = 96  # observation height, pixels
VIDEO_W = 600  # recorded-video width
VIDEO_H = 400  # recorded-video height
WINDOW_W = 1000  # human-mode window width
WINDOW_H = 800  # human-mode window height
SCALE = 6.0  # Track scale
TRACK_RAD = 900 / SCALE  # Track is heavily morphed circle with this radius
PLAYFIELD = 2000 / SCALE  # Game over boundary
FPS = 50  # Frames per second
ZOOM = 2.7  # Camera zoom
ZOOM_FOLLOW = True  # Set to False for fixed view (don't use zoom)
TRACK_DETAIL_STEP = 21 / SCALE  # spacing of generated track waypoints
TRACK_TURN_RATE = 0.31  # max heading change per waypoint, radians
TRACK_WIDTH = 40 / SCALE  # half-width of the road
BORDER = 8 / SCALE  # width of the red/white curb strip
BORDER_MIN_COUNT = 4  # consecutive sharp waypoints needed for a curb
GRASS_DIM = PLAYFIELD / 20.0  # size of one grass square
# Largest on-screen extent of any polygon; used for render culling.
MAX_SHAPE_DIM = (
    max(GRASS_DIM, TRACK_WIDTH, TRACK_DETAIL_STEP) * math.sqrt(2) * ZOOM * SCALE
)
class FrictionDetector(contactListener):
    """Box2D contact listener tracking which road tiles each wheel touches.

    Awards reward for newly visited tiles and raises ``env.new_lap`` when
    the start tile is re-entered after enough of the track was covered.
    """

    def __init__(self, env, lap_complete_percent):
        contactListener.__init__(self)
        self.env = env
        self.lap_complete_percent = lap_complete_percent

    def BeginContact(self, contact):
        self._contact(contact, True)

    def EndContact(self, contact):
        self._contact(contact, False)

    def _contact(self, contact, begin):
        # Work out which body is a road tile (has road_friction) and which
        # is a car part (has a tiles set); anything else is ignored.
        tile = None
        obj = None
        u1 = contact.fixtureA.body.userData
        u2 = contact.fixtureB.body.userData
        if u1 and "road_friction" in u1.__dict__:
            tile = u1
            obj = u2
        if u2 and "road_friction" in u2.__dict__:
            tile = u2
            obj = u1
        if not tile:
            return
        # inherit tile color from env
        tile.color[:] = self.env.road_color
        if not obj or "tiles" not in obj.__dict__:
            return
        if begin:
            obj.tiles.add(tile)
            if not tile.road_visited:
                # First visit: reward is 1000 spread over all track tiles.
                tile.road_visited = True
                self.env.reward += 1000.0 / len(self.env.track)
                self.env.tile_visited_count += 1
                # Lap is considered completed if enough % of the track was covered
                if (
                    tile.idx == 0
                    and self.env.tile_visited_count / len(self.env.track)
                    > self.lap_complete_percent
                ):
                    self.env.new_lap = True
        else:
            obj.tiles.remove(tile)
class CarRacing(gym.Env, EzPickle):
"""
### Description
The easiest control task to learn from pixels - a top-down
racing environment. The generated track is random every episode.
Some indicators are shown at the bottom of the window along with the
state RGB buffer. From left to right: true speed, four ABS sensors,
steering wheel position, and gyroscope.
To play yourself (it's rather fast for humans), type:
```
python gym/envs/box2d/car_racing.py
```
Remember: it's a powerful rear-wheel drive car - don't press the accelerator
and turn at the same time.
### Action Space
If continuous:
There are 3 actions: steering (-1 is full left, +1 is full right), gas, and braking.
If discrete:
There are 5 actions: do nothing, steer left, steer right, gas, brake.
### Observation Space
State consists of 96x96 pixels.
### Rewards
The reward is -0.1 every frame and +1000/N for every track tile visited,
where N is the total number of tiles visited in the track. For example,
if you have finished in 732 frames, your reward is
1000 - 0.1*732 = 926.8 points.
### Starting State
The car starts at rest in the center of the road.
### Episode Termination
The episode finishes when all of the tiles are visited. The car can also go
outside of the playfield - that is, far off the track, in which case it will
receive -100 reward and die.
### Arguments
`lap_complete_percent` dictates the percentage of tiles that must be visited by
the agent before a lap is considered complete.
Passing `domain_randomize=True` enables the domain randomized variant of the environment.
In this scenario, the background and track colours are different on every reset.
Passing `continuous=False` converts the environment to use discrete action space.
The discrete action space has 5 actions: [do nothing, left, right, gas, brake].
### Reset Arguments
Passing the option `options["randomize"] = True` will change the current colour of the environment on demand.
Correspondingly, passing the option `options["randomize"] = False` will not change the current colour of the environment.
`domain_randomize` must be `True` on init for this argument to work.
Example usage:
```py
env = gym.make("CarRacing-v1", domain_randomize=True)
# normal reset, this changes the colour scheme by default
env.reset()
# reset with colour scheme change
env.reset(options={"randomize": True})
# reset with no colour scheme change
env.reset(options={"randomize": False})
```
### Version History
- v1: Change track completion logic and add domain randomization (0.24.0)
- v0: Original version
### References
- Chris Campbell (2014), http://www.iforce2d.net/b2dtut/top-down-car.
### Credits
Created by Oleg Klimov
"""
metadata = {
"render_modes": [
"human",
"rgb_array",
"state_pixels",
],
"render_fps": FPS,
}
def __init__(
    self,
    render_mode: Optional[str] = None,
    verbose: bool = False,
    lap_complete_percent: float = 0.95,
    domain_randomize: bool = False,
    continuous: bool = True,
):
    """Set up the Box2D world, spaces and rendering state.

    Args:
        render_mode: one of "human", "rgb_array", "state_pixels" or None.
        verbose: print track-generation diagnostics.
        lap_complete_percent: fraction of tiles that must be visited for
            a lap to count as complete.
        domain_randomize: randomize background/track colours every reset.
        continuous: Box(3) actions if True, else Discrete(5).
    """
    EzPickle.__init__(
        self,
        render_mode,
        verbose,
        lap_complete_percent,
        domain_randomize,
        continuous,
    )
    self.continuous = continuous
    self.domain_randomize = domain_randomize
    self.lap_complete_percent = lap_complete_percent
    self._init_colors()
    # Keep a python-side reference to the listener (Box2D GC workaround).
    self.contactListener_keepref = FrictionDetector(self, self.lap_complete_percent)
    self.world = Box2D.b2World((0, 0), contactListener=self.contactListener_keepref)
    self.screen: Optional[pygame.Surface] = None
    self.surf = None
    self.clock = None
    self.isopen = True
    self.invisible_state_window = None
    self.invisible_video_window = None
    self.road = None
    self.car: Optional[Car] = None
    self.reward = 0.0
    self.prev_reward = 0.0
    self.verbose = verbose
    self.new_lap = False
    # Template fixture reused for every road tile; its vertices are
    # rewritten per tile inside _create_track.
    self.fd_tile = fixtureDef(
        shape=polygonShape(vertices=[(0, 0), (1, 0), (1, -1), (0, -1)])
    )
    # This will throw a warning in tests/envs/test_envs in utils/env_checker.py as the space is not symmetric
    # or normalised however this is not possible here so ignore
    if self.continuous:
        self.action_space = spaces.Box(
            np.array([-1, 0, 0]).astype(np.float32),
            np.array([+1, +1, +1]).astype(np.float32),
        )  # steer, gas, brake
    else:
        self.action_space = spaces.Discrete(5)
        # do nothing, left, right, gas, brake
    self.observation_space = spaces.Box(
        low=0, high=255, shape=(STATE_H, STATE_W, 3), dtype=np.uint8
    )
    self.render_mode = render_mode
def _destroy(self):
    """Remove the road tiles and the car spawned by the previous reset."""
    # Before the first successful reset there is nothing to tear down.
    if not self.road:
        return
    for tile in self.road:
        self.world.DestroyBody(tile)
    self.road = []
    # A car is always created right after the road, so it must exist here.
    assert self.car is not None
    self.car.destroy()
def _init_colors(self):
    """Pick road/background/grass colours (randomized if enabled)."""
    if not self.domain_randomize:
        # Fixed default palette.
        self.road_color = np.array([102, 102, 102])
        self.bg_color = np.array([102, 204, 102])
        self.grass_color = np.array([102, 230, 102])
        return
    # Domain-randomized palette: random road and background, grass is the
    # background nudged up on one random channel.
    self.road_color = self.np_random.uniform(0, 210, size=3)
    self.bg_color = self.np_random.uniform(0, 210, size=3)
    self.grass_color = np.copy(self.bg_color)
    self.grass_color[self.np_random.integers(3)] += 20
def _reinit_colors(self, randomize):
    """Re-roll the colour scheme on reset when ``randomize`` is truthy."""
    assert (
        self.domain_randomize
    ), "domain_randomize must be True to use this function."
    if not randomize:
        # Caller opted out: keep the colours from the previous episode.
        return
    self.road_color = self.np_random.uniform(0, 210, size=3)
    self.bg_color = self.np_random.uniform(0, 210, size=3)
    self.grass_color = np.copy(self.bg_color)
    self.grass_color[self.np_random.integers(3)] += 20
def _create_track(self):
    """Generate a random closed track and its static tile bodies.

    Places noisy checkpoints on a circle, traces a smooth path through
    them, extracts one closed loop, marks hard turns with red/white
    curbs and creates one sensor tile body per segment.

    Returns:
        bool: True on success, False when generation failed and the
        caller should retry.
    """
    CHECKPOINTS = 12
    # Create checkpoints
    checkpoints = []
    for c in range(CHECKPOINTS):
        noise = self.np_random.uniform(0, 2 * math.pi * 1 / CHECKPOINTS)
        alpha = 2 * math.pi * c / CHECKPOINTS + noise
        rad = self.np_random.uniform(TRACK_RAD / 3, TRACK_RAD)
        # First and last checkpoints are pinned so the loop closes cleanly.
        if c == 0:
            alpha = 0
            rad = 1.5 * TRACK_RAD
        if c == CHECKPOINTS - 1:
            alpha = 2 * math.pi * c / CHECKPOINTS
            self.start_alpha = 2 * math.pi * (-0.5) / CHECKPOINTS
            rad = 1.5 * TRACK_RAD
        checkpoints.append((alpha, rad * math.cos(alpha), rad * math.sin(alpha)))
    self.road = []
    # Go from one checkpoint to another to create track
    x, y, beta = 1.5 * TRACK_RAD, 0, 0
    dest_i = 0
    laps = 0
    track = []
    no_freeze = 2500
    visited_other_side = False
    while True:
        alpha = math.atan2(y, x)
        # Count laps by watching the angle wrap from negative to positive.
        if visited_other_side and alpha > 0:
            laps += 1
            visited_other_side = False
        if alpha < 0:
            visited_other_side = True
            alpha += 2 * math.pi
        while True:  # Find destination from checkpoints
            failed = True
            while True:
                dest_alpha, dest_x, dest_y = checkpoints[dest_i % len(checkpoints)]
                if alpha <= dest_alpha:
                    failed = False
                    break
                dest_i += 1
                if dest_i % len(checkpoints) == 0:
                    break
            if not failed:
                break
            alpha -= 2 * math.pi
            continue
        # Unit heading vector (r1) and its left-hand normal (p1).
        r1x = math.cos(beta)
        r1y = math.sin(beta)
        p1x = -r1y
        p1y = r1x
        dest_dx = dest_x - x  # vector towards destination
        dest_dy = dest_y - y
        # destination vector projected on rad:
        proj = r1x * dest_dx + r1y * dest_dy
        while beta - alpha > 1.5 * math.pi:
            beta -= 2 * math.pi
        while beta - alpha < -1.5 * math.pi:
            beta += 2 * math.pi
        prev_beta = beta
        proj *= SCALE
        # Turn towards the destination, rate-limited by TRACK_TURN_RATE.
        if proj > 0.3:
            beta -= min(TRACK_TURN_RATE, abs(0.001 * proj))
        if proj < -0.3:
            beta += min(TRACK_TURN_RATE, abs(0.001 * proj))
        x += p1x * TRACK_DETAIL_STEP
        y += p1y * TRACK_DETAIL_STEP
        track.append((alpha, prev_beta * 0.5 + beta * 0.5, x, y))
        if laps > 4:
            break
        no_freeze -= 1
        if no_freeze == 0:
            break
    # Find closed loop range i1..i2, first loop should be ignored, second is OK
    i1, i2 = -1, -1
    i = len(track)
    while True:
        i -= 1
        if i == 0:
            return False  # Failed
        pass_through_start = (
            track[i][0] > self.start_alpha and track[i - 1][0] <= self.start_alpha
        )
        if pass_through_start and i2 == -1:
            i2 = i
        elif pass_through_start and i1 == -1:
            i1 = i
            break
    if self.verbose:
        print("Track generation: %i..%i -> %i-tiles track" % (i1, i2, i2 - i1))
    assert i1 != -1
    assert i2 != -1
    track = track[i1 : i2 - 1]
    first_beta = track[0][1]
    first_perp_x = math.cos(first_beta)
    first_perp_y = math.sin(first_beta)
    # Length of perpendicular jump to put together head and tail
    well_glued_together = np.sqrt(
        np.square(first_perp_x * (track[0][2] - track[-1][2]))
        + np.square(first_perp_y * (track[0][3] - track[-1][3]))
    )
    if well_glued_together > TRACK_DETAIL_STEP:
        return False
    # Red-white border on hard turns
    border = [False] * len(track)
    for i in range(len(track)):
        good = True
        oneside = 0
        # A curb needs BORDER_MIN_COUNT consecutive sharp turns that all
        # bend the same way.
        for neg in range(BORDER_MIN_COUNT):
            beta1 = track[i - neg - 0][1]
            beta2 = track[i - neg - 1][1]
            good &= abs(beta1 - beta2) > TRACK_TURN_RATE * 0.2
            oneside += np.sign(beta1 - beta2)
        good &= abs(oneside) == BORDER_MIN_COUNT
        border[i] = good
    # Smear each curb backwards so it starts before the turn.
    for i in range(len(track)):
        for neg in range(BORDER_MIN_COUNT):
            border[i - neg] |= border[i]
    # Create tiles
    for i in range(len(track)):
        alpha1, beta1, x1, y1 = track[i]
        alpha2, beta2, x2, y2 = track[i - 1]
        # Quad between consecutive waypoints, offset left/right by the
        # track half-width perpendicular to the heading.
        road1_l = (
            x1 - TRACK_WIDTH * math.cos(beta1),
            y1 - TRACK_WIDTH * math.sin(beta1),
        )
        road1_r = (
            x1 + TRACK_WIDTH * math.cos(beta1),
            y1 + TRACK_WIDTH * math.sin(beta1),
        )
        road2_l = (
            x2 - TRACK_WIDTH * math.cos(beta2),
            y2 - TRACK_WIDTH * math.sin(beta2),
        )
        road2_r = (
            x2 + TRACK_WIDTH * math.cos(beta2),
            y2 + TRACK_WIDTH * math.sin(beta2),
        )
        vertices = [road1_l, road1_r, road2_r, road2_l]
        self.fd_tile.shape.vertices = vertices
        t = self.world.CreateStaticBody(fixtures=self.fd_tile)
        t.userData = t
        # Slight per-tile brightness variation for a striped look.
        c = 0.01 * (i % 3) * 255
        t.color = self.road_color + c
        t.road_visited = False
        t.road_friction = 1.0
        t.idx = i
        # Tiles are sensors: they detect wheels but don't collide.
        t.fixtures[0].sensor = True
        self.road_poly.append(([road1_l, road1_r, road2_r, road2_l], t.color))
        self.road.append(t)
        if border[i]:
            # Curb quad on the outside of the turn, alternating white/red.
            side = np.sign(beta2 - beta1)
            b1_l = (
                x1 + side * TRACK_WIDTH * math.cos(beta1),
                y1 + side * TRACK_WIDTH * math.sin(beta1),
            )
            b1_r = (
                x1 + side * (TRACK_WIDTH + BORDER) * math.cos(beta1),
                y1 + side * (TRACK_WIDTH + BORDER) * math.sin(beta1),
            )
            b2_l = (
                x2 + side * TRACK_WIDTH * math.cos(beta2),
                y2 + side * TRACK_WIDTH * math.sin(beta2),
            )
            b2_r = (
                x2 + side * (TRACK_WIDTH + BORDER) * math.cos(beta2),
                y2 + side * (TRACK_WIDTH + BORDER) * math.sin(beta2),
            )
            self.road_poly.append(
                (
                    [b1_l, b1_r, b2_r, b2_l],
                    (255, 255, 255) if i % 2 == 0 else (255, 0, 0),
                )
            )
    self.track = track
    return True
def reset(
    self,
    *,
    seed: Optional[int] = None,
    options: Optional[dict] = None,
):
    """Start a new episode: rebuild the track and spawn the car.

    Args:
        seed: RNG seed forwarded to the base Env.
        options: may contain "randomize" (bool) to force/skip colour
            re-randomization; only honoured when domain_randomize is on.

    Returns:
        (observation, info): observation from a no-action step; empty info.
    """
    super().reset(seed=seed)
    self._destroy()
    # Keep a python-side reference to the listener (Box2D GC workaround).
    self.world.contactListener_bug_workaround = FrictionDetector(
        self, self.lap_complete_percent
    )
    self.world.contactListener = self.world.contactListener_bug_workaround
    self.reward = 0.0
    self.prev_reward = 0.0
    self.tile_visited_count = 0
    self.t = 0.0
    self.new_lap = False
    self.road_poly = []
    if self.domain_randomize:
        # Colours re-randomize by default; options["randomize"] overrides.
        randomize = True
        if isinstance(options, dict):
            if "randomize" in options:
                randomize = options["randomize"]
        self._reinit_colors(randomize)
    # Track generation can fail (loop didn't close); retry until it works.
    while True:
        success = self._create_track()
        if success:
            break
        if self.verbose:
            # NOTE(review): message is missing a space between "many" and
            # "instances" (implicit string concatenation).
            print(
                "retry to generate track (normal if there are not many"
                "instances of this message)"
            )
    # Spawn the car at the first track waypoint (beta, x, y).
    self.car = Car(self.world, *self.track[0][1:4])
    if self.render_mode == "human":
        self.render()
    return self.step(None)[0], {}
def step(self, action: Union[np.ndarray, int]):
    """Advance the simulation by one frame of 1/FPS seconds.

    Args:
        action: ``None`` (only used internally by ``reset``), a 3-vector
            [steer, gas, brake] in continuous mode, or a Discrete(5)
            integer otherwise.

    Returns:
        (state-pixel observation, step reward, terminated, truncated, info)

    Raises:
        InvalidAction: if a discrete action is outside the action space.
    """
    assert self.car is not None
    if action is not None:
        if self.continuous:
            # steering sign is inverted before being passed to the car model
            self.car.steer(-action[0])
            self.car.gas(action[1])
            self.car.brake(action[2])
        else:
            if not self.action_space.contains(action):
                raise InvalidAction(
                    f"you passed the invalid action `{action}`. "
                    f"The supported action_space is `{self.action_space}`"
                )
            # discrete mapping via boolean masks:
            # 1/2 -> steer, 3 -> gas, 4 -> brake, 0 -> no-op
            self.car.steer(-0.6 * (action == 1) + 0.6 * (action == 2))
            self.car.gas(0.2 * (action == 3))
            self.car.brake(0.8 * (action == 4))

    # advance the car model first, then the Box2D world, then the clock
    self.car.step(1.0 / FPS)
    self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)
    self.t += 1.0 / FPS

    self.state = self._render("state_pixels")

    step_reward = 0
    terminated = False
    truncated = False
    if action is not None:  # First step without action, called from reset()
        # constant per-frame time penalty encourages finishing quickly
        self.reward -= 0.1
        # We actually don't want to count fuel spent, we want car to be faster.
        # self.reward -= 10 * self.car.fuel_spent / ENGINE_POWER
        self.car.fuel_spent = 0.0
        step_reward = self.reward - self.prev_reward
        self.prev_reward = self.reward
        if self.tile_visited_count == len(self.track) or self.new_lap:
            # Truncation due to finishing lap
            # This should not be treated as a failure
            # but like a timeout
            truncated = True
        x, y = self.car.hull.position
        if abs(x) > PLAYFIELD or abs(y) > PLAYFIELD:
            # leaving the playfield ends the episode with a large penalty
            terminated = True
            step_reward = -100

    if self.render_mode == "human":
        self.render()
    return self.state, step_reward, terminated, truncated, {}
def render(self):
    """Render according to ``self.render_mode``; warn if no mode was set."""
    if self.render_mode is not None:
        return self._render(self.render_mode)
    gym.logger.warn(
        "You are calling render method without specifying any render mode. "
        "You can specify the render_mode at initialization, "
        f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
    )
def _render(self, mode: str):
    """Draw the full scene for ``mode`` and return the matching output.

    Returns an RGB array sized (VIDEO_W, VIDEO_H) for "rgb_array", a
    (STATE_W, STATE_H) array for "state_pixels", and ``self.isopen`` for
    other modes (after presenting the frame in "human" mode).
    """
    assert mode in self.metadata["render_modes"]

    pygame.font.init()
    if self.screen is None and mode == "human":
        # lazily create the window only when a human is watching
        pygame.init()
        pygame.display.init()
        self.screen = pygame.display.set_mode((WINDOW_W, WINDOW_H))
    if self.clock is None:
        self.clock = pygame.time.Clock()

    if "t" not in self.__dict__:
        return  # reset() not called yet

    self.surf = pygame.Surface((WINDOW_W, WINDOW_H))

    assert self.car is not None
    # computing transformations
    angle = -self.car.hull.angle
    # Animating first second zoom.
    zoom = 0.1 * SCALE * max(1 - self.t, 0) + ZOOM * SCALE * min(self.t, 1)
    # camera follows the car hull
    scroll_x = -(self.car.hull.position[0]) * zoom
    scroll_y = -(self.car.hull.position[1]) * zoom
    trans = pygame.math.Vector2((scroll_x, scroll_y)).rotate_rad(angle)
    trans = (WINDOW_W / 2 + trans[0], WINDOW_H / 4 + trans[1])

    self._render_road(zoom, trans, angle)
    self.car.draw(
        self.surf,
        zoom,
        trans,
        angle,
        mode not in ["state_pixels_list", "state_pixels"],
    )

    # pygame's y axis points down; flip so the world appears right side up
    self.surf = pygame.transform.flip(self.surf, False, True)

    # showing stats
    self._render_indicators(WINDOW_W, WINDOW_H)

    font = pygame.font.Font(pygame.font.get_default_font(), 42)
    text = font.render("%04i" % self.reward, True, (255, 255, 255), (0, 0, 0))
    text_rect = text.get_rect()
    text_rect.center = (60, WINDOW_H - WINDOW_H * 2.5 / 40.0)
    self.surf.blit(text, text_rect)

    if mode == "human":
        pygame.event.pump()
        self.clock.tick(self.metadata["render_fps"])
        assert self.screen is not None
        self.screen.fill(0)
        self.screen.blit(self.surf, (0, 0))
        pygame.display.flip()

    if mode == "rgb_array":
        return self._create_image_array(self.surf, (VIDEO_W, VIDEO_H))
    elif mode == "state_pixels":
        return self._create_image_array(self.surf, (STATE_W, STATE_H))
    else:
        return self.isopen
def _render_road(self, zoom, translation, angle):
    """Paint the background square, the grass checkerboard and the road."""
    # full-playfield background square
    bounds = PLAYFIELD
    field = [
        (bounds, bounds),
        (bounds, -bounds),
        (-bounds, -bounds),
        (-bounds, bounds),
    ]
    self._draw_colored_polygon(
        self.surf, field, self.bg_color, zoom, translation, angle, clip=False
    )

    # grass squares on every other GRASS_DIM-sized cell of a 40x40 grid
    grass_squares = [
        [
            (GRASS_DIM * gx + GRASS_DIM, GRASS_DIM * gy + 0),
            (GRASS_DIM * gx + 0, GRASS_DIM * gy + 0),
            (GRASS_DIM * gx + 0, GRASS_DIM * gy + GRASS_DIM),
            (GRASS_DIM * gx + GRASS_DIM, GRASS_DIM * gy + GRASS_DIM),
        ]
        for gx in range(-20, 20, 2)
        for gy in range(-20, 20, 2)
    ]
    for square in grass_squares:
        self._draw_colored_polygon(
            self.surf, square, self.grass_color, zoom, translation, angle
        )

    # road tiles (and border strips) recorded during track creation
    for poly, color in self.road_poly:
        # converting to pixel coordinates
        tile = [(px, py) for px, py in poly]
        rgb = [int(channel) for channel in color]
        self._draw_colored_polygon(self.surf, tile, rgb, zoom, translation, angle)
def _render_indicators(self, W, H):
    """Draw the telemetry strip along the bottom of a W x H surface.

    Shows true speed, the four wheel angular velocities ("ABS sensors"),
    the front-wheel steering joint angle and the hull angular velocity.
    """
    s = W / 40.0  # horizontal size of one indicator slot
    h = H / 40.0  # vertical unit
    color = (0, 0, 0)
    # black backdrop strip across the bottom of the window
    polygon = [(W, H), (W, H - 5 * h), (0, H - 5 * h), (0, H)]
    pygame.draw.polygon(self.surf, color=color, points=polygon)

    def vertical_ind(place, val):
        # bar growing upward from the strip; height proportional to val
        return [
            (place * s, H - (h + h * val)),
            ((place + 1) * s, H - (h + h * val)),
            ((place + 1) * s, H - h),
            ((place + 0) * s, H - h),
        ]

    def horiz_ind(place, val):
        # bar growing sideways; width proportional to val (may be negative)
        return [
            ((place + 0) * s, H - 4 * h),
            ((place + val) * s, H - 4 * h),
            ((place + val) * s, H - 2 * h),
            ((place + 0) * s, H - 2 * h),
        ]

    assert self.car is not None
    true_speed = np.sqrt(
        np.square(self.car.hull.linearVelocity[0])
        + np.square(self.car.hull.linearVelocity[1])
    )

    # simple wrapper to render if the indicator value is above a threshold
    def render_if_min(value, points, color):
        if abs(value) > 1e-4:
            pygame.draw.polygon(self.surf, points=points, color=color)

    render_if_min(true_speed, vertical_ind(5, 0.02 * true_speed), (255, 255, 255))
    # ABS sensors
    render_if_min(
        self.car.wheels[0].omega,
        vertical_ind(7, 0.01 * self.car.wheels[0].omega),
        (0, 0, 255),
    )
    render_if_min(
        self.car.wheels[1].omega,
        vertical_ind(8, 0.01 * self.car.wheels[1].omega),
        (0, 0, 255),
    )
    render_if_min(
        self.car.wheels[2].omega,
        vertical_ind(9, 0.01 * self.car.wheels[2].omega),
        (51, 0, 255),
    )
    render_if_min(
        self.car.wheels[3].omega,
        vertical_ind(10, 0.01 * self.car.wheels[3].omega),
        (51, 0, 255),
    )
    # steering joint angle of the first wheel
    render_if_min(
        self.car.wheels[0].joint.angle,
        horiz_ind(20, -10.0 * self.car.wheels[0].joint.angle),
        (0, 255, 0),
    )
    # hull angular velocity
    render_if_min(
        self.car.hull.angularVelocity,
        horiz_ind(30, -0.8 * self.car.hull.angularVelocity),
        (255, 0, 0),
    )
def _draw_colored_polygon(
    self, surface, poly, color, zoom, translation, angle, clip=True
):
    """Rotate, scale and translate ``poly``, then draw it onto ``self.surf``.

    NOTE: drawing targets ``self.surf``; the ``surface`` argument is not
    used by the drawing calls. With ``clip=True`` the polygon is skipped
    when no vertex falls inside the padded screen rectangle.
    """
    transformed = []
    for vertex in poly:
        rotated = pygame.math.Vector2(vertex).rotate_rad(angle)
        transformed.append(
            (rotated[0] * zoom + translation[0], rotated[1] * zoom + translation[1])
        )

    # Cheap visibility test: instead of an exact polygon/screen intersection,
    # accept any polygon with at least one vertex inside the screen rectangle
    # padded by MAX_SHAPE_DIM (the largest object diagonal) on every side.
    def on_screen(point):
        return (
            -MAX_SHAPE_DIM <= point[0] <= WINDOW_W + MAX_SHAPE_DIM
            and -MAX_SHAPE_DIM <= point[1] <= WINDOW_H + MAX_SHAPE_DIM
        )

    if not clip or any(on_screen(p) for p in transformed):
        gfxdraw.aapolygon(self.surf, transformed, color)
        gfxdraw.filled_polygon(self.surf, transformed, color)
def _create_image_array(self, screen, size):
    """Downscale ``screen`` to ``size`` and return it as an (H, W, 3) array."""
    resized = pygame.transform.smoothscale(screen, size)
    pixels = pygame.surfarray.pixels3d(resized)
    # pygame stores surfaces as (W, H, 3); transpose to the usual (H, W, 3)
    return np.transpose(np.array(pixels), axes=(1, 0, 2))
def close(self):
    """Shut down the pygame window, if one was ever opened."""
    if self.screen is None:
        return
    pygame.display.quit()
    self.isopen = False
    pygame.quit()
if __name__ == "__main__":
    # Manual play: drive the car with the arrow keys.
    # a = [steer, gas, brake], mutated in place by keyboard events.
    a = np.array([0.0, 0.0, 0.0])

    def register_input():
        # Translate pygame keyboard events into the shared action array and
        # the global quit/restart flags.
        global quit, restart
        for event in pygame.event.get():
            if event.type == pygame.KEYDOWN:
                if event.key == pygame.K_LEFT:
                    a[0] = -1.0
                if event.key == pygame.K_RIGHT:
                    a[0] = +1.0
                if event.key == pygame.K_UP:
                    a[1] = +1.0
                if event.key == pygame.K_DOWN:
                    a[2] = +0.8  # set 1.0 for wheels to block to zero rotation
                if event.key == pygame.K_RETURN:
                    restart = True
                if event.key == pygame.K_ESCAPE:
                    quit = True

            if event.type == pygame.KEYUP:
                if event.key == pygame.K_LEFT:
                    a[0] = 0
                if event.key == pygame.K_RIGHT:
                    a[0] = 0
                if event.key == pygame.K_UP:
                    a[1] = 0
                if event.key == pygame.K_DOWN:
                    a[2] = 0

            if event.type == pygame.QUIT:
                quit = True

    env = CarRacing(render_mode="human")

    quit = False
    while not quit:
        env.reset()
        total_reward = 0.0
        steps = 0
        restart = False
        while True:
            register_input()
            s, r, terminated, truncated, info = env.step(a)
            total_reward += r
            if steps % 200 == 0 or terminated or truncated:
                print("\naction " + str([f"{x:+0.2f}" for x in a]))
                print(f"step {steps} total_reward {total_reward:+0.2f}")
            steps += 1
            if terminated or truncated or restart or quit:
                break
    env.close()
================================================
FILE: gym/envs/box2d/lunar_lander.py
================================================
__credits__ = ["Andrea PIERRÉ"]
import math
import warnings
from typing import TYPE_CHECKING, Optional
import numpy as np
import gym
from gym import error, spaces
from gym.error import DependencyNotInstalled
from gym.utils import EzPickle, colorize
from gym.utils.step_api_compatibility import step_api_compatibility
try:
import Box2D
from Box2D.b2 import (
circleShape,
contactListener,
edgeShape,
fixtureDef,
polygonShape,
revoluteJointDef,
)
except ImportError:
raise DependencyNotInstalled("box2d is not installed, run `pip install gym[box2d]`")
if TYPE_CHECKING:
import pygame
FPS = 50
SCALE = 30.0  # affects how fast-paced the game is, forces should be adjusted as well

# engine impulse magnitudes, multiplied by the throttle in step()
MAIN_ENGINE_POWER = 13.0
SIDE_ENGINE_POWER = 0.6

INITIAL_RANDOM = 1000.0  # Set 1500 to make game harder

# hull and leg geometry in pixels (divided by SCALE when building bodies)
LANDER_POLY = [(-14, +17), (-17, 0), (-17, -10), (+17, -10), (+17, 0), (+14, +17)]
LEG_AWAY = 20
LEG_DOWN = 18
LEG_W, LEG_H = 2, 8
LEG_SPRING_TORQUE = 40

# side-engine nozzle placement relative to the hull, in pixels
SIDE_ENGINE_HEIGHT = 14.0
SIDE_ENGINE_AWAY = 12.0

# render window size in pixels
VIEWPORT_W = 600
VIEWPORT_H = 400
class ContactDetector(contactListener):
    """Box2D contact listener: flags hull crashes and per-leg ground contact."""

    def __init__(self, env):
        contactListener.__init__(self)
        self.env = env

    def BeginContact(self, contact):
        touching = (contact.fixtureA.body, contact.fixtureB.body)
        # Any contact involving the hull itself counts as a crash.
        if self.env.lander in touching:
            self.env.game_over = True
        for i in range(2):
            if self.env.legs[i] in touching:
                self.env.legs[i].ground_contact = True

    def EndContact(self, contact):
        touching = (contact.fixtureA.body, contact.fixtureB.body)
        for i in range(2):
            if self.env.legs[i] in touching:
                self.env.legs[i].ground_contact = False
class LunarLander(gym.Env, EzPickle):
"""
### Description
This environment is a classic rocket trajectory optimization problem.
According to Pontryagin's maximum principle, it is optimal to fire the
engine at full throttle or turn it off. This is the reason why this
environment has discrete actions: engine on or off.
There are two environment versions: discrete or continuous.
The landing pad is always at coordinates (0,0). The coordinates are the
first two numbers in the state vector.
Landing outside of the landing pad is possible. Fuel is infinite, so an agent
can learn to fly and then land on its first attempt.
To see a heuristic landing, run:
```
python gym/envs/box2d/lunar_lander.py
```
<!-- To play yourself, run: -->
<!-- python examples/agents/keyboard_agent.py LunarLander-v2 -->
### Action Space
There are four discrete actions available: do nothing, fire left
orientation engine, fire main engine, fire right orientation engine.
### Observation Space
The state is an 8-dimensional vector: the coordinates of the lander in `x` & `y`, its linear
velocities in `x` & `y`, its angle, its angular velocity, and two booleans
that represent whether each leg is in contact with the ground or not.
### Rewards
After every step a reward is granted. The total reward of an episode is the
sum of the rewards for all the steps within that episode.
For each step, the reward:
- is increased/decreased the closer/further the lander is to the landing pad.
- is increased/decreased the slower/faster the lander is moving.
- is decreased the more the lander is tilted (angle not horizontal).
- is increased by 10 points for each leg that is in contact with the ground.
- is decreased by 0.03 points each frame a side engine is firing.
- is decreased by 0.3 points each frame the main engine is firing.
The episode receives an additional reward of -100 or +100 points for crashing or landing safely respectively.
An episode is considered a solution if it scores at least 200 points.
### Starting State
The lander starts at the top center of the viewport with a random initial
force applied to its center of mass.
### Episode Termination
The episode finishes if:
1) the lander crashes (the lander body gets in contact with the moon);
2) the lander gets outside of the viewport (`x` coordinate is greater than 1);
3) the lander is not awake. From the [Box2D docs](https://box2d.org/documentation/md__d_1__git_hub_box2d_docs_dynamics.html#autotoc_md61),
a body which is not awake is a body which doesn't move and doesn't
collide with any other body:
> When Box2D determines that a body (or group of bodies) has come to rest,
> the body enters a sleep state which has very little CPU overhead. If a
> body is awake and collides with a sleeping body, then the sleeping body
> wakes up. Bodies will also wake up if a joint or contact attached to
> them is destroyed.
### Arguments
To use to the _continuous_ environment, you need to specify the
`continuous=True` argument like below:
```python
import gym
env = gym.make(
    "LunarLander-v2",
    continuous=False,
    gravity=-10.0,
    enable_wind=False,
    wind_power=15.0,
    turbulence_power=1.5,
)
```
If `continuous=True` is passed, continuous actions (corresponding to the throttle of the engines) will be used and the
action space will be `Box(-1, +1, (2,), dtype=np.float32)`.
The first coordinate of an action determines the throttle of the main engine, while the second
coordinate specifies the throttle of the lateral boosters.
Given an action `np.array([main, lateral])`, the main engine will be turned off completely if
`main < 0` and the throttle scales affinely from 50% to 100% for `0 <= main <= 1` (in particular, the
main engine doesn't work with less than 50% power).
Similarly, if `-0.5 < lateral < 0.5`, the lateral boosters will not fire at all. If `lateral < -0.5`, the left
booster will fire, and if `lateral > 0.5`, the right booster will fire. Again, the throttle scales affinely
from 50% to 100% between -1 and -0.5 (and 0.5 and 1, respectively).
`gravity` dictates the gravitational constant, this is bounded to be within 0 and -12.
If `enable_wind=True` is passed, there will be wind effects applied to the lander.
The wind is generated using the function `tanh(sin(2 k (t+C)) + sin(pi k (t+C)))`.
`k` is set to 0.01.
`C` is sampled randomly between -9999 and 9999.
`wind_power` dictates the maximum magnitude of linear wind applied to the craft. The recommended value for `wind_power` is between 0.0 and 20.0.
`turbulence_power` dictates the maximum magnitude of rotational wind applied to the craft. The recommended value for `turbulence_power` is between 0.0 and 2.0.
### Version History
- v2: Count energy spent and in v0.24, added turbulence with `wind_power` and `turbulence_power` parameters
- v1: Legs contact with ground added in state vector; contact with ground
give +10 reward points, and -10 if then lose contact; reward
renormalized to 200; harder initial random push.
- v0: Initial version
<!-- ### References -->
### Credits
Created by Oleg Klimov
"""
metadata = {
"render_modes": ["human", "rgb_array"],
"render_fps": FPS,
}
def __init__(
    self,
    render_mode: Optional[str] = None,
    continuous: bool = False,
    gravity: float = -10.0,
    enable_wind: bool = False,
    wind_power: float = 15.0,
    turbulence_power: float = 1.5,
):
    """Build the Box2D world and define the action/observation spaces.

    Args:
        render_mode: one of ``metadata["render_modes"]`` or None.
        continuous: use Box(-1, 1, (2,)) actions instead of Discrete(4).
        gravity: gravitational constant; must satisfy -12 < gravity < 0.
        enable_wind: apply wind/turbulence forces during step().
        wind_power: max magnitude of the linear wind force (a warning is
            emitted outside [0, 20]).
        turbulence_power: max magnitude of the wind torque (a warning is
            emitted outside [0, 2]).
    """
    EzPickle.__init__(
        self,
        render_mode,
        continuous,
        gravity,
        enable_wind,
        wind_power,
        turbulence_power,
    )

    assert (
        -12.0 < gravity and gravity < 0.0
    ), f"gravity (current value: {gravity}) must be between -12 and 0"
    self.gravity = gravity

    # out-of-range powers are only warned about, never rejected
    if 0.0 > wind_power or wind_power > 20.0:
        warnings.warn(
            colorize(
                f"WARN: wind_power value is recommended to be between 0.0 and 20.0, (current value: {wind_power})",
                "yellow",
            ),
        )
    self.wind_power = wind_power

    if 0.0 > turbulence_power or turbulence_power > 2.0:
        warnings.warn(
            colorize(
                f"WARN: turbulence_power value is recommended to be between 0.0 and 2.0, (current value: {turbulence_power})",
                "yellow",
            ),
        )
    self.turbulence_power = turbulence_power

    self.enable_wind = enable_wind
    # NOTE(review): the wind/torque phases are drawn from the *global*
    # NumPy RNG, not the env's seeded np_random, so they are not
    # reproducible through reset(seed=...) — confirm this is intended.
    self.wind_idx = np.random.randint(-9999, 9999)
    self.torque_idx = np.random.randint(-9999, 9999)

    self.screen: pygame.Surface = None
    self.clock = None
    self.isopen = True
    self.world = Box2D.b2World(gravity=(0, gravity))
    self.moon = None
    self.lander: Optional[Box2D.b2Body] = None
    self.particles = []

    self.prev_reward = None

    self.continuous = continuous

    low = np.array(
        [
            # these are bounds for position
            # realistically the environment should have ended
            # long before we reach more than 50% outside
            -1.5,
            -1.5,
            # velocity bounds is 5x rated speed
            -5.0,
            -5.0,
            -math.pi,
            -5.0,
            -0.0,
            -0.0,
        ]
    ).astype(np.float32)
    high = np.array(
        [
            # these are bounds for position
            # realistically the environment should have ended
            # long before we reach more than 50% outside
            1.5,
            1.5,
            # velocity bounds is 5x rated speed
            5.0,
            5.0,
            math.pi,
            5.0,
            1.0,
            1.0,
        ]
    ).astype(np.float32)

    # useful range is -1 .. +1, but spikes can be higher
    self.observation_space = spaces.Box(low, high)

    if self.continuous:
        # Action is two floats [main engine, left-right engines].
        # Main engine: -1..0 off, 0..+1 throttle from 50% to 100% power. Engine can't work with less than 50% power.
        # Left-right: -1.0..-0.5 fire left engine, +0.5..+1.0 fire right engine, -0.5..0.5 off
        self.action_space = spaces.Box(-1, +1, (2,), dtype=np.float32)
    else:
        # Nop, fire left engine, main engine, right engine
        self.action_space = spaces.Discrete(4)

    self.render_mode = render_mode
def _destroy(self):
    """Tear down every Box2D body created by reset(); no-op before first reset."""
    if not self.moon:
        return
    self.world.contactListener = None
    self._clean_particles(True)
    # destroy in the same order as creation: moon, lander, then both legs
    for body in (self.moon, self.lander, self.legs[0], self.legs[1]):
        self.world.DestroyBody(body)
    self.moon = None
    self.lander = None
def reset(
    self,
    *,
    seed: Optional[int] = None,
    options: Optional[dict] = None,
):
    """Rebuild terrain, lander and legs, and apply a random initial push.

    Returns:
        (initial observation, info dict); the observation comes from one
        no-op step so physics and rendering are consistent.
    """
    super().reset(seed=seed)
    self._destroy()
    # keep a reference so the listener is not garbage collected by Box2D
    self.world.contactListener_keepref = ContactDetector(self)
    self.world.contactListener = self.world.contactListener_keepref
    self.game_over = False
    self.prev_shaping = None

    W = VIEWPORT_W / SCALE
    H = VIEWPORT_H / SCALE

    # terrain
    CHUNKS = 11
    height = self.np_random.uniform(0, H / 2, size=(CHUNKS + 1,))
    chunk_x = [W / (CHUNKS - 1) * i for i in range(CHUNKS)]
    # the landing pad spans the two chunks around the center
    self.helipad_x1 = chunk_x[CHUNKS // 2 - 1]
    self.helipad_x2 = chunk_x[CHUNKS // 2 + 1]
    self.helipad_y = H / 4
    # flatten the five chunks around the pad to the pad height
    height[CHUNKS // 2 - 2] = self.helipad_y
    height[CHUNKS // 2 - 1] = self.helipad_y
    height[CHUNKS // 2 + 0] = self.helipad_y
    height[CHUNKS // 2 + 1] = self.helipad_y
    height[CHUNKS // 2 + 2] = self.helipad_y
    # 3-point moving average smooths the random terrain heights
    smooth_y = [
        0.33 * (height[i - 1] + height[i + 0] + height[i + 1])
        for i in range(CHUNKS)
    ]

    self.moon = self.world.CreateStaticBody(
        shapes=edgeShape(vertices=[(0, 0), (W, 0)])
    )
    self.sky_polys = []
    for i in range(CHUNKS - 1):
        p1 = (chunk_x[i], smooth_y[i])
        p2 = (chunk_x[i + 1], smooth_y[i + 1])
        self.moon.CreateEdgeFixture(vertices=[p1, p2], density=0, friction=0.1)
        self.sky_polys.append([p1, p2, (p2[0], H), (p1[0], H)])

    self.moon.color1 = (0.0, 0.0, 0.0)
    self.moon.color2 = (0.0, 0.0, 0.0)

    initial_y = VIEWPORT_H / SCALE
    self.lander: Box2D.b2Body = self.world.CreateDynamicBody(
        position=(VIEWPORT_W / SCALE / 2, initial_y),
        angle=0.0,
        fixtures=fixtureDef(
            shape=polygonShape(
                vertices=[(x / SCALE, y / SCALE) for x, y in LANDER_POLY]
            ),
            density=5.0,
            friction=0.1,
            categoryBits=0x0010,
            maskBits=0x001,  # collide only with ground
            restitution=0.0,
        ),  # 0.99 bouncy
    )
    self.lander.color1 = (128, 102, 230)
    self.lander.color2 = (77, 77, 128)
    # random initial push on the hull
    self.lander.ApplyForceToCenter(
        (
            self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
            self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM),
        ),
        True,
    )

    self.legs = []
    for i in [-1, +1]:
        leg = self.world.CreateDynamicBody(
            position=(VIEWPORT_W / SCALE / 2 - i * LEG_AWAY / SCALE, initial_y),
            angle=(i * 0.05),
            fixtures=fixtureDef(
                shape=polygonShape(box=(LEG_W / SCALE, LEG_H / SCALE)),
                density=1.0,
                restitution=0.0,
                categoryBits=0x0020,
                maskBits=0x001,
            ),
        )
        leg.ground_contact = False
        leg.color1 = (128, 102, 230)
        leg.color2 = (77, 77, 128)
        rjd = revoluteJointDef(
            bodyA=self.lander,
            bodyB=leg,
            localAnchorA=(0, 0),
            localAnchorB=(i * LEG_AWAY / SCALE, LEG_DOWN / SCALE),
            enableMotor=True,
            enableLimit=True,
            maxMotorTorque=LEG_SPRING_TORQUE,
            motorSpeed=+0.3 * i,  # low enough not to jump back into the sky
        )
        if i == -1:
            rjd.lowerAngle = (
                +0.9 - 0.5
            )  # The most esoteric numbers here, angled legs have freedom to travel within
            rjd.upperAngle = +0.9
        else:
            rjd.lowerAngle = -0.9
            rjd.upperAngle = -0.9 + 0.5
        leg.joint = self.world.CreateJoint(rjd)
        self.legs.append(leg)

    self.drawlist = [self.lander] + self.legs

    if self.render_mode == "human":
        self.render()
    return self.step(np.array([0, 0]) if self.continuous else 0)[0], {}
def _create_particle(self, mass, x, y, ttl):
    """Spawn a decorative exhaust particle at world position (x, y).

    Args:
        mass: fixture density for the particle body.
        x, y: world coordinates where the particle appears.
        ttl: time-to-live; render() decays it and _clean_particles()
            destroys the particle once it drops below zero.

    Returns:
        The created Box2D body (with the ``ttl`` attribute attached).
    """
    p = self.world.CreateDynamicBody(
        position=(x, y),
        angle=0.0,
        fixtures=fixtureDef(
            shape=circleShape(radius=2 / SCALE, pos=(0, 0)),
            density=mass,
            friction=0.1,
            categoryBits=0x0100,
            maskBits=0x001,  # collide only with ground
            restitution=0.3,
        ),
    )
    p.ttl = ttl
    self.particles.append(p)
    # opportunistically reap any particle that has already expired
    self._clean_particles(False)
    return p
def _clean_particles(self, all):
    """Destroy expired particles from the front of the queue.

    With ``all`` truthy, every particle is destroyed regardless of ttl.
    """
    while self.particles:
        if not all and self.particles[0].ttl >= 0:
            break
        self.world.DestroyBody(self.particles.pop(0))
def step(self, action):
    """Apply engine and wind forces, advance physics one frame, score it.

    Args:
        action: Discrete(4) integer, or a 2-vector [main, lateral] when
            ``self.continuous`` is True.

    Returns:
        (observation, reward, terminated, truncated, info); truncated is
        always False here.
    """
    assert self.lander is not None

    # Update wind
    # NOTE(review): duplicate of the assert above; only the message differs.
    assert self.lander is not None, "You forgot to call reset()"
    if self.enable_wind and not (
        self.legs[0].ground_contact or self.legs[1].ground_contact
    ):
        # the function used for wind is tanh(sin(2 k x) + sin(pi k x)),
        # which is proven to never be periodic, k = 0.01
        wind_mag = (
            math.tanh(
                math.sin(0.02 * self.wind_idx)
                + (math.sin(math.pi * 0.01 * self.wind_idx))
            )
            * self.wind_power
        )
        self.wind_idx += 1
        self.lander.ApplyForceToCenter(
            (wind_mag, 0.0),
            True,
        )

        # the function used for torque is tanh(sin(2 k x) + sin(pi k x)),
        # which is proven to never be periodic, k = 0.01
        torque_mag = math.tanh(
            math.sin(0.02 * self.torque_idx)
            + (math.sin(math.pi * 0.01 * self.torque_idx))
        ) * (self.turbulence_power)
        self.torque_idx += 1
        self.lander.ApplyTorque(
            (torque_mag),
            True,
        )

    if self.continuous:
        action = np.clip(action, -1, +1).astype(np.float32)
    else:
        assert self.action_space.contains(
            action
        ), f"{action!r} ({type(action)}) invalid "

    # Engines
    # tip/side are the hull's local up and side unit vectors in world frame
    tip = (math.sin(self.lander.angle), math.cos(self.lander.angle))
    side = (-tip[1], tip[0])
    # small random jitter applied to the exhaust direction
    dispersion = [self.np_random.uniform(-1.0, +1.0) / SCALE for _ in range(2)]

    m_power = 0.0
    if (self.continuous and action[0] > 0.0) or (
        not self.continuous and action == 2
    ):
        # Main engine
        if self.continuous:
            m_power = (np.clip(action[0], 0.0, 1.0) + 1.0) * 0.5  # 0.5..1.0
            assert m_power >= 0.5 and m_power <= 1.0
        else:
            m_power = 1.0

        # 4 is move a bit downwards, +-2 for randomness
        ox = tip[0] * (4 / SCALE + 2 * dispersion[0]) + side[0] * dispersion[1]
        oy = -tip[1] * (4 / SCALE + 2 * dispersion[0]) - side[1] * dispersion[1]
        impulse_pos = (self.lander.position[0] + ox, self.lander.position[1] + oy)
        p = self._create_particle(
            3.5,  # 3.5 is here to make particle speed adequate
            impulse_pos[0],
            impulse_pos[1],
            m_power,
        )  # particles are just a decoration
        # equal and opposite impulses on the exhaust particle and the hull
        p.ApplyLinearImpulse(
            (ox * MAIN_ENGINE_POWER * m_power, oy * MAIN_ENGINE_POWER * m_power),
            impulse_pos,
            True,
        )
        self.lander.ApplyLinearImpulse(
            (-ox * MAIN_ENGINE_POWER * m_power, -oy * MAIN_ENGINE_POWER * m_power),
            impulse_pos,
            True,
        )

    s_power = 0.0
    if (self.continuous and np.abs(action[1]) > 0.5) or (
        not self.continuous and action in [1, 3]
    ):
        # Orientation engines
        if self.continuous:
            direction = np.sign(action[1])
            s_power = np.clip(np.abs(action[1]), 0.5, 1.0)
            assert s_power >= 0.5 and s_power <= 1.0
        else:
            # discrete action 1 or 3 maps to direction -1 or +1
            direction = action - 2
            s_power = 1.0

        ox = tip[0] * dispersion[0] + side[0] * (
            3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE
        )
        oy = -tip[1] * dispersion[0] - side[1] * (
            3 * dispersion[1] + direction * SIDE_ENGINE_AWAY / SCALE
        )
        impulse_pos = (
            self.lander.position[0] + ox - tip[0] * 17 / SCALE,
            self.lander.position[1] + oy + tip[1] * SIDE_ENGINE_HEIGHT / SCALE,
        )
        p = self._create_particle(0.7, impulse_pos[0], impulse_pos[1], s_power)
        p.ApplyLinearImpulse(
            (ox * SIDE_ENGINE_POWER * s_power, oy * SIDE_ENGINE_POWER * s_power),
            impulse_pos,
            True,
        )
        self.lander.ApplyLinearImpulse(
            (-ox * SIDE_ENGINE_POWER * s_power, -oy * SIDE_ENGINE_POWER * s_power),
            impulse_pos,
            True,
        )

    self.world.Step(1.0 / FPS, 6 * 30, 2 * 30)

    pos = self.lander.position
    vel = self.lander.linearVelocity
    # observation normalized so useful values are roughly within [-1, 1]
    state = [
        (pos.x - VIEWPORT_W / SCALE / 2) / (VIEWPORT_W / SCALE / 2),
        (pos.y - (self.helipad_y + LEG_DOWN / SCALE)) / (VIEWPORT_H / SCALE / 2),
        vel.x * (VIEWPORT_W / SCALE / 2) / FPS,
        vel.y * (VIEWPORT_H / SCALE / 2) / FPS,
        self.lander.angle,
        20.0 * self.lander.angularVelocity / FPS,
        1.0 if self.legs[0].ground_contact else 0.0,
        1.0 if self.legs[1].ground_contact else 0.0,
    ]
    assert len(state) == 8

    reward = 0
    # shaping rewards the per-step improvement in position, speed,
    # attitude and leg contact
    shaping = (
        -100 * np.sqrt(state[0] * state[0] + state[1] * state[1])
        - 100 * np.sqrt(state[2] * state[2] + state[3] * state[3])
        - 100 * abs(state[4])
        + 10 * state[6]
        + 10 * state[7]
    )  # And ten points for legs contact, the idea is if you
    # lose contact again after landing, you get negative reward
    if self.prev_shaping is not None:
        reward = shaping - self.prev_shaping
    self.prev_shaping = shaping

    reward -= (
        m_power * 0.30
    )  # less fuel spent is better, about -30 for heuristic landing
    reward -= s_power * 0.03

    terminated = False
    if self.game_over or abs(state[0]) >= 1.0:
        terminated = True
        reward = -100
    if not self.lander.awake:
        # the body went to sleep: it has come to rest (successful landing)
        terminated = True
        reward = +100

    if self.render_mode == "human":
        self.render()
    return np.array(state, dtype=np.float32), reward, terminated, False, {}
def render(self):
    """Render the scene; returns an (H, W, 3) array in "rgb_array" mode.

    Raises:
        DependencyNotInstalled: if pygame is not available.
    """
    if self.render_mode is None:
        gym.logger.warn(
            "You are calling render method without specifying any render mode. "
            "You can specify the render_mode at initialization, "
            f'e.g. gym("{self.spec.id}", render_mode="rgb_array")'
        )
        return

    try:
        import pygame
        from pygame import gfxdraw
    except ImportError:
        raise DependencyNotInstalled(
            "pygame is not installed, run `pip install gym[box2d]`"
        )

    if self.screen is None and self.render_mode == "human":
        # window is created lazily on the first human-mode render
        pygame.init()
        pygame.display.init()
        self.screen = pygame.display.set_mode((VIEWPORT_W, VIEWPORT_H))
    if self.clock is None:
        self.clock = pygame.time.Clock()

    self.surf = pygame.Surface((VIEWPORT_W, VIEWPORT_H))

    pygame.transform.scale(self.surf, (SCALE, SCALE))
    pygame.draw.rect(self.surf, (255, 255, 255), self.surf.get_rect())

    # fade exhaust particles as their ttl decays
    for obj in self.particles:
        obj.ttl -= 0.15
        obj.color1 = (
            int(max(0.2, 0.15 + obj.ttl) * 255),
            int(max(0.2, 0.5 * obj.ttl) * 255),
            int(max(0.2, 0.5 * obj.ttl) * 255),
        )
        obj.color2 = (
            int(max(0.2, 0.15 + obj.ttl) * 255),
            int(max(0.2, 0.5 * obj.ttl) * 255),
            int(max(0.2, 0.5 * obj.ttl) * 255),
        )

    self._clean_particles(False)

    # sky polygons fill everything above the terrain line
    for p in self.sky_polys:
        scaled_poly = []
        for coord in p:
            scaled_poly.append((coord[0] * SCALE, coord[1] * SCALE))
        pygame.draw.polygon(self.surf, (0, 0, 0), scaled_poly)
        gfxdraw.aapolygon(self.surf, scaled_poly, (0, 0, 0))

    # draw particles, hull and legs fixture by fixture
    for obj in self.particles + self.drawlist:
        for f in obj.fixtures:
            trans = f.body.transform
            if type(f.shape) is circleShape:
                pygame.draw.circle(
                    self.surf,
                    color=obj.color1,
                    center=trans * f.shape.pos * SCALE,
                    radius=f.shape.radius * SCALE,
                )
                pygame.draw.circle(
                    self.surf,
                    color=obj.color2,
                    center=trans * f.shape.pos * SCALE,
                    radius=f.shape.radius * SCALE,
                )
            else:
                path = [trans * v * SCALE for v in f.shape.vertices]
                pygame.draw.polygon(self.surf, color=obj.color1, points=path)
                gfxdraw.aapolygon(self.surf, path, obj.color1)
                pygame.draw.aalines(
                    self.surf, color=obj.color2, points=path, closed=True
                )

            # helipad flags at both pad edges
            # NOTE(review): drawn once per fixture of every object; could be
            # hoisted out of these loops.
            for x in [self.helipad_x1, self.helipad_x2]:
                x = x * SCALE
                flagy1 = self.helipad_y * SCALE
                flagy2 = flagy1 + 50
                pygame.draw.line(
                    self.surf,
                    color=(255, 255, 255),
                    start_pos=(x, flagy1),
                    end_pos=(x, flagy2),
                    width=1,
                )
                pygame.draw.polygon(
                    self.surf,
                    color=(204, 204, 0),
                    points=[
                        (x, flagy2),
                        (x, flagy2 - 10),
                        (x + 25, flagy2 - 5),
                    ],
                )
                gfxdraw.aapolygon(
                    self.surf,
                    [(x, flagy2), (x, flagy2 - 10), (x + 25, flagy2 - 5)],
                    (204, 204, 0),
                )

    # flip vertically: pygame's y axis points down
    self.surf = pygame.transform.flip(self.surf, False, True)

    if self.render_mode == "human":
        assert self.screen is not None
        self.screen.blit(self.surf, (0, 0))
        pygame.event.pump()
        self.clock.tick(self.metadata["render_fps"])
        pygame.display.flip()
    elif self.render_mode == "rgb_array":
        return np.transpose(
            np.array(pygame.surfarray.pixels3d(self.surf)), axes=(1, 0, 2)
        )
def close(self):
    """Close the render window (if any) and mark the env as closed."""
    if self.screen is None:
        return
    import pygame

    pygame.display.quit()
    pygame.quit()
    self.isopen = False
def heuristic(env, s):
    """Scripted PD-style controller used for testing and demo rollouts.

    Args:
        env: The environment (only ``env.continuous`` is read).
        s (list): The 8-dimensional state:
            s[0]/s[1] horizontal/vertical position,
            s[2]/s[3] horizontal/vertical speed,
            s[4]/s[5] angle and angular speed,
            s[6]/s[7] leg contact flags.

    Returns:
        a: The action to feed into step() — a 2-vector in continuous mode,
        otherwise a Discrete(4) integer.
    """
    # Target angle: lean toward the pad, capped at +/-0.4 rad (~22 degrees).
    angle_targ = max(-0.4, min(0.4, s[0] * 0.5 + s[2] * 1.0))
    # Target altitude grows with horizontal offset.
    hover_targ = 0.55 * np.abs(s[0])

    # Proportional-derivative corrections for angle and altitude.
    angle_todo = (angle_targ - s[4]) * 0.5 - s[5] * 1.0
    hover_todo = (hover_targ - s[1]) * 0.5 - s[3] * 0.5

    if s[6] or s[7]:  # a leg touches: stop steering, only brake the descent
        angle_todo = 0
        hover_todo = -s[3] * 0.5

    if env.continuous:
        a = np.clip(np.array([hover_todo * 20 - 1, -angle_todo * 20]), -1, +1)
    else:
        # priority: main engine if climbing is the dominant need,
        # otherwise fire the side engine matching the angle error
        if hover_todo > np.abs(angle_todo) and hover_todo > 0.05:
            a = 2
        elif angle_todo < -0.05:
            a = 3
        elif angle_todo > +0.05:
            a = 1
        else:
            a = 0
    return a
def demo_heuristic_lander(env, seed=None, render=False):
    """Roll out one episode driven by heuristic().

    Prints progress every 20 steps and returns the accumulated reward.
    """
    episode_return = 0
    frame = 0
    obs, info = env.reset(seed=seed)
    while True:
        act = heuristic(env, obs)
        obs, rew, terminated, truncated, info = step_api_compatibility(
            env.step(act), True
        )
        episode_return += rew

        # a render() returning False signals the window was closed
        if render and env.render() is False:
            break

        if frame % 20 == 0 or terminated or truncated:
            print("observations:", " ".join([f"{x:+0.2f}" for x in obs]))
            print(f"step {frame} total_reward {episode_return:+0.2f}")
        frame += 1
        if terminated or truncated:
            break
    if render:
        env.close()
    return episode_return
class LunarLanderContinuous:
    """Stub for the removed direct-construction continuous variant.

    The continuous mode is now selected with ``gym.make(...,
    continuous=True)``; instantiating this class always raises.
    """

    def __init__(self):
        message = (
            "Error initializing LunarLanderContinuous Environment.\n"
            "Currently, we do not support initializing this mode of environment by calling the class directly.\n"
            "To use this environment, instead create it by specifying the continuous keyword in gym.make, i.e.\n"
            'gym.make("LunarLander-v2", continuous=True)'
        )
        raise error.Error(message)
if __name__ == "__main__":
    # Demo: fly one rendered episode with the scripted heuristic controller.
    demo_heuristic_lander(LunarLander(), render=True)
================================================
FILE: gym/envs/classic_control/__init__.py
================================================
from gym.envs.classic_control.acrobot import AcrobotEnv
from gym.envs.classic_control.cartpole import CartPoleEnv
from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv
from gym.envs.classic_control.mountain_car import MountainCarEnv
from gym.envs.classic_control.pendulum import PendulumEnv
================================================
FILE: gym/envs/classic_control/acrobot.py
================================================
"""classic Acrobot task"""
from typing import Optional
import numpy as np
from numpy import cos, pi, sin
from gym import core, logger, spaces
from gym.error import DependencyNotInstalled
__copyright__ = "Copyright 2013, RLPy http://acl.mit.edu/RLPy"
__credits__ = [
"Alborz Geramifard",
"Robert H. Klein",
"Christoph Dann",
"William Dabney",
"Jonathan P. How",
]
__license__ = "BSD 3-Clause"
__author__ = "Christoph Dann <cdann@cdann.de>"
# SOURCE:
# https://github.com/rlpy/rlpy/blob/master/rlpy/Domains/Acrobot.py
from gym.envs.classic_control import utils
class AcrobotEnv(core.Env):
    """
    ### Description
    The Acrobot environment is based on Sutton's work in
    ["Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding"](https://papers.nips.cc/paper/1995/hash/8f1d43620bc6bb580df6e80b0dc05c48-Abstract.html)
    and [Sutton and Barto's book](http://www.incompleteideas.net/book/the-book-2nd.html).
    The system consists of two links connected linearly to form a chain, with one end of
    the chain fixed. The joint between the two links is actuated. The goal is to apply
    torques on the actuated joint to swing the free end of the linear chain above a
    given height while starting from the initial state of hanging downwards.
    As seen in the **Gif**: two blue links connected by two green joints. The joint in
    between the two links is actuated. The goal is to swing the free end of the outer-link
    to reach the target height (black horizontal line above system) by applying torque on
    the actuator.
    ### Action Space
    The action is discrete, deterministic, and represents the torque applied on the actuated
    joint between the two links.
    | Num | Action                                | Unit         |
    |-----|---------------------------------------|--------------|
    | 0   | apply -1 torque to the actuated joint | torque (N m) |
    | 1   | apply 0 torque to the actuated joint  | torque (N m) |
    | 2   | apply 1 torque to the actuated joint  | torque (N m) |
    ### Observation Space
    The observation is a `ndarray` with shape `(6,)` that provides information about the
    two rotational joint angles as well as their angular velocities:
    | Num | Observation                  | Min                 | Max               |
    |-----|------------------------------|---------------------|-------------------|
    | 0   | Cosine of `theta1`           | -1                  | 1                 |
    | 1   | Sine of `theta1`             | -1                  | 1                 |
    | 2   | Cosine of `theta2`           | -1                  | 1                 |
    | 3   | Sine of `theta2`             | -1                  | 1                 |
    | 4   | Angular velocity of `theta1` | ~ -12.567 (-4 * pi) | ~ 12.567 (4 * pi) |
    | 5   | Angular velocity of `theta2` | ~ -28.274 (-9 * pi) | ~ 28.274 (9 * pi) |
    where
    - `theta1` is the angle of the first joint, where an angle of 0 indicates the first link is pointing directly
    downwards.
    - `theta2` is ***relative to the angle of the first link.***
    An angle of 0 corresponds to having the same angle between the two links.
    The angular velocities of `theta1` and `theta2` are bounded at ±4π, and ±9π rad/s respectively.
    A state of `[1, 0, 1, 0, ..., ...]` indicates that both links are pointing downwards.
    ### Rewards
    The goal is to have the free end reach a designated target height in as few steps as possible,
    and as such all steps that do not reach the goal incur a reward of -1.
    Achieving the target height results in termination with a reward of 0. The reward threshold is -100.
    ### Starting State
    Each parameter in the underlying state (`theta1`, `theta2`, and the two angular velocities) is initialized
    uniformly between -0.1 and 0.1. This means both links are pointing downwards with some initial stochasticity.
    ### Episode End
    The episode ends if one of the following occurs:
    1. Termination: The free end reaches the target height, which is constructed as:
    `-cos(theta1) - cos(theta2 + theta1) > 1.0`
    2. Truncation: Episode length is greater than 500 (200 for v0)
    ### Arguments
    No additional arguments are currently supported.
    ```
    env = gym.make('Acrobot-v1')
    ```
    By default, the dynamics of the acrobot follow those described in Sutton and Barto's book
    [Reinforcement Learning: An Introduction](http://incompleteideas.net/book/11/node4.html).
    However, a `book_or_nips` parameter can be modified to change the pendulum dynamics to those described
    in the original [NeurIPS paper](https://papers.nips.cc/paper/1995/hash/8f1d43620bc6bb580df6e80b0dc05c48-Abstract.html).
    ```
    # To change the dynamics as described above
    env.env.book_or_nips = 'nips'
    ```
    See the following note and
    the [implementation](https://github.com/openai/gym/blob/master/gym/envs/classic_control/acrobot.py) for details:
    > The dynamics equations were missing some terms in the NIPS paper which
    are present in the book. R. Sutton confirmed in personal correspondence
    that the experimental results shown in the paper and the book were
    generated with the equations shown in the book.
    However, there is the option to run the domain with the paper equations
    by setting `book_or_nips = 'nips'`
    ### Version History
    - v1: Maximum number of steps increased from 200 to 500. The observation space for v0 provided direct readings of
    `theta1` and `theta2` in radians, having a range of `[-pi, pi]`. The v1 observation space as described here provides the
    sine and cosine of each angle instead.
    - v0: Initial versions release (1.0.0) (removed from gym for v1)
    ### References
    - Sutton, R. S. (1996). Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding.
    In D. Touretzky, M. C. Mozer, & M. Hasselmo (Eds.), Advances in Neural Information Processing Systems (Vol. 8).
    MIT Press. https://proceedings.neurips.cc/paper/1995/file/8f1d43620bc6bb580df6e80b0dc05c48-Paper.pdf
    - Sutton, R. S., Barto, A. G. (2018 ). Reinforcement Learning: An Introduction. The MIT Press.
    """

    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": 15,
    }

    dt = 0.2  # [s] integration timestep handed to the RK4 solver in step()

    LINK_LENGTH_1 = 1.0  # [m]
    LINK_LENGTH_2 = 1.0  # [m]
    LINK_MASS_1 = 1.0  #: [kg] mass of link 1
    LINK_MASS_2 = 1.0  #: [kg] mass of link 2
    LINK_COM_POS_1 = 0.5  #: [m] position of the center of mass of link 1
    LINK_COM_POS_2 = 0.5  #: [m] position of the center of mass of link 2
    LINK_MOI = 1.0  #: moments of inertia for both links

    MAX_VEL_1 = 4 * pi
    MAX_VEL_2 = 9 * pi

    # Discrete action index -> applied torque [N m]
    AVAIL_TORQUE = [-1.0, 0.0, +1]

    torque_noise_max = 0.0  # half-width of uniform noise added to the torque

    SCREEN_DIM = 500

    #: use dynamics equations from the nips paper or the book
    book_or_nips = "book"
    action_arrow = None
    domain_fig = None
    actions_num = 3

    def __init__(self, render_mode: Optional[str] = None):
        """Build the spaces; pygame resources are created lazily in render()."""
        self.render_mode = render_mode
        self.screen = None
        self.clock = None
        self.isopen = True
        # Observation bounds: four trig terms in [-1, 1] plus the two
        # angular-velocity caps.
        high = np.array(
            [1.0, 1.0, 1.0, 1.0, self.MAX_VEL_1, self.MAX_VEL_2], dtype=np.float32
        )
        low = -high
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)
        self.action_space = spaces.Discrete(3)
        self.state = None

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        """Sample a fresh 4-d state uniformly in [low, high] and return (obs, info)."""
        super().reset(seed=seed)
        # Note that if you use custom reset bounds, it may lead to out-of-bound
        # state/observations.
        low, high = utils.maybe_parse_reset_bounds(
            options, -0.1, 0.1  # default low
        )  # default high
        self.state = self.np_random.uniform(low=low, high=high, size=(4,)).astype(
            np.float32
        )

        if self.render_mode == "human":
            self.render()
        return self._get_ob(), {}

    def step(self, a):
        """Apply discrete action ``a``, integrate dynamics for one dt.

        Returns the (obs, reward, terminated, truncated, info) 5-tuple with
        reward -1.0 per step and 0.0 on the terminating step.
        """
        s = self.state
        assert s is not None, "Call reset before using AcrobotEnv object."
        torque = self.AVAIL_TORQUE[a]

        # Add noise to the force action
        if self.torque_noise_max > 0:
            torque += self.np_random.uniform(
                -self.torque_noise_max, self.torque_noise_max
            )

        # Now, augment the state with our force action so it can be passed to
        # _dsdt
        s_augmented = np.append(s, torque)

        ns = rk4(self._dsdt, s_augmented, [0, self.dt])

        # Wrap angles into [-pi, pi] and clip velocities to their caps.
        ns[0] = wrap(ns[0], -pi, pi)
        ns[1] = wrap(ns[1], -pi, pi)
        ns[2] = bound(ns[2], -self.MAX_VEL_1, self.MAX_VEL_1)
        ns[3] = bound(ns[3], -self.MAX_VEL_2, self.MAX_VEL_2)
        self.state = ns
        terminated = self._terminal()
        reward = -1.0 if not terminated else 0.0

        if self.render_mode == "human":
            self.render()
        return (self._get_ob(), reward, terminated, False, {})

    def _get_ob(self):
        """Map internal state (theta1, theta2, dtheta1, dtheta2) to the 6-d observation."""
        s = self.state
        assert s is not None, "Call reset before using AcrobotEnv object."
        return np.array(
            [cos(s[0]), sin(s[0]), cos(s[1]), sin(s[1]), s[2], s[3]], dtype=np.float32
        )

    def _terminal(self):
        """True when the free end is above the target height (see class docstring)."""
        s = self.state
        assert s is not None, "Call reset before using AcrobotEnv object."
        return bool(-cos(s[0]) - cos(s[1] + s[0]) > 1.0)

    def _dsdt(self, s_augmented):
        """Time-derivative of the torque-augmented state for the RK4 integrator.

        ``s_augmented`` is (theta1, theta2, dtheta1, dtheta2, torque); the
        returned tuple appends 0.0 so the torque stays constant during a step.
        """
        m1 = self.LINK_MASS_1
        m2 = self.LINK_MASS_2
        l1 = self.LINK_LENGTH_1
        lc1 = self.LINK_COM_POS_1
        lc2 = self.LINK_COM_POS_2
        I1 = self.LINK_MOI
        I2 = self.LINK_MOI
        g = 9.8
        a = s_augmented[-1]
        s = s_augmented[:-1]
        theta1 = s[0]
        theta2 = s[1]
        dtheta1 = s[2]
        dtheta2 = s[3]
        d1 = (
            m1 * lc1**2
            + m2 * (l1**2 + lc2**2 + 2 * l1 * lc2 * cos(theta2))
            + I1
            + I2
        )
        d2 = m2 * (lc2**2 + l1 * lc2 * cos(theta2)) + I2
        phi2 = m2 * lc2 * g * cos(theta1 + theta2 - pi / 2.0)
        phi1 = (
            -m2 * l1 * lc2 * dtheta2**2 * sin(theta2)
            - 2 * m2 * l1 * lc2 * dtheta2 * dtheta1 * sin(theta2)
            + (m1 * lc1 + m2 * l1) * g * cos(theta1 - pi / 2)
            + phi2
        )
        if self.book_or_nips == "nips":
            # the following line is consistent with the description in the
            # paper
            ddtheta2 = (a + d2 / d1 * phi1 - phi2) / (m2 * lc2**2 + I2 - d2**2 / d1)
        else:
            # the following line is consistent with the java implementation and the
            # book
            ddtheta2 = (
                a + d2 / d1 * phi1 - m2 * l1 * lc2 * dtheta1**2 * sin(theta2) - phi2
            ) / (m2 * lc2**2 + I2 - d2**2 / d1)
        ddtheta1 = -(d2 * ddtheta2 + phi1) / d1
        return dtheta1, dtheta2, ddtheta1, ddtheta2, 0.0

    def render(self):
        """Draw the two-link system; returns an RGB array in "rgb_array" mode."""
        if self.render_mode is None:
            logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                # Fixed: the suggested snippet must use gym.make, not gym(...)
                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
            )
            return

        try:
            import pygame
            from pygame import gfxdraw
        except ImportError:
            raise DependencyNotInstalled(
                "pygame is not installed, run `pip install gym[classic_control]`"
            )

        if self.screen is None:
            pygame.init()
            if self.render_mode == "human":
                pygame.display.init()
                self.screen = pygame.display.set_mode(
                    (self.SCREEN_DIM, self.SCREEN_DIM)
                )
            else:  # mode in "rgb_array"
                self.screen = pygame.Surface((self.SCREEN_DIM, self.SCREEN_DIM))
        if self.clock is None:
            self.clock = pygame.time.Clock()

        surf = pygame.Surface((self.SCREEN_DIM, self.SCREEN_DIM))
        surf.fill((255, 255, 255))
        s = self.state

        # NOTE: this local shadows the module-level `bound()` helper, which is
        # not used inside render().
        bound = self.LINK_LENGTH_1 + self.LINK_LENGTH_2 + 0.2  # 2.2 for default
        scale = self.SCREEN_DIM / (bound * 2)
        offset = self.SCREEN_DIM / 2

        if s is None:
            return None

        p1 = [
            -self.LINK_LENGTH_1 * cos(s[0]) * scale,
            self.LINK_LENGTH_1 * sin(s[0]) * scale,
        ]

        p2 = [
            p1[0] - self.LINK_LENGTH_2 * cos(s[0] + s[1]) * scale,
            p1[1] + self.LINK_LENGTH_2 * sin(s[0] + s[1]) * scale,
        ]

        xys = np.array([[0, 0], p1, p2])[:, ::-1]
        thetas = [s[0] - pi / 2, s[0] + s[1] - pi / 2]
        link_lengths = [self.LINK_LENGTH_1 * scale, self.LINK_LENGTH_2 * scale]

        # Target height line at y = 1 (world units), spanning the screen.
        pygame.draw.line(
            surf,
            start_pos=(-2.2 * scale + offset, 1 * scale + offset),
            end_pos=(2.2 * scale + offset, 1 * scale + offset),
            color=(0, 0, 0),
        )

        for ((x, y), th, llen) in zip(xys, thetas, link_lengths):
            x = x + offset
            y = y + offset
            l, r, t, b = 0, llen, 0.1 * scale, -0.1 * scale
            coords = [(l, b), (l, t), (r, t), (r, b)]
            transformed_coords = []
            for coord in coords:
                coord = pygame.math.Vector2(coord).rotate_rad(th)
                coord = (coord[0] + x, coord[1] + y)
                transformed_coords.append(coord)
            gfxdraw.aapolygon(surf, transformed_coords, (0, 204, 204))
            gfxdraw.filled_polygon(surf, transformed_coords, (0, 204, 204))

            gfxdraw.aacircle(surf, int(x), int(y), int(0.1 * scale), (204, 204, 0))
            gfxdraw.filled_circle(surf, int(x), int(y), int(0.1 * scale), (204, 204, 0))

        # Pygame's y-axis points down; flip vertically so "up" renders up.
        surf = pygame.transform.flip(surf, False, True)
        self.screen.blit(surf, (0, 0))

        if self.render_mode == "human":
            pygame.event.pump()
            self.clock.tick(self.metadata["render_fps"])
            pygame.display.flip()
        elif self.render_mode == "rgb_array":
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
            )

    def close(self):
        """Shut down the pygame display if it was ever opened."""
        if self.screen is not None:
            import pygame

            pygame.display.quit()
            pygame.quit()
            self.isopen = False
def wrap(x, m, M):
    """Wrap the scalar ``x`` into the interval ``[m, M]``.

    Unlike ``bound()``, which clips, this shifts ``x`` by whole multiples of
    the interval width until it falls inside the range. For example, with
    m = -180, M = 180 (degrees), x = 360 wraps to 0.

    Args:
        x: a scalar
        m: minimum possible value in range
        M: maximum possible value in range

    Returns:
        x: a scalar, wrapped
    """
    span = M - m
    while x > M:
        x -= span
    while x < m:
        x += span
    return x
def bound(x, m, M=None):
    """Clip ``x`` into a range.

    Call either as ``bound(x, m, M)`` with scalar bounds, or as
    ``bound(x, (lo, hi))`` with the bounds packed into a length-2 vector
    (any third argument is then ignored).

    Args:
        x: scalar
        m: The lower bound, or a (lower, upper) pair
        M: The upper bound

    Returns:
        x: scalar, bound between min (m) and Max (M)
    """
    if M is None:
        # Bounds were supplied as a single (lower, upper) vector.
        m, M = m[0], m[1]
    return min(max(x, m), M)
def rk4(derivs, y0, t):
    """
    Integrate 1-D or N-D system of ODEs using 4-th order Runge-Kutta.

    Example for 2D system:

        >>> def derivs(x):
        ...     d1 = x[0] + 2*x[1]
        ...     d2 = -3*x[0] + 4*x[1]
        ...     return d1, d2

        >>> dt = 0.0005
        >>> t = np.arange(0.0, 2.0, dt)
        >>> y0 = (1,2)
        >>> yout = rk4(derivs, y0, t)

    Args:
        derivs: the derivative of the system and has the signature ``dy = derivs(yi)``
        y0: initial state vector
        t: sample times

    Returns:
        yout: Runge-Kutta approximation of the ODE
    """
    try:
        Ny = len(y0)
    except TypeError:
        # Scalar initial state. NOTE(review): the final ``[:4]`` slice below
        # is not valid for this branch; in this file rk4 is only ever called
        # with a vector state.
        yout = np.zeros((len(t),), np.float64)
    else:
        # np.float64 replaces the deprecated np.float_ alias (removed in
        # NumPy 2.0); it is the same dtype.
        yout = np.zeros((len(t), Ny), np.float64)

    yout[0] = y0

    for i in np.arange(len(t) - 1):
        this = t[i]
        dt = t[i + 1] - this
        dt2 = dt / 2.0
        y0 = yout[i]

        # Classic RK4: two half-step slope estimates between k1 and k4.
        k1 = np.asarray(derivs(y0))
        k2 = np.asarray(derivs(y0 + dt2 * k1))
        k3 = np.asarray(derivs(y0 + dt2 * k2))
        k4 = np.asarray(derivs(y0 + dt * k3))
        yout[i + 1] = y0 + dt / 6.0 * (k1 + 2 * k2 + 2 * k3 + k4)
    # We only care about the final timestep and we cleave off action value which will be zero
    return yout[-1][:4]
================================================
FILE: gym/envs/classic_control/cartpole.py
================================================
"""
Classic cart-pole system implemented by Rich Sutton et al.
Copied from http://incompleteideas.net/sutton/book/code/pole.c
permalink: https://perma.cc/C9ZM-652R
"""
import math
from typing import Optional, Union
import numpy as np
import gym
from gym import logger, spaces
from gym.envs.classic_control import utils
from gym.error import DependencyNotInstalled
class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):
    """
    ### Description
    This environment corresponds to the version of the cart-pole problem described by Barto, Sutton, and Anderson in
    ["Neuronlike Adaptive Elements That Can Solve Difficult Learning Control Problem"](https://ieeexplore.ieee.org/document/6313077).
    A pole is attached by an un-actuated joint to a cart, which moves along a frictionless track.
    The pendulum is placed upright on the cart and the goal is to balance the pole by applying forces
    in the left and right direction on the cart.
    ### Action Space
    The action is a `ndarray` with shape `(1,)` which can take values `{0, 1}` indicating the direction
    of the fixed force the cart is pushed with.
    | Num | Action                 |
    |-----|------------------------|
    | 0   | Push cart to the left  |
    | 1   | Push cart to the right |
    **Note**: The velocity that is reduced or increased by the applied force is not fixed and it depends on the angle
    the pole is pointing. The center of gravity of the pole varies the amount of energy needed to move the cart underneath it
    ### Observation Space
    The observation is a `ndarray` with shape `(4,)` with the values corresponding to the following positions and velocities:
    | Num | Observation           | Min                 | Max               |
    |-----|-----------------------|---------------------|-------------------|
    | 0   | Cart Position         | -4.8                | 4.8               |
    | 1   | Cart Velocity         | -Inf                | Inf               |
    | 2   | Pole Angle            | ~ -0.418 rad (-24°) | ~ 0.418 rad (24°) |
    | 3   | Pole Angular Velocity | -Inf                | Inf               |
    **Note:** While the ranges above denote the possible values for observation space of each element,
    it is not reflective of the allowed values of the state space in an unterminated episode. Particularly:
    -  The cart x-position (index 0) can be take values between `(-4.8, 4.8)`, but the episode terminates
    if the cart leaves the `(-2.4, 2.4)` range.
    -  The pole angle can be observed between  `(-.418, .418)` radians (or **±24°**), but the episode terminates
    if the pole angle is not in the range `(-.2095, .2095)` (or **±12°**)
    ### Rewards
    Since the goal is to keep the pole upright for as long as possible, a reward of `+1` for every step taken,
    including the termination step, is allotted. The threshold for rewards is 475 for v1.
    ### Starting State
    All observations are assigned a uniformly random value in `(-0.05, 0.05)`
    ### Episode End
    The episode ends if any one of the following occurs:
    1. Termination: Pole Angle is greater than ±12°
    2. Termination: Cart Position is greater than ±2.4 (center of the cart reaches the edge of the display)
    3. Truncation: Episode length is greater than 500 (200 for v0)
    ### Arguments
    ```
    gym.make('CartPole-v1')
    ```
    No additional arguments are currently supported.
    """

    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": 50,
    }

    def __init__(self, render_mode: Optional[str] = None):
        """Set physical constants and spaces; pygame is initialized lazily."""
        self.gravity = 9.8
        self.masscart = 1.0
        self.masspole = 0.1
        self.total_mass = self.masspole + self.masscart
        self.length = 0.5  # actually half the pole's length
        self.polemass_length = self.masspole * self.length
        self.force_mag = 10.0
        self.tau = 0.02  # seconds between state updates
        self.kinematics_integrator = "euler"

        # Angle at which to fail the episode
        self.theta_threshold_radians = 12 * 2 * math.pi / 360
        self.x_threshold = 2.4

        # Angle limit set to 2 * theta_threshold_radians so failing observation
        # is still within bounds.
        high = np.array(
            [
                self.x_threshold * 2,
                np.finfo(np.float32).max,
                self.theta_threshold_radians * 2,
                np.finfo(np.float32).max,
            ],
            dtype=np.float32,
        )

        self.action_space = spaces.Discrete(2)
        self.observation_space = spaces.Box(-high, high, dtype=np.float32)

        self.render_mode = render_mode

        self.screen_width = 600
        self.screen_height = 400
        self.screen = None
        self.clock = None
        self.isopen = True
        self.state = None

        # Counts extra steps taken after termination (for the repeated-step
        # warning below); None until an episode has terminated.
        self.steps_beyond_terminated = None

    def step(self, action):
        """Advance the simulation by one tau using the configured integrator.

        Returns (obs, reward, terminated, truncated, info); reward is +1 on
        every step including the terminating one.
        """
        err_msg = f"{action!r} ({type(action)}) invalid"
        assert self.action_space.contains(action), err_msg
        assert self.state is not None, "Call reset before using step method."
        x, x_dot, theta, theta_dot = self.state
        force = self.force_mag if action == 1 else -self.force_mag
        costheta = math.cos(theta)
        sintheta = math.sin(theta)

        # For the interested reader:
        # https://coneural.org/florian/papers/05_cart_pole.pdf
        temp = (
            force + self.polemass_length * theta_dot**2 * sintheta
        ) / self.total_mass
        thetaacc = (self.gravity * sintheta - costheta * temp) / (
            self.length * (4.0 / 3.0 - self.masspole * costheta**2 / self.total_mass)
        )
        xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass

        if self.kinematics_integrator == "euler":
            x = x + self.tau * x_dot
            x_dot = x_dot + self.tau * xacc
            theta = theta + self.tau * theta_dot
            theta_dot = theta_dot + self.tau * thetaacc
        else:  # semi-implicit euler
            x_dot = x_dot + self.tau * xacc
            x = x + self.tau * x_dot
            theta_dot = theta_dot + self.tau * thetaacc
            theta = theta + self.tau * theta_dot

        self.state = (x, x_dot, theta, theta_dot)

        terminated = bool(
            x < -self.x_threshold
            or x > self.x_threshold
            or theta < -self.theta_threshold_radians
            or theta > self.theta_threshold_radians
        )

        if not terminated:
            reward = 1.0
        elif self.steps_beyond_terminated is None:
            # Pole just fell!
            self.steps_beyond_terminated = 0
            reward = 1.0
        else:
            if self.steps_beyond_terminated == 0:
                logger.warn(
                    "You are calling 'step()' even though this "
                    "environment has already returned terminated = True. You "
                    "should always call 'reset()' once you receive 'terminated = "
                    "True' -- any further steps are undefined behavior."
                )
            self.steps_beyond_terminated += 1
            reward = 0.0

        if self.render_mode == "human":
            self.render()
        return np.array(self.state, dtype=np.float32), reward, terminated, False, {}

    def reset(
        self,
        *,
        seed: Optional[int] = None,
        options: Optional[dict] = None,
    ):
        """Sample all four state variables uniformly in [low, high]; return (obs, info)."""
        super().reset(seed=seed)
        # Note that if you use custom reset bounds, it may lead to out-of-bound
        # state/observations.
        low, high = utils.maybe_parse_reset_bounds(
            options, -0.05, 0.05  # default low
        )  # default high
        self.state = self.np_random.uniform(low=low, high=high, size=(4,))
        self.steps_beyond_terminated = None

        if self.render_mode == "human":
            self.render()
        return np.array(self.state, dtype=np.float32), {}

    def render(self):
        """Draw the cart and pole; returns an RGB array in "rgb_array" mode."""
        if self.render_mode is None:
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                # Fixed: the suggested snippet must use gym.make, not gym(...)
                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
            )
            return

        try:
            import pygame
            from pygame import gfxdraw
        except ImportError:
            raise DependencyNotInstalled(
                "pygame is not installed, run `pip install gym[classic_control]`"
            )

        if self.screen is None:
            pygame.init()
            if self.render_mode == "human":
                pygame.display.init()
                self.screen = pygame.display.set_mode(
                    (self.screen_width, self.screen_height)
                )
            else:  # mode == "rgb_array"
                self.screen = pygame.Surface((self.screen_width, self.screen_height))
        if self.clock is None:
            self.clock = pygame.time.Clock()

        world_width = self.x_threshold * 2
        scale = self.screen_width / world_width
        polewidth = 10.0
        polelen = scale * (2 * self.length)
        cartwidth = 50.0
        cartheight = 30.0

        if self.state is None:
            return None

        x = self.state

        self.surf = pygame.Surface((self.screen_width, self.screen_height))
        self.surf.fill((255, 255, 255))

        l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2
        axleoffset = cartheight / 4.0
        cartx = x[0] * scale + self.screen_width / 2.0  # MIDDLE OF CART
        carty = 100  # TOP OF CART
        cart_coords = [(l, b), (l, t), (r, t), (r, b)]
        cart_coords = [(c[0] + cartx, c[1] + carty) for c in cart_coords]
        gfxdraw.aapolygon(self.surf, cart_coords, (0, 0, 0))
        gfxdraw.filled_polygon(self.surf, cart_coords, (0, 0, 0))

        l, r, t, b = (
            -polewidth / 2,
            polewidth / 2,
            polelen - polewidth / 2,
            -polewidth / 2,
        )

        pole_coords = []
        for coord in [(l, b), (l, t), (r, t), (r, b)]:
            # Rotate by -theta: pygame's y-axis points down.
            coord = pygame.math.Vector2(coord).rotate_rad(-x[2])
            coord = (coord[0] + cartx, coord[1] + carty + axleoffset)
            pole_coords.append(coord)
        gfxdraw.aapolygon(self.surf, pole_coords, (202, 152, 101))
        gfxdraw.filled_polygon(self.surf, pole_coords, (202, 152, 101))

        gfxdraw.aacircle(
            self.surf,
            int(cartx),
            int(carty + axleoffset),
            int(polewidth / 2),
            (129, 132, 203),
        )
        gfxdraw.filled_circle(
            self.surf,
            int(cartx),
            int(carty + axleoffset),
            int(polewidth / 2),
            (129, 132, 203),
        )

        gfxdraw.hline(self.surf, 0, self.screen_width, carty, (0, 0, 0))

        # Flip vertically so "up" renders up on screen.
        self.surf = pygame.transform.flip(self.surf, False, True)
        self.screen.blit(self.surf, (0, 0))
        if self.render_mode == "human":
            pygame.event.pump()
            self.clock.tick(self.metadata["render_fps"])
            pygame.display.flip()

        elif self.render_mode == "rgb_array":
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
            )

    def close(self):
        """Shut down the pygame display if it was ever opened."""
        if self.screen is not None:
            import pygame

            pygame.display.quit()
            pygame.quit()
            self.isopen = False
================================================
FILE: gym/envs/classic_control/continuous_mountain_car.py
================================================
"""
@author: Olivier Sigaud
A merge between two sources:
* Adaptation of the MountainCar Environment from the "FAReinforcement" library
of Jose Antonio Martin H. (version 1.0), adapted by 'Tom Schaul, tom@idsia.ch'
and then modified by Arnaud de Broissia
* the gym MountainCar environment
itself from
http://incompleteideas.net/sutton/MountainCar/MountainCar1.cp
permalink: https://perma.cc/6Z2N-PFWC
"""
import math
from typing import Optional
import numpy as np
import gym
from gym import spaces
from gym.envs.classic_control import utils
from gym.error import DependencyNotInstalled
class Continuous_MountainCarEnv(gym.Env):
    """
    ### Description
    The Mountain Car MDP is a deterministic MDP that consists of a car placed stochastically
    at the bottom of a sinusoidal valley, with the only possible actions being the accelerations
    that can be applied to the car in either direction. The goal of the MDP is to strategically
    accelerate the car to reach the goal state on top of the right hill. There are two versions
    of the mountain car domain in gym: one with discrete actions and one with continuous.
    This version is the one with continuous actions.
    This MDP first appeared in [Andrew Moore's PhD Thesis (1990)](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-209.pdf)
    ```
    @TECHREPORT{Moore90efficientmemory-based,
        author = {Andrew William Moore},
        title = {Efficient Memory-based Learning for Robot Control},
        institution = {University of Cambridge},
        year = {1990}
    }
    ```
    ### Observation Space
    The observation is a `ndarray` with shape `(2,)` where the elements correspond to the following:
    | Num | Observation                          | Min  | Max | Unit         |
    |-----|--------------------------------------|------|-----|--------------|
    | 0   | position of the car along the x-axis | -Inf | Inf | position (m) |
    | 1   | velocity of the car                  | -Inf | Inf | position (m) |
    ### Action Space
    The action is a `ndarray` with shape `(1,)`, representing the directional force applied on the car.
    The action is clipped in the range `[-1,1]` and multiplied by a power of 0.0015.
    ### Transition Dynamics:
    Given an action, the mountain car follows the following transition dynamics:
    *velocity<sub>t+1</sub> = velocity<sub>t</sub> + force * self.power - 0.0025 * cos(3 * position<sub>t</sub>)*
    *position<sub>t+1</sub> = position<sub>t</sub> + velocity<sub>t+1</sub>*
    where force is the action clipped to the range `[-1,1]` and power is a constant 0.0015.
    The collisions at either end are inelastic with the velocity set to 0 upon collision with the wall.
    The position is clipped to the range [-1.2, 0.6] and velocity is clipped to the range [-0.07, 0.07].
    ### Reward
    A negative reward of *-0.1 * action<sup>2</sup>* is received at each timestep to penalise for
    taking actions of large magnitude. If the mountain car reaches the goal then a positive reward of +100
    is added to the negative reward for that timestep.
    ### Starting State
    The position of the car is assigned a uniform random value in `[-0.6 , -0.4]`.
    The starting velocity of the car is always assigned to 0.
    ### Episode End
    The episode ends if either of the following happens:
    1. Termination: The position of the car is greater than or equal to 0.45 (the goal position on top of the right hill)
    2. Truncation: The length of the episode is 999.
    ### Arguments
    ```
    gym.make('MountainCarContinuous-v0')
    ```
    ### Version History
    * v0: Initial versions release (1.0.0)
    """

    metadata = {
        "render_modes": ["human", "rgb_array"],
        "render_fps": 30,
    }

    def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
        """Set physical constants and spaces; pygame is initialized lazily."""
        self.min_action = -1.0
        self.max_action = 1.0
        self.min_position = -1.2
        self.max_position = 0.6
        self.max_speed = 0.07
        self.goal_position = (
            0.45  # was 0.5 in gym, 0.45 in Arnaud de Broissia's version
        )
        self.goal_velocity = goal_velocity
        self.power = 0.0015  # scales the clipped action into acceleration

        self.low_state = np.array(
            [self.min_position, -self.max_speed], dtype=np.float32
        )
        self.high_state = np.array(
            [self.max_position, self.max_speed], dtype=np.float32
        )

        self.render_mode = render_mode

        self.screen_width = 600
        self.screen_height = 400
        self.screen = None
        self.clock = None
        self.isopen = True

        self.action_space = spaces.Box(
            low=self.min_action, high=self.max_action, shape=(1,), dtype=np.float32
        )
        self.observation_space = spaces.Box(
            low=self.low_state, high=self.high_state, dtype=np.float32
        )

    def step(self, action: np.ndarray):
        """Apply the clipped force for one timestep.

        Returns (obs, reward, terminated, truncated, info); the reward is
        -0.1 * action^2 per step, plus 100 on the terminating step.
        """
        position = self.state[0]
        velocity = self.state[1]
        force = min(max(action[0], self.min_action), self.max_action)

        velocity += force * self.power - 0.0025 * math.cos(3 * position)
        if velocity > self.max_speed:
            velocity = self.max_speed
        if velocity < -self.max_speed:
            velocity = -self.max_speed
        position += velocity
        if position > self.max_position:
            position = self.max_position
        if position < self.min_position:
            position = self.min_position
        # Inelastic collision with the left wall.
        if position == self.min_position and velocity < 0:
            velocity = 0

        # Convert a possible numpy bool to a Python bool.
        terminated = bool(
            position >= self.goal_position and velocity >= self.goal_velocity
        )

        reward = 0
        if terminated:
            reward = 100.0
        reward -= math.pow(action[0], 2) * 0.1

        self.state = np.array([position, velocity], dtype=np.float32)

        if self.render_mode == "human":
            self.render()
        return self.state, reward, terminated, False, {}

    def reset(self, *, seed: Optional[int] = None, options: Optional[dict] = None):
        """Place the car at a uniform random position with zero velocity."""
        super().reset(seed=seed)
        # Note that if you use custom reset bounds, it may lead to out-of-bound
        # state/observations.
        low, high = utils.maybe_parse_reset_bounds(options, -0.6, -0.4)
        self.state = np.array([self.np_random.uniform(low=low, high=high), 0])

        if self.render_mode == "human":
            self.render()
        return np.array(self.state, dtype=np.float32), {}

    def _height(self, xs):
        """Height of the sinusoidal valley at x-position(s) ``xs``."""
        return np.sin(3 * xs) * 0.45 + 0.55

    def render(self):
        """Draw the valley, car and goal flag; returns an RGB array in "rgb_array" mode."""
        if self.render_mode is None:
            gym.logger.warn(
                "You are calling render method without specifying any render mode. "
                "You can specify the render_mode at initialization, "
                # Fixed: the suggested snippet must use gym.make, not gym(...)
                f'e.g. gym.make("{self.spec.id}", render_mode="rgb_array")'
            )
            return

        try:
            import pygame
            from pygame import gfxdraw
        except ImportError:
            raise DependencyNotInstalled(
                "pygame is not installed, run `pip install gym[classic_control]`"
            )

        if self.screen is None:
            pygame.init()
            if self.render_mode == "human":
                pygame.display.init()
                self.screen = pygame.display.set_mode(
                    (self.screen_width, self.screen_height)
                )
            else:  # mode == "rgb_array":
                self.screen = pygame.Surface((self.screen_width, self.screen_height))
        if self.clock is None:
            self.clock = pygame.time.Clock()

        world_width = self.max_position - self.min_position
        scale = self.screen_width / world_width
        carwidth = 40
        carheight = 20

        self.surf = pygame.Surface((self.screen_width, self.screen_height))
        self.surf.fill((255, 255, 255))

        pos = self.state[0]

        xs = np.linspace(self.min_position, self.max_position, 100)
        ys = self._height(xs)
        xys = list(zip((xs - self.min_position) * scale, ys * scale))

        pygame.draw.aalines(self.surf, points=xys, closed=False, color=(0, 0, 0))

        clearance = 10

        l, r, t, b = -carwidth / 2, carwidth / 2, carheight, 0
        coords = []
        for c in [(l, b), (l, t), (r, t), (r, b)]:
            # NOTE(review): the car body is rotated by cos(3 * pos) radians,
            # matching the derivative term of the hill profile — confirm this
            # is the intended tilt angle rather than e.g. atan of the slope.
            c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos))
            coords.append(
                (
                    c[0] + (pos - self.min_position) * scale,
                    c[1] + clearance + self._height(pos) * scale,
                )
            )

        gfxdraw.aapolygon(self.surf, coords, (0, 0, 0))
        gfxdraw.filled_polygon(self.surf, coords, (0, 0, 0))

        for c in [(carwidth / 4, 0), (-carwidth / 4, 0)]:
            c = pygame.math.Vector2(c).rotate_rad(math.cos(3 * pos))
            wheel = (
                int(c[0] + (pos - self.min_position) * scale),
                int(c[1] + clearance + self._height(pos) * scale),
            )

            gfxdraw.aacircle(
                self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128)
            )
            gfxdraw.filled_circle(
                self.surf, wheel[0], wheel[1], int(carheight / 2.5), (128, 128, 128)
            )

        flagx = int((self.goal_position - self.min_position) * scale)
        flagy1 = int(self._height(self.goal_position) * scale)
        flagy2 = flagy1 + 50
        gfxdraw.vline(self.surf, flagx, flagy1, flagy2, (0, 0, 0))

        gfxdraw.aapolygon(
            self.surf,
            [(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)],
            (204, 204, 0),
        )
        gfxdraw.filled_polygon(
            self.surf,
            [(flagx, flagy2), (flagx, flagy2 - 10), (flagx + 25, flagy2 - 5)],
            (204, 204, 0),
        )

        # Flip vertically so "up" renders up on screen.
        self.surf = pygame.transform.flip(self.surf, False, True)
        self.screen.blit(self.surf, (0, 0))
        if self.render_mode == "human":
            pygame.event.pump()
            self.clock.tick(self.metadata["render_fps"])
            pygame.display.flip()

        elif self.render_mode == "rgb_array":
            return np.transpose(
                np.array(pygame.surfarray.pixels3d(self.screen)), axes=(1, 0, 2)
            )

    def close(self):
        """Shut down the pygame display if it was ever opened."""
        if self.screen is not None:
            import pygame

            pygame.display.quit()
            pygame.quit()
            self.isopen = False
================================================
FILE: gym/envs/classic_control/mountain_car.py
================================================
"""
http://incompleteideas.net/MountainCar/MountainCar1.cp
permalink: https://perma.cc/6Z2N-PFWC
"""
import math
from typing import Optional
import numpy as np
import gym
from gym import spaces
from gym.envs.classic_control import utils
from gym.error import DependencyNotInstalled
class MountainCarEnv(gym.Env):
"""
### Description
The Mountain Car MDP is a deterministic MDP that consists of a car placed stochastically
at the bottom of a sinusoidal valley, with the only possible actions being the accelerations
that can be applied to the car in either direction. The goal of the MDP is to strategically
accelerate the car to reach the goal state on top of the right hill. There are two versions
of the mountain car domain in gym: one with discrete actions and one with continuous.
This version is the one with discrete actions.
This MDP first appeared in [Andrew Moore's PhD Thesis (1990)](https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-209.pdf)
```
@TECHREPORT{Moore90efficientmemory-based,
author = {Andrew William Moore},
title = {Efficient Memory-based Learning for Robot Control},
institution = {University of Cambridge},
year = {1990}
}
```
### Observation Space
The observation is a `ndarray` with shape `(2,)` where the elements correspond to the following:
| Num | Observation                          | Min   | Max  | Unit         |
|-----|--------------------------------------|-------|------|--------------|
| 0   | position of the car along the x-axis | -1.2  | 0.6  | position (m) |
| 1   | velocity of the car                  | -0.07 | 0.07 | velocity (v) |
### Action Space
There are 3 discrete deterministic actions:
| Num | Action                  |
|-----|-------------------------|
| 0   | Accelerate to the left  |
| 1   | Don't accelerate        |
| 2   | Accelerate to the right |
### Transition Dynamics:
Given an action, the mountain car follows the following transition dynamics:
*velocity<sub>t+1</sub> = velocity<sub>t</sub> + (action - 1) * force - cos(3 * position<sub>t</sub>) * gravity*
*position<sub>t+1</sub> = position<sub>t</sub> + velocity<sub>t+1</sub>*
where force = 0.001 and gravity = 0.0025. The collisions at either end are inelastic with the velocity set to 0
upon collision with the wall. The position is clipped to the range `[-1.2, 0.6]` and
velocity is clipped to the range `[-0.07, 0.07]`.
### Reward:
The goal is to reach the flag placed on top of the right hill as quickly as possible, as such the agent is
penalised with a reward of -1 for each timestep.
### Starting State
The position of the car is assigned a uniform random value in *[-0.6 , -0.4]*.
The starting velocity of the car is always assigned to 0.
### Episode End
The episode ends if either of the following happens:
1. Termination: The position of the car is greater than or equal to 0.5 (the goal position on top of the right hill)
2. Truncation: The length of the episode is 200.
### Arguments
```
gym.make('MountainCar-v0')
```
### Version History
* v0: Initial versions release (1.0.0)
"""
# Render configuration consumed by the `render` method: the supported
# render modes and the frame rate used when rendering in "human" mode.
metadata = {
    "render_modes": ["human", "rgb_array"],
    "render_fps": 30,
}
def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
    """Configure physics constants, render state and the gym spaces.

    Args:
        render_mode: one of the modes in ``metadata["render_modes"]``, or
            ``None`` for no rendering.
        goal_velocity: minimum velocity the car must have at the goal
            position for the episode to terminate (default 0).
    """
    # Track geometry and goal of the climb.
    self.min_position, self.max_position = -1.2, 0.6
    self.max_speed = 0.07
    self.goal_position = 0.5
    self.goal_velocity = goal_velocity

    # Constants of the transition dynamics.
    self.force, self.gravity = 0.001, 0.0025

    # Bounds of the (position, velocity) observation vector.
    self.low = np.array([self.min_position, -self.max_speed], dtype=np.float32)
    self.high = np.array([self.max_position, self.max_speed], dtype=np.float32)

    # Rendering state; the pygame surface and clock are created lazily.
    self.render_mode = render_mode
    self.screen_width = 600
    self.screen_height = 400
    self.screen = None
    self.clock = None
    self.isopen = True

    self.action_space = spaces.Discrete(3)
    self.observation_space = spaces.Box(self.low, self.high, dtype=np.float32)
def step(self, action: int):
    """Advance the car one timestep.

    Applies the thrust of ``action`` (0: left, 1: none, 2: right) and hill
    gravity to the velocity, clips velocity and position to their bounds,
    and zeroes the velocity on an inelastic collision with the left wall.
    The reward is always -1; the episode terminates once the car is at or
    past the goal position with at least the goal velocity.

    Returns:
        Tuple of (observation, reward, terminated, truncated, info).
    """
    assert self.action_space.contains(
        action
    ), f"{action!r} ({type(action)}) invalid"

    pos, vel = self.state

    # Transition dynamics: thrust from the chosen action minus the
    # gravity component along the slope.
    vel = vel + (action - 1) * self.force - math.cos(3 * pos) * self.gravity
    vel = np.clip(vel, -self.max_speed, self.max_speed)
    pos = np.clip(pos + vel, self.min_position, self.max_position)

    # Inelastic collision with the left wall: the velocity is reset to 0.
    if vel < 0 and pos == self.min_position:
        vel = 0

    terminated = bool(pos >= self.goal_position and vel >= self.goal_velocity)

    self.state = (pos, vel)
    if self.render_mode == "human":
        self.render()
    return np.array(self.state, dtype=np.float32), -1.0, terminated, False, {}
def reset(
    self,
    *,
    seed: Optional[int] = None,
    options: Optional[dict] = None,
):
    """Start a new episode.

    The starting position is drawn uniformly from ``[low, high]``
    (``[-0.6, -0.4]`` unless overridden via ``options``) and the starting
    velocity is always 0.

    Returns:
        Tuple of (initial observation, info dict).
    """
    super().reset(seed=seed)
    # NOTE: custom reset bounds supplied through `options` may put the
    # car outside the nominal observation range.
    low, high = utils.maybe_parse_reset_bounds(options, -0.6, -0.4)
    start_position = self.np_random.uniform(low=low, high=high)
    self.state = np.array([start_position, 0])
    if self.render_mode == "human":
        self.render()
    return np.array(self.state, dtype=np.float32), {}
def _height(self, xs):
return np.sin(3 * xs) * 0.45 + 0.55
def render(self):
if self.render_mode is None:
gym.logger.warn(
"You are calling render method without specifying any render mode. "
"You can specify the render_mode at initialization, "
f'e.g. gym("{self.spec.id}", render_mod
gitextract_3_1zpoik/
├── .github/
│ ├── ISSUE_TEMPLATE/
│ │ ├── bug.md
│ │ ├── proposal.md
│ │ └── question.md
│ ├── PULL_REQUEST_TEMPLATE.md
│ ├── stale.yml
│ └── workflows/
│ ├── build.yml
│ └── pre-commit.yml
├── .gitignore
├── .pre-commit-config.yaml
├── CODE_OF_CONDUCT.rst
├── CONTRIBUTING.md
├── LICENSE.md
├── README.md
├── bin/
│ └── docker_entrypoint
├── gym/
│ ├── __init__.py
│ ├── core.py
│ ├── envs/
│ │ ├── __init__.py
│ │ ├── box2d/
│ │ │ ├── __init__.py
│ │ │ ├── bipedal_walker.py
│ │ │ ├── car_dynamics.py
│ │ │ ├── car_racing.py
│ │ │ └── lunar_lander.py
│ │ ├── classic_control/
│ │ │ ├── __init__.py
│ │ │ ├── acrobot.py
│ │ │ ├── cartpole.py
│ │ │ ├── continuous_mountain_car.py
│ │ │ ├── mountain_car.py
│ │ │ ├── pendulum.py
│ │ │ └── utils.py
│ │ ├── mujoco/
│ │ │ ├── __init__.py
│ │ │ ├── ant.py
│ │ │ ├── ant_v3.py
│ │ │ ├── ant_v4.py
│ │ │ ├── assets/
│ │ │ │ ├── ant.xml
│ │ │ │ ├── half_cheetah.xml
│ │ │ │ ├── hopper.xml
│ │ │ │ ├── humanoid.xml
│ │ │ │ ├── humanoidstandup.xml
│ │ │ │ ├── inverted_double_pendulum.xml
│ │ │ │ ├── inverted_pendulum.xml
│ │ │ │ ├── point.xml
│ │ │ │ ├── pusher.xml
│ │ │ │ ├── reacher.xml
│ │ │ │ ├── swimmer.xml
│ │ │ │ └── walker2d.xml
│ │ │ ├── half_cheetah.py
│ │ │ ├── half_cheetah_v3.py
│ │ │ ├── half_cheetah_v4.py
│ │ │ ├── hopper.py
│ │ │ ├── hopper_v3.py
│ │ │ ├── hopper_v4.py
│ │ │ ├── humanoid.py
│ │ │ ├── humanoid_v3.py
│ │ │ ├── humanoid_v4.py
│ │ │ ├── humanoidstandup.py
│ │ │ ├── humanoidstandup_v4.py
│ │ │ ├── inverted_double_pendulum.py
│ │ │ ├── inverted_double_pendulum_v4.py
│ │ │ ├── inverted_pendulum.py
│ │ │ ├── inverted_pendulum_v4.py
│ │ │ ├── mujoco_env.py
│ │ │ ├── mujoco_rendering.py
│ │ │ ├── pusher.py
│ │ │ ├── pusher_v4.py
│ │ │ ├── reacher.py
│ │ │ ├── reacher_v4.py
│ │ │ ├── swimmer.py
│ │ │ ├── swimmer_v3.py
│ │ │ ├── swimmer_v4.py
│ │ │ ├── walker2d.py
│ │ │ ├── walker2d_v3.py
│ │ │ └── walker2d_v4.py
│ │ ├── registration.py
│ │ └── toy_text/
│ │ ├── __init__.py
│ │ ├── blackjack.py
│ │ ├── cliffwalking.py
│ │ ├── frozen_lake.py
│ │ ├── taxi.py
│ │ └── utils.py
│ ├── error.py
│ ├── logger.py
│ ├── py.typed
│ ├── spaces/
│ │ ├── __init__.py
│ │ ├── box.py
│ │ ├── dict.py
│ │ ├── discrete.py
│ │ ├── graph.py
│ │ ├── multi_binary.py
│ │ ├── multi_discrete.py
│ │ ├── sequence.py
│ │ ├── space.py
│ │ ├── text.py
│ │ ├── tuple.py
│ │ └── utils.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── colorize.py
│ │ ├── env_checker.py
│ │ ├── ezpickle.py
│ │ ├── passive_env_checker.py
│ │ ├── play.py
│ │ ├── save_video.py
│ │ ├── seeding.py
│ │ └── step_api_compatibility.py
│ ├── vector/
│ │ ├── __init__.py
│ │ ├── async_vector_env.py
│ │ ├── sync_vector_env.py
│ │ ├── utils/
│ │ │ ├── __init__.py
│ │ │ ├── misc.py
│ │ │ ├── numpy_utils.py
│ │ │ ├── shared_memory.py
│ │ │ └── spaces.py
│ │ └── vector_env.py
│ ├── version.py
│ └── wrappers/
│ ├── README.md
│ ├── __init__.py
│ ├── atari_preprocessing.py
│ ├── autoreset.py
│ ├── clip_action.py
│ ├── compatibility.py
│ ├── env_checker.py
│ ├── filter_observation.py
│ ├── flatten_observation.py
│ ├── frame_stack.py
│ ├── gray_scale_observation.py
│ ├── human_rendering.py
│ ├── monitoring/
│ │ ├── __init__.py
│ │ └── video_recorder.py
│ ├── normalize.py
│ ├── order_enforcing.py
│ ├── pixel_observation.py
│ ├── record_episode_statistics.py
│ ├── record_video.py
│ ├── render_collection.py
│ ├── rescale_action.py
│ ├── resize_observation.py
│ ├── step_api_compatibility.py
│ ├── time_aware_observation.py
│ ├── time_limit.py
│ ├── transform_observation.py
│ ├── transform_reward.py
│ └── vector_list_info.py
├── py.Dockerfile
├── pyproject.toml
├── requirements.txt
├── setup.py
├── test_requirements.txt
└── tests/
├── __init__.py
├── envs/
│ ├── __init__.py
│ ├── test_action_dim_check.py
│ ├── test_compatibility.py
│ ├── test_env_implementation.py
│ ├── test_envs.py
│ ├── test_make.py
│ ├── test_mujoco.py
│ ├── test_register.py
│ ├── test_spec.py
│ ├── utils.py
│ └── utils_envs.py
├── spaces/
│ ├── __init__.py
│ ├── test_box.py
│ ├── test_dict.py
│ ├── test_discrete.py
│ ├── test_graph.py
│ ├── test_multibinary.py
│ ├── test_multidiscrete.py
│ ├── test_sequence.py
│ ├── test_space.py
│ ├── test_spaces.py
│ ├── test_text.py
│ ├── test_tuple.py
│ ├── test_utils.py
│ └── utils.py
├── test_core.py
├── testing_env.py
├── utils/
│ ├── __init__.py
│ ├── test_env_checker.py
│ ├── test_passive_env_checker.py
│ ├── test_play.py
│ ├── test_save_video.py
│ ├── test_seeding.py
│ └── test_step_api_compatibility.py
├── vector/
│ ├── __init__.py
│ ├── test_async_vector_env.py
│ ├── test_numpy_utils.py
│ ├── test_shared_memory.py
│ ├── test_spaces.py
│ ├── test_sync_vector_env.py
│ ├── test_vector_env.py
│ ├── test_vector_env_info.py
│ ├── test_vector_env_wrapper.py
│ ├── test_vector_make.py
│ └── utils.py
└── wrappers/
├── __init__.py
├── test_atari_preprocessing.py
├── test_autoreset.py
├── test_clip_action.py
├── test_filter_observation.py
├── test_flatten.py
├── test_flatten_observation.py
├── test_frame_stack.py
├── test_gray_scale_observation.py
├── test_human_rendering.py
├── test_nested_dict.py
├── test_normalize.py
├── test_order_enforcing.py
├── test_passive_env_checker.py
├── test_pixel_observation.py
├── test_record_episode_statistics.py
├── test_record_video.py
├── test_rescale_action.py
├── test_resize_observation.py
├── test_step_compatibility.py
├── test_time_aware_observation.py
├── test_time_limit.py
├── test_transform_observation.py
├── test_transform_reward.py
├── test_vector_list_info.py
├── test_video_recorder.py
└── utils.py
SYMBOL INDEX (1374 symbols across 167 files)
FILE: gym/core.py
class Env (line 35) | class Env(Generic[ObsType, ActType]):
method np_random (line 76) | def np_random(self) -> np.random.Generator:
method np_random (line 83) | def np_random(self, value: np.random.Generator):
method step (line 86) | def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, d...
method reset (line 117) | def reset(
method render (line 153) | def render(self) -> Optional[Union[RenderFrame, List[RenderFrame]]]:
method close (line 178) | def close(self):
method unwrapped (line 187) | def unwrapped(self) -> "Env":
method __str__ (line 195) | def __str__(self):
method __enter__ (line 202) | def __enter__(self):
method __exit__ (line 206) | def __exit__(self, *args):
class Wrapper (line 213) | class Wrapper(Env[ObsType, ActType]):
method __init__ (line 224) | def __init__(self, env: Env):
method __getattr__ (line 237) | def __getattr__(self, name):
method spec (line 244) | def spec(self):
method class_name (line 249) | def class_name(cls):
method action_space (line 254) | def action_space(self) -> spaces.Space[ActType]:
method action_space (line 261) | def action_space(self, space: spaces.Space):
method observation_space (line 265) | def observation_space(self) -> spaces.Space:
method observation_space (line 272) | def observation_space(self, space: spaces.Space):
method reward_range (line 276) | def reward_range(self) -> Tuple[SupportsFloat, SupportsFloat]:
method reward_range (line 283) | def reward_range(self, value: Tuple[SupportsFloat, SupportsFloat]):
method metadata (line 287) | def metadata(self) -> dict:
method metadata (line 294) | def metadata(self, value):
method render_mode (line 298) | def render_mode(self) -> Optional[str]:
method np_random (line 303) | def np_random(self) -> np.random.Generator:
method np_random (line 308) | def np_random(self, value):
method _np_random (line 312) | def _np_random(self):
method step (line 317) | def step(self, action: ActType) -> Tuple[ObsType, float, bool, bool, d...
method reset (line 321) | def reset(self, **kwargs) -> Tuple[ObsType, dict]:
method render (line 325) | def render(
method close (line 331) | def close(self):
method __str__ (line 335) | def __str__(self):
method __repr__ (line 339) | def __repr__(self):
method unwrapped (line 344) | def unwrapped(self) -> Env:
class ObservationWrapper (line 349) | class ObservationWrapper(Wrapper):
method reset (line 377) | def reset(self, **kwargs):
method step (line 382) | def step(self, action):
method observation (line 387) | def observation(self, observation):
class RewardWrapper (line 392) | class RewardWrapper(Wrapper):
method step (line 416) | def step(self, action):
method reward (line 421) | def reward(self, reward):
class ActionWrapper (line 426) | class ActionWrapper(Wrapper):
method step (line 458) | def step(self, action):
method action (line 462) | def action(self, action):
method reverse_action (line 466) | def reverse_action(self, action):
FILE: gym/envs/box2d/bipedal_walker.py
class ContactDetector (line 80) | class ContactDetector(contactListener):
method __init__ (line 81) | def __init__(self, env):
method BeginContact (line 85) | def BeginContact(self, contact):
method EndContact (line 95) | def EndContact(self, contact):
class BipedalWalker (line 101) | class BipedalWalker(gym.Env, EzPickle):
method __init__ (line 170) | def __init__(self, render_mode: Optional[str] = None, hardcore: bool =...
method _destroy (line 263) | def _destroy(self):
method _generate_terrain (line 277) | def _generate_terrain(self, hardcore):
method _generate_clouds (line 404) | def _generate_clouds(self):
method reset (line 425) | def reset(
method step (line 517) | def step(self, action: np.ndarray):
method render (line 608) | def render(self):
method close (line 757) | def close(self):
class BipedalWalkerHardcore (line 766) | class BipedalWalkerHardcore:
method __init__ (line 767) | def __init__(self):
FILE: gym/envs/box2d/car_dynamics.py
class Car (line 50) | class Car:
method __init__ (line 51) | def __init__(self, world, init_angle, init_x, init_y):
method gas (line 138) | def gas(self, gas):
method brake (line 151) | def brake(self, b):
method steer (line 159) | def steer(self, s):
method step (line 167) | def step(self, dt):
method draw (line 263) | def draw(self, surface, zoom, translation, angle, draw_particles=True):
method _create_particle (line 332) | def _create_particle(self, point1, point2, grass):
method destroy (line 346) | def destroy(self):
FILE: gym/envs/box2d/car_racing.py
class FrictionDetector (line 57) | class FrictionDetector(contactListener):
method __init__ (line 58) | def __init__(self, env, lap_complete_percent):
method BeginContact (line 63) | def BeginContact(self, contact):
method EndContact (line 66) | def EndContact(self, contact):
method _contact (line 69) | def _contact(self, contact, begin):
class CarRacing (line 105) | class CarRacing(gym.Env, EzPickle):
method __init__ (line 192) | def __init__(
method _destroy (line 248) | def _destroy(self):
method _init_colors (line 257) | def _init_colors(self):
method _reinit_colors (line 273) | def _reinit_colors(self, randomize):
method _create_track (line 288) | def _create_track(self):
method reset (line 478) | def reset(
method step (line 520) | def step(self, action: Union[np.ndarray, int]):
method render (line 567) | def render(self):
method _render (line 577) | def _render(self, mode: str):
method _render_road (line 638) | def _render_road(self, zoom, translation, angle):
method _render_indicators (line 676) | def _render_indicators(self, W, H):
method _draw_colored_polygon (line 744) | def _draw_colored_polygon(
method _create_image_array (line 764) | def _create_image_array(self, screen, size):
method close (line 770) | def close(self):
function register_input (line 780) | def register_input():
FILE: gym/envs/box2d/lunar_lander.py
class ContactDetector (line 54) | class ContactDetector(contactListener):
method __init__ (line 55) | def __init__(self, env):
method BeginContact (line 59) | def BeginContact(self, contact):
method EndContact (line 69) | def EndContact(self, contact):
class LunarLander (line 75) | class LunarLander(gym.Env, EzPickle):
method __init__ (line 191) | def __init__(
method _destroy (line 296) | def _destroy(self):
method reset (line 308) | def reset(
method _create_particle (line 422) | def _create_particle(self, mass, x, y, ttl):
method _clean_particles (line 440) | def _clean_particles(self, all):
method step (line 444) | def step(self, action):
method render (line 602) | def render(self):
method close (line 717) | def close(self):
function heuristic (line 726) | def heuristic(env, s):
function demo_heuristic_lander (line 780) | def demo_heuristic_lander(env, seed=None, render=False):
class LunarLanderContinuous (line 806) | class LunarLanderContinuous:
method __init__ (line 807) | def __init__(self):
FILE: gym/envs/classic_control/acrobot.py
class AcrobotEnv (line 26) | class AcrobotEnv(core.Env):
method __init__ (line 168) | def __init__(self, render_mode: Optional[str] = None):
method reset (line 181) | def reset(self, *, seed: Optional[int] = None, options: Optional[dict]...
method step (line 196) | def step(self, a):
method _get_ob (line 225) | def _get_ob(self):
method _terminal (line 232) | def _terminal(self):
method _dsdt (line 237) | def _dsdt(self, s_augmented):
method render (line 279) | def render(self):
method close (line 369) | def close(self):
function wrap (line 378) | def wrap(x, m, M):
function bound (line 399) | def bound(x, m, M=None):
function rk4 (line 418) | def rk4(derivs, y0, t):
FILE: gym/envs/classic_control/cartpole.py
class CartPoleEnv (line 17) | class CartPoleEnv(gym.Env[np.ndarray, Union[int, np.ndarray]]):
method __init__ (line 89) | def __init__(self, render_mode: Optional[str] = None):
method step (line 130) | def step(self, action):
method reset (line 190) | def reset(
method render (line 209) | def render(self):
method close (line 306) | def close(self):
FILE: gym/envs/classic_control/continuous_mountain_car.py
class Continuous_MountainCarEnv (line 27) | class Continuous_MountainCarEnv(gym.Env):
method __init__ (line 108) | def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
method step (line 142) | def step(self, action: np.ndarray):
method reset (line 177) | def reset(self, *, seed: Optional[int] = None, options: Optional[dict]...
method _height (line 188) | def _height(self, xs):
method render (line 191) | def render(self):
method close (line 294) | def close(self):
FILE: gym/envs/classic_control/mountain_car.py
class MountainCarEnv (line 16) | class MountainCarEnv(gym.Env):
method __init__ (line 103) | def __init__(self, render_mode: Optional[str] = None, goal_velocity=0):
method step (line 127) | def step(self, action: int):
method reset (line 150) | def reset(
method _height (line 166) | def _height(self, xs):
method render (line 169) | def render(self):
method get_keys_to_action (line 272) | def get_keys_to_action(self):
method close (line 276) | def close(self):
FILE: gym/envs/classic_control/pendulum.py
class PendulumEnv (line 17) | class PendulumEnv(gym.Env):
method __init__ (line 95) | def __init__(self, render_mode: Optional[str] = None, g=10.0):
method step (line 119) | def step(self, u):
method reset (line 141) | def reset(self, *, seed: Optional[int] = None, options: Optional[dict]...
method _get_obs (line 161) | def _get_obs(self):
method render (line 165) | def render(self):
method close (line 261) | def close(self):
function angle_normalize (line 270) | def angle_normalize(x):
FILE: gym/envs/classic_control/utils.py
function verify_number_and_cast (line 8) | def verify_number_and_cast(x: SupportsFloat) -> float:
function maybe_parse_reset_bounds (line 17) | def maybe_parse_reset_bounds(
FILE: gym/envs/mujoco/ant.py
class AntEnv (line 8) | class AntEnv(MuJocoPyEnv, utils.EzPickle):
method __init__ (line 18) | def __init__(self, **kwargs):
method step (line 27) | def step(self, a):
method _get_obs (line 61) | def _get_obs(self):
method reset_model (line 70) | def reset_model(self):
method viewer_setup (line 78) | def viewer_setup(self):
FILE: gym/envs/mujoco/ant_v3.py
class AntEnv (line 12) | class AntEnv(MuJocoPyEnv, utils.EzPickle):
method __init__ (line 22) | def __init__(
method healthy_reward (line 78) | def healthy_reward(self):
method control_cost (line 84) | def control_cost(self, action):
method contact_forces (line 89) | def contact_forces(self):
method contact_cost (line 96) | def contact_cost(self):
method is_healthy (line 103) | def is_healthy(self):
method terminated (line 110) | def terminated(self):
method step (line 114) | def step(self, action):
method _get_obs (line 151) | def _get_obs(self):
method reset_model (line 163) | def reset_model(self):
method viewer_setup (line 180) | def viewer_setup(self):
FILE: gym/envs/mujoco/ant_v4.py
class AntEnv (line 12) | class AntEnv(MujocoEnv, utils.EzPickle):
method __init__ (line 184) | def __init__(
method healthy_reward (line 245) | def healthy_reward(self):
method control_cost (line 251) | def control_cost(self, action):
method contact_forces (line 256) | def contact_forces(self):
method contact_cost (line 263) | def contact_cost(self):
method is_healthy (line 270) | def is_healthy(self):
method terminated (line 277) | def terminated(self):
method step (line 281) | def step(self, action):
method _get_obs (line 320) | def _get_obs(self):
method reset_model (line 333) | def reset_model(self):
method viewer_setup (line 350) | def viewer_setup(self):
FILE: gym/envs/mujoco/half_cheetah.py
class HalfCheetahEnv (line 8) | class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
method __init__ (line 18) | def __init__(self, **kwargs):
method step (line 25) | def step(self, action):
method _get_obs (line 46) | def _get_obs(self):
method reset_model (line 54) | def reset_model(self):
method viewer_setup (line 62) | def viewer_setup(self):
FILE: gym/envs/mujoco/half_cheetah_v3.py
class HalfCheetahEnv (line 14) | class HalfCheetahEnv(MuJocoPyEnv, utils.EzPickle):
method __init__ (line 24) | def __init__(
method control_cost (line 66) | def control_cost(self, action):
method step (line 70) | def step(self, action):
method _get_obs (line 94) | def _get_obs(self):
method reset_model (line 104) | def reset_model(self):
method viewer_setup (line 121) | def viewer_setup(self):
FILE: gym/envs/mujoco/half_cheetah_v4.py
class HalfCheetahEnv (line 14) | class HalfCheetahEnv(MujocoEnv, utils.EzPickle):
method __init__ (line 144) | def __init__(
method control_cost (line 184) | def control_cost(self, action):
method step (line 188) | def step(self, action):
method _get_obs (line 212) | def _get_obs(self):
method reset_model (line 222) | def reset_model(self):
method viewer_setup (line 239) | def viewer_setup(self):
FILE: gym/envs/mujoco/hopper.py
class HopperEnv (line 8) | class HopperEnv(MuJocoPyEnv, utils.EzPickle):
method __init__ (line 18) | def __init__(self, **kwargs):
method step (line 25) | def step(self, a):
method _get_obs (line 47) | def _get_obs(self):
method reset_model (line 52) | def reset_model(self):
method viewer_setup (line 62) | def viewer_setup(self):
FILE: gym/envs/mujoco/hopper_v3.py
class HopperEnv (line 17) | class HopperEnv(MuJocoPyEnv, utils.EzPickle):
method __init__ (line 27) | def __init__(
method healthy_reward (line 87) | def healthy_reward(self):
method control_cost (line 93) | def control_cost(self, action):
method is_healthy (line 98) | def is_healthy(self):
method terminated (line 115) | def terminated(self):
method _get_obs (line 119) | def _get_obs(self):
method step (line 129) | def step(self, action):
method reset_model (line 155) | def reset_model(self):
method viewer_setup (line 171) | def viewer_setup(self):
FILE: gym/envs/mujoco/hopper_v4.py
class HopperEnv (line 15) | class HopperEnv(MujocoEnv, utils.EzPickle):
method __init__ (line 150) | def __init__(
method healthy_reward (line 208) | def healthy_reward(self):
method control_cost (line 214) | def control_cost(self, action):
method is_healthy (line 219) | def is_healthy(self):
method terminated (line 236) | def terminated(self):
method _get_obs (line 240) | def _get_obs(self):
method step (line 250) | def step(self, action):
method reset_model (line 276) | def reset_model(self):
method viewer_setup (line 292) | def viewer_setup(self):
FILE: gym/envs/mujoco/humanoid.py
function mass_center (line 8) | def mass_center(model, sim):
class HumanoidEnv (line 14) | class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
method __init__ (line 24) | def __init__(self, **kwargs):
method _get_obs (line 33) | def _get_obs(self):
method step (line 46) | def step(self, a):
method reset_model (line 76) | def reset_model(self):
method viewer_setup (line 89) | def viewer_setup(self):
FILE: gym/envs/mujoco/humanoid_v3.py
function mass_center (line 15) | def mass_center(model, sim):
class HumanoidEnv (line 21) | class HumanoidEnv(MuJocoPyEnv, utils.EzPickle):
method __init__ (line 31) | def __init__(
method healthy_reward (line 87) | def healthy_reward(self):
method control_cost (line 93) | def control_cost(self, action):
method contact_cost (line 98) | def contact_cost(self):
method is_healthy (line 106) | def is_healthy(self):
method terminated (line 113) | def terminated(self):
method _get_obs (line 117) | def _get_obs(self):
method step (line 141) | def step(self, action):
method reset_model (line 178) | def reset_model(self):
method viewer_setup (line 193) | def viewer_setup(self):
FILE: gym/envs/mujoco/humanoid_v4.py
function mass_center (line 15) | def mass_center(model, data):
class HumanoidEnv (line 21) | class HumanoidEnv(MujocoEnv, utils.EzPickle):
method __init__ (line 224) | def __init__(
method healthy_reward (line 273) | def healthy_reward(self):
method control_cost (line 279) | def control_cost(self, action):
method is_healthy (line 284) | def is_healthy(self):
method terminated (line 291) | def terminated(self):
method _get_obs (line 295) | def _get_obs(self):
method step (line 319) | def step(self, action):
method reset_model (line 353) | def reset_model(self):
method viewer_setup (line 368) | def viewer_setup(self):
FILE: gym/envs/mujoco/humanoidstandup.py
class HumanoidStandupEnv (line 8) | class HumanoidStandupEnv(MuJocoPyEnv, utils.EzPickle):
method __init__ (line 18) | def __init__(self, **kwargs):
method _get_obs (line 31) | def _get_obs(self):
method step (line 44) | def step(self, a):
method reset_model (line 69) | def reset_model(self):
method viewer_setup (line 82) | def viewer_setup(self):
FILE: gym/envs/mujoco/humanoidstandup_v4.py
class HumanoidStandupEnv (line 8) | class HumanoidStandupEnv(MujocoEnv, utils.EzPickle):
method __init__ (line 190) | def __init__(self, **kwargs):
method _get_obs (line 203) | def _get_obs(self):
method step (line 216) | def step(self, a):
method reset_model (line 241) | def reset_model(self):
method viewer_setup (line 254) | def viewer_setup(self):
FILE: gym/envs/mujoco/inverted_double_pendulum.py
class InvertedDoublePendulumEnv (line 8) | class InvertedDoublePendulumEnv(MuJocoPyEnv, utils.EzPickle):
method __init__ (line 18) | def __init__(self, **kwargs):
method step (line 29) | def step(self, action):
method _get_obs (line 45) | def _get_obs(self):
method reset_model (line 56) | def reset_model(self):
method viewer_setup (line 64) | def viewer_setup(self):
FILE: gym/envs/mujoco/inverted_double_pendulum_v4.py
class InvertedDoublePendulumEnv (line 8) | class InvertedDoublePendulumEnv(MujocoEnv, utils.EzPickle):
method __init__ (line 124) | def __init__(self, **kwargs):
method step (line 135) | def step(self, action):
method _get_obs (line 149) | def _get_obs(self):
method reset_model (line 160) | def reset_model(self):
method viewer_setup (line 168) | def viewer_setup(self):
FILE: gym/envs/mujoco/inverted_pendulum.py
class InvertedPendulumEnv (line 8) | class InvertedPendulumEnv(MuJocoPyEnv, utils.EzPickle):
method __init__ (line 18) | def __init__(self, **kwargs):
method step (line 29) | def step(self, a):
method reset_model (line 40) | def reset_model(self):
method _get_obs (line 50) | def _get_obs(self):
method viewer_setup (line 53) | def viewer_setup(self):
FILE: gym/envs/mujoco/inverted_pendulum_v4.py
class InvertedPendulumEnv (line 8) | class InvertedPendulumEnv(MujocoEnv, utils.EzPickle):
method __init__ (line 95) | def __init__(self, **kwargs):
method step (line 106) | def step(self, a):
method reset_model (line 115) | def reset_model(self):
method _get_obs (line 125) | def _get_obs(self):
method viewer_setup (line 128) | def viewer_setup(self):
FILE: gym/envs/mujoco/mujoco_env.py
class BaseMujocoEnv (line 28) | class BaseMujocoEnv(gym.Env):
method __init__ (line 31) | def __init__(
method _set_action_space (line 77) | def _set_action_space(self):
method reset_model (line 86) | def reset_model(self):
method viewer_setup (line 93) | def viewer_setup(self):
method _initialize_simulation (line 99) | def _initialize_simulation(self):
method _reset_simulation (line 105) | def _reset_simulation(self):
method _step_mujoco_simulation (line 111) | def _step_mujoco_simulation(self, ctrl, n_frames):
method render (line 117) | def render(self):
method reset (line 125) | def reset(
method set_state (line 140) | def set_state(self, qpos, qvel):
method dt (line 147) | def dt(self):
method do_simulation (line 150) | def do_simulation(self, ctrl, n_frames):
method close (line 159) | def close(self):
method get_body_com (line 164) | def get_body_com(self, body_name):
method state_vector (line 168) | def state_vector(self):
class MuJocoPyEnv (line 173) | class MuJocoPyEnv(BaseMujocoEnv):
method __init__ (line 174) | def __init__(
method _initialize_simulation (line 209) | def _initialize_simulation(self):
method _reset_simulation (line 214) | def _reset_simulation(self):
method set_state (line 217) | def set_state(self, qpos, qvel):
method _step_mujoco_simulation (line 224) | def _step_mujoco_simulation(self, ctrl, n_frames):
method render (line 230) | def render(self):
method _get_viewer (line 277) | def _get_viewer(
method get_body_com (line 297) | def get_body_com(self, body_name):
class MujocoEnv (line 301) | class MujocoEnv(BaseMujocoEnv):
method __init__ (line 304) | def __init__(
method _initialize_simulation (line 330) | def _initialize_simulation(self):
method _reset_simulation (line 337) | def _reset_simulation(self):
method set_state (line 340) | def set_state(self, qpos, qvel):
method _step_mujoco_simulation (line 348) | def _step_mujoco_simulation(self, ctrl, n_frames):
method render (line 358) | def render(self):
method close (line 406) | def close(self):
method _get_viewer (line 411) | def _get_viewer(
method get_body_com (line 436) | def get_body_com(self, body_name):
FILE: gym/envs/mujoco/mujoco_rendering.py
function _import_egl (line 12) | def _import_egl(width, height):
function _import_glfw (line 18) | def _import_glfw(width, height):
function _import_osmesa (line 24) | def _import_osmesa(width, height):
class RenderContext (line 39) | class RenderContext:
method __init__ (line 42) | def __init__(self, model, data, offscreen=True):
method _set_mujoco_buffers (line 65) | def _set_mujoco_buffers(self):
method render (line 75) | def render(self, camera_id=None, segmentation=False):
method read_pixels (line 119) | def read_pixels(self, depth=True, segmentation=False):
method _init_camera (line 152) | def _init_camera(self):
method add_overlay (line 159) | def add_overlay(self, gridpos: int, text1: str, text2: str):
method add_marker (line 166) | def add_marker(self, **marker_params):
method _add_marker_to_scene (line 169) | def _add_marker_to_scene(self, marker):
method close (line 215) | def close(self):
class RenderContextOffscreen (line 222) | class RenderContextOffscreen(RenderContext):
method __init__ (line 225) | def __init__(self, model, data):
method _get_opengl_backend (line 234) | def _get_opengl_backend(self, width, height):
class Viewer (line 262) | class Viewer(RenderContext):
method __init__ (line 265) | def __init__(self, model, data):
method _key_callback (line 305) | def _key_callback(self, window, key, scancode, action, mods):
method _cursor_pos_callback (line 372) | def _cursor_pos_callback(self, window, xpos, ypos):
method _mouse_button_callback (line 407) | def _mouse_button_callback(self, window, button, act, mods):
method _scroll_callback (line 419) | def _scroll_callback(self, window, x_offset, y_offset):
method _create_overlay (line 430) | def _create_overlay(self):
method render (line 480) | def render(self):
method close (line 550) | def close(self):
FILE: gym/envs/mujoco/pusher.py
class PusherEnv (line 8) | class PusherEnv(MuJocoPyEnv, utils.EzPickle):
method __init__ (line 18) | def __init__(self, **kwargs):
method step (line 25) | def step(self, a):
method viewer_setup (line 47) | def viewer_setup(self):
method reset_model (line 52) | def reset_model(self):
method _get_obs (line 75) | def _get_obs(self):
FILE: gym/envs/mujoco/pusher_v4.py
class PusherEnv (line 8) | class PusherEnv(MujocoEnv, utils.EzPickle):
method __init__ (line 140) | def __init__(self, **kwargs):
method step (line 147) | def step(self, a):
method viewer_setup (line 169) | def viewer_setup(self):
method reset_model (line 174) | def reset_model(self):
method _get_obs (line 197) | def _get_obs(self):
FILE: gym/envs/mujoco/reacher.py
class ReacherEnv (line 8) | class ReacherEnv(MuJocoPyEnv, utils.EzPickle):
method __init__ (line 18) | def __init__(self, **kwargs):
method step (line 25) | def step(self, a):
method viewer_setup (line 44) | def viewer_setup(self):
method reset_model (line 48) | def reset_model(self):
method _get_obs (line 65) | def _get_obs(self):
FILE: gym/envs/mujoco/reacher_v4.py
class ReacherEnv (line 8) | class ReacherEnv(MujocoEnv, utils.EzPickle):
method __init__ (line 130) | def __init__(self, **kwargs):
method step (line 137) | def step(self, a):
method viewer_setup (line 156) | def viewer_setup(self):
method reset_model (line 160) | def reset_model(self):
method _get_obs (line 177) | def _get_obs(self):
FILE: gym/envs/mujoco/swimmer.py
class SwimmerEnv (line 8) | class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
method __init__ (line 18) | def __init__(self, **kwargs):
method step (line 25) | def step(self, a):
method _get_obs (line 47) | def _get_obs(self):
method reset_model (line 52) | def reset_model(self):
FILE: gym/envs/mujoco/swimmer_v3.py
class SwimmerEnv (line 12) | class SwimmerEnv(MuJocoPyEnv, utils.EzPickle):
method __init__ (line 22) | def __init__(
method control_cost (line 63) | def control_cost(self, action):
method step (line 67) | def step(self, action):
method _get_obs (line 96) | def _get_obs(self):
method reset_model (line 106) | def reset_model(self):
method viewer_setup (line 122) | def viewer_setup(self):
FILE: gym/envs/mujoco/swimmer_v4.py
class SwimmerEnv (line 12) | class SwimmerEnv(MujocoEnv, utils.EzPickle):
method __init__ (line 136) | def __init__(
method control_cost (line 173) | def control_cost(self, action):
method step (line 177) | def step(self, action):
method _get_obs (line 207) | def _get_obs(self):
method reset_model (line 217) | def reset_model(self):
method viewer_setup (line 233) | def viewer_setup(self):
FILE: gym/envs/mujoco/walker2d.py
class Walker2dEnv (line 8) | class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
method __init__ (line 18) | def __init__(self, **kwargs):
method step (line 25) | def step(self, a):
method _get_obs (line 42) | def _get_obs(self):
method reset_model (line 47) | def reset_model(self):
method viewer_setup (line 56) | def viewer_setup(self):
FILE: gym/envs/mujoco/walker2d_v3.py
class Walker2dEnv (line 15) | class Walker2dEnv(MuJocoPyEnv, utils.EzPickle):
method __init__ (line 25) | def __init__(
method healthy_reward (line 81) | def healthy_reward(self):
method control_cost (line 87) | def control_cost(self, action):
method is_healthy (line 92) | def is_healthy(self):
method terminated (line 105) | def terminated(self):
method _get_obs (line 109) | def _get_obs(self):
method step (line 119) | def step(self, action):
method reset_model (line 145) | def reset_model(self):
method viewer_setup (line 161) | def viewer_setup(self):
FILE: gym/envs/mujoco/walker2d_v4.py
class Walker2dEnv (line 15) | class Walker2dEnv(MujocoEnv, utils.EzPickle):
method __init__ (line 155) | def __init__(
method healthy_reward (line 209) | def healthy_reward(self):
method control_cost (line 215) | def control_cost(self, action):
method is_healthy (line 220) | def is_healthy(self):
method terminated (line 233) | def terminated(self):
method _get_obs (line 237) | def _get_obs(self):
method step (line 247) | def step(self, action):
method reset_model (line 274) | def reset_model(self):
method viewer_setup (line 290) | def viewer_setup(self):
FILE: gym/envs/registration.py
function load (line 51) | def load(name: str) -> callable:
function parse_env_id (line 66) | def parse_env_id(id: str) -> Tuple[Optional[str], str, Optional[int]]:
function get_env_id (line 97) | def get_env_id(ns: Optional[str], name: str, version: Optional[int]) -> ...
class EnvSpec (line 118) | class EnvSpec:
method __post_init__ (line 154) | def __post_init__(self):
method make (line 158) | def make(self, **kwargs) -> Env:
function _check_namespace_exists (line 163) | def _check_namespace_exists(ns: Optional[str]):
function _check_name_exists (line 185) | def _check_name_exists(ns: Optional[str], name: str):
function _check_version_exists (line 202) | def _check_version_exists(ns: Optional[str], name: str, version: Optiona...
function find_highest_version (line 257) | def find_highest_version(ns: Optional[str], name: str) -> Optional[int]:
function load_env_plugins (line 266) | def load_env_plugins(entry_point: str = "gym.envs") -> None:
function make (line 314) | def make(id: str, **kwargs) -> Env: ...
function make (line 316) | def make(id: EnvSpec, **kwargs) -> Env: ...
function make (line 322) | def make(id: Literal["CartPole-v0", "CartPole-v1"], **kwargs) -> Env[np....
function make (line 324) | def make(id: Literal["MountainCar-v0"], **kwargs) -> Env[np.ndarray, Uni...
function make (line 326) | def make(id: Literal["MountainCarContinuous-v0"], **kwargs) -> Env[np.nd...
function make (line 328) | def make(id: Literal["Pendulum-v1"], **kwargs) -> Env[np.ndarray, Union[...
function make (line 330) | def make(id: Literal["Acrobot-v1"], **kwargs) -> Env[np.ndarray, Union[n...
function make (line 336) | def make(id: Literal["LunarLander-v2", "LunarLanderContinuous-v2"], **kw...
function make (line 338) | def make(id: Literal["BipedalWalker-v3", "BipedalWalkerHardcore-v3"], **...
function make (line 340) | def make(id: Literal["CarRacing-v2"], **kwargs) -> Env[np.ndarray, Union...
function make (line 346) | def make(id: Literal["Blackjack-v1"], **kwargs) -> Env[np.ndarray, Union...
function make (line 348) | def make(id: Literal["FrozenLake-v1", "FrozenLake8x8-v1"], **kwargs) -> ...
function make (line 350) | def make(id: Literal["CliffWalking-v0"], **kwargs) -> Env[np.ndarray, Un...
function make (line 352) | def make(id: Literal["Taxi-v3"], **kwargs) -> Env[np.ndarray, Union[np.n...
function make (line 358) | def make(id: Literal[
function _check_spec_register (line 379) | def _check_spec_register(spec: EnvSpec):
function namespace (line 426) | def namespace(ns: str):
function register (line 434) | def register(
function make (line 502) | def make(
function spec (line 694) | def spec(env_id: str) -> EnvSpec:
FILE: gym/envs/toy_text/blackjack.py
function cmp (line 11) | def cmp(a, b):
function draw_card (line 19) | def draw_card(np_random):
function draw_hand (line 23) | def draw_hand(np_random):
function usable_ace (line 27) | def usable_ace(hand): # Does this hand have a usable ace?
function sum_hand (line 31) | def sum_hand(hand): # Return current hand total
function is_bust (line 37) | def is_bust(hand): # Is this hand a bust?
function score (line 41) | def score(hand): # What is the score of this hand (0 if bust)
function is_natural (line 45) | def is_natural(hand): # Is this hand a natural blackjack?
class BlackjackEnv (line 49) | class BlackjackEnv(gym.Env):
method __init__ (line 118) | def __init__(self, render_mode: Optional[str] = None, natural=False, s...
method step (line 133) | def step(self, action):
method _get_obs (line 164) | def _get_obs(self):
method reset (line 167) | def reset(
method render (line 192) | def render(self):
method close (line 310) | def close(self):
FILE: gym/envs/toy_text/cliffwalking.py
class CliffWalkingEnv (line 18) | class CliffWalkingEnv(Env):
method __init__ (line 70) | def __init__(self, render_mode: Optional[str] = None):
method _limit_coordinates (line 117) | def _limit_coordinates(self, coord: np.ndarray) -> np.ndarray:
method _calculate_transition_prob (line 125) | def _calculate_transition_prob(self, current, delta):
method step (line 145) | def step(self, a):
method reset (line 156) | def reset(self, *, seed: Optional[int] = None, options: Optional[dict]...
method render (line 165) | def render(self):
method _render_gui (line 177) | def _render_gui(self, mode):
method _render_text (line 268) | def _render_text(self):
FILE: gym/envs/toy_text/frozen_lake.py
function is_valid (line 33) | def is_valid(board: List[List[str]], max_size: int) -> bool:
function generate_random_map (line 53) | def generate_random_map(size: int = 8, p: float = 0.8) -> List[str]:
class FrozenLakeEnv (line 75) | class FrozenLakeEnv(Env):
method __init__ (line 162) | def __init__(
method step (line 244) | def step(self, a):
method reset (line 255) | def reset(
method render (line 269) | def render(self):
method _render_gui (line 281) | def _render_gui(self, mode):
method _center_small_rect (line 380) | def _center_small_rect(big_rect, small_dims):
method _render_text (line 388) | def _render_text(self):
method close (line 404) | def close(self):
FILE: gym/envs/toy_text/taxi.py
class TaxiEnv (line 24) | class TaxiEnv(Env):
method __init__ (line 128) | def __init__(self, render_mode: Optional[str] = None):
method encode (line 210) | def encode(self, taxi_row, taxi_col, pass_loc, dest_idx):
method decode (line 221) | def decode(self, i):
method action_mask (line 233) | def action_mask(self, state: int):
method step (line 254) | def step(self, a):
method reset (line 265) | def reset(
method render (line 280) | def render(self):
method _render_gui (line 292) | def _render_gui(self, mode):
method get_surf_loc (line 422) | def get_surf_loc(self, map_loc):
method _render_text (line 427) | def _render_text(self):
method close (line 463) | def close(self):
FILE: gym/envs/toy_text/utils.py
function categorical_sample (line 4) | def categorical_sample(prob_n, np_random: np.random.Generator):
FILE: gym/error.py
class Error (line 5) | class Error(Exception):
class Unregistered (line 12) | class Unregistered(Error):
class UnregisteredEnv (line 16) | class UnregisteredEnv(Unregistered):
class NamespaceNotFound (line 20) | class NamespaceNotFound(UnregisteredEnv):
class NameNotFound (line 24) | class NameNotFound(UnregisteredEnv):
class VersionNotFound (line 28) | class VersionNotFound(UnregisteredEnv):
class UnregisteredBenchmark (line 32) | class UnregisteredBenchmark(Unregistered):
class DeprecatedEnv (line 36) | class DeprecatedEnv(Error):
class RegistrationError (line 40) | class RegistrationError(Error):
class UnseedableEnv (line 44) | class UnseedableEnv(Error):
class DependencyNotInstalled (line 48) | class DependencyNotInstalled(Error):
class UnsupportedMode (line 52) | class UnsupportedMode(Error):
class ResetNeeded (line 56) | class ResetNeeded(Error):
class ResetNotAllowed (line 60) | class ResetNotAllowed(Error):
class InvalidAction (line 64) | class InvalidAction(Error):
class APIError (line 71) | class APIError(Error):
method __init__ (line 74) | def __init__(
method __unicode__ (line 100) | def __unicode__(self):
method __str__ (line 108) | def __str__(self):
class APIConnectionError (line 113) | class APIConnectionError(APIError):
class InvalidRequestError (line 117) | class InvalidRequestError(APIError):
method __init__ (line 120) | def __init__(
class AuthenticationError (line 134) | class AuthenticationError(APIError):
class RateLimitError (line 138) | class RateLimitError(APIError):
class VideoRecorderError (line 145) | class VideoRecorderError(Error):
class InvalidFrame (line 149) | class InvalidFrame(Error):
class DoubleWrapperError (line 156) | class DoubleWrapperError(Error):
class WrapAfterConfigureError (line 160) | class WrapAfterConfigureError(Error):
class RetriesExceededError (line 164) | class RetriesExceededError(Error):
class AlreadyPendingCallError (line 171) | class AlreadyPendingCallError(Exception):
method __init__ (line 174) | def __init__(self, message: str, name: str):
class NoAsyncCallError (line 180) | class NoAsyncCallError(Exception):
method __init__ (line 183) | def __init__(self, message: str, name: str):
class ClosedEnvironmentError (line 189) | class ClosedEnvironmentError(Exception):
class CustomSpaceError (line 193) | class CustomSpaceError(Exception):
FILE: gym/logger.py
function set_level (line 21) | def set_level(level: int):
function debug (line 27) | def debug(msg: str, *args: object):
function info (line 33) | def info(msg: str, *args: object):
function warn (line 39) | def warn(
function deprecation (line 61) | def deprecation(msg: str, *args: object):
function error (line 66) | def error(msg: str, *args: object):
FILE: gym/spaces/box.py
function _short_repr (line 11) | def _short_repr(arr: np.ndarray) -> str:
function is_float_integer (line 28) | def is_float_integer(var) -> bool:
class Box (line 33) | class Box(Space[np.ndarray]):
method __init__ (line 53) | def __init__(
method shape (line 137) | def shape(self) -> Tuple[int, ...]:
method is_np_flattenable (line 142) | def is_np_flattenable(self):
method is_bounded (line 146) | def is_bounded(self, manner: str = "both") -> bool:
method sample (line 171) | def sample(self, mask: None = None) -> np.ndarray:
method contains (line 224) | def contains(self, x) -> bool:
method to_jsonable (line 240) | def to_jsonable(self, sample_n):
method from_jsonable (line 244) | def from_jsonable(self, sample_n: Sequence[Union[float, int]]) -> List...
method __repr__ (line 248) | def __repr__(self) -> str:
method __eq__ (line 259) | def __eq__(self, other) -> bool:
method __setstate__ (line 269) | def __setstate__(self, state: Dict):
function get_inf (line 281) | def get_inf(dtype, sign: str) -> SupportsFloat:
function get_precision (line 313) | def get_precision(dtype) -> SupportsFloat:
function _broadcast (line 321) | def _broadcast(
FILE: gym/spaces/dict.py
class Dict (line 15) | class Dict(Space[TypingDict[str, Space]], Mapping):
method __init__ (line 54) | def __init__(
method is_np_flattenable (line 121) | def is_np_flattenable(self):
method seed (line 125) | def seed(self, seed: Optional[Union[dict, int]] = None) -> list:
method sample (line 162) | def sample(self, mask: Optional[TypingDict[str, Any]] = None) -> dict:
method contains (line 186) | def contains(self, x) -> bool:
method __getitem__ (line 192) | def __getitem__(self, key: str) -> Space:
method __setitem__ (line 196) | def __setitem__(self, key: str, value: Space):
method __iter__ (line 203) | def __iter__(self):
method __len__ (line 207) | def __len__(self) -> int:
method __repr__ (line 211) | def __repr__(self) -> str:
method __eq__ (line 217) | def __eq__(self, other) -> bool:
method to_jsonable (line 225) | def to_jsonable(self, sample_n: list) -> dict:
method from_jsonable (line 233) | def from_jsonable(self, sample_n: TypingDict[str, list]) -> List[dict]:
FILE: gym/spaces/discrete.py
class Discrete (line 9) | class Discrete(Space[int]):
method __init__ (line 20) | def __init__(
method is_np_flattenable (line 43) | def is_np_flattenable(self):
method sample (line 47) | def sample(self, mask: Optional[np.ndarray] = None) -> int:
method contains (line 83) | def contains(self, x) -> bool:
method __repr__ (line 96) | def __repr__(self) -> str:
method __eq__ (line 102) | def __eq__(self, other) -> bool:
method __setstate__ (line 110) | def __setstate__(self, state):
FILE: gym/spaces/graph.py
class GraphInstance (line 13) | class GraphInstance(NamedTuple):
class Graph (line 26) | class Graph(Space):
method __init__ (line 34) | def __init__(
method is_np_flattenable (line 67) | def is_np_flattenable(self):
method _generate_sample_space (line 71) | def _generate_sample_space(
method sample (line 92) | def sample(
method contains (line 163) | def contains(self, x: GraphInstance) -> bool:
method __repr__ (line 190) | def __repr__(self) -> str:
method __eq__ (line 200) | def __eq__(self, other) -> bool:
method to_jsonable (line 208) | def to_jsonable(self, sample_n: NamedTuple) -> list:
method from_jsonable (line 221) | def from_jsonable(self, sample_n: Sequence[dict]) -> list:
FILE: gym/spaces/multi_binary.py
class MultiBinary (line 9) | class MultiBinary(Space[np.ndarray]):
method __init__ (line 26) | def __init__(
method shape (line 49) | def shape(self) -> Tuple[int, ...]:
method is_np_flattenable (line 54) | def is_np_flattenable(self):
method sample (line 58) | def sample(self, mask: Optional[np.ndarray] = None) -> np.ndarray:
method contains (line 93) | def contains(self, x) -> bool:
method to_jsonable (line 104) | def to_jsonable(self, sample_n) -> list:
method from_jsonable (line 108) | def from_jsonable(self, sample_n) -> list:
method __repr__ (line 112) | def __repr__(self) -> str:
method __eq__ (line 116) | def __eq__(self, other) -> bool:
FILE: gym/spaces/multi_discrete.py
class MultiDiscrete (line 11) | class MultiDiscrete(Space[np.ndarray]):
method __init__ (line 38) | def __init__(
method shape (line 60) | def shape(self) -> Tuple[int, ...]:
method is_np_flattenable (line 65) | def is_np_flattenable(self):
method sample (line 69) | def sample(self, mask: Optional[tuple] = None) -> np.ndarray:
method contains (line 125) | def contains(self, x) -> bool:
method to_jsonable (line 140) | def to_jsonable(self, sample_n: Iterable[np.ndarray]):
method from_jsonable (line 144) | def from_jsonable(self, sample_n):
method __repr__ (line 148) | def __repr__(self):
method __getitem__ (line 152) | def __getitem__(self, index):
method __len__ (line 165) | def __len__(self):
method __eq__ (line 173) | def __eq__(self, other):
FILE: gym/spaces/sequence.py
class Sequence (line 11) | class Sequence(Space[Tuple]):
method __init__ (line 25) | def __init__(
method seed (line 44) | def seed(self, seed: Optional[int] = None) -> list:
method is_np_flattenable (line 51) | def is_np_flattenable(self):
method sample (line 55) | def sample(
method contains (line 105) | def contains(self, x) -> bool:
method __repr__ (line 111) | def __repr__(self) -> str:
method to_jsonable (line 115) | def to_jsonable(self, sample_n: list) -> list:
method from_jsonable (line 120) | def from_jsonable(self, sample_n: List[List[Any]]) -> list:
method __eq__ (line 124) | def __eq__(self, other) -> bool:
FILE: gym/spaces/space.py
class Space (line 24) | class Space(Generic[T_cov]):
method __init__ (line 50) | def __init__(
method np_random (line 73) | def np_random(self) -> np.random.Generator:
method shape (line 81) | def shape(self) -> Optional[Tuple[int, ...]]:
method is_np_flattenable (line 86) | def is_np_flattenable(self):
method sample (line 90) | def sample(self, mask: Optional[Any] = None) -> T_cov:
method seed (line 103) | def seed(self, seed: Optional[int] = None) -> list:
method contains (line 108) | def contains(self, x) -> bool:
method __contains__ (line 112) | def __contains__(self, x) -> bool:
method __setstate__ (line 116) | def __setstate__(self, state: Union[Iterable, Mapping]):
method to_jsonable (line 142) | def to_jsonable(self, sample_n: Sequence[T_cov]) -> list:
method from_jsonable (line 147) | def from_jsonable(self, sample_n: list) -> List[T_cov]:
FILE: gym/spaces/text.py
class Text (line 13) | class Text(Space[str]):
method __init__ (line 26) | def __init__(
method sample (line 70) | def sample(
method contains (line 141) | def contains(self, x: Any) -> bool:
method __repr__ (line 148) | def __repr__(self) -> str:
method __eq__ (line 154) | def __eq__(self, other) -> bool:
method character_set (line 164) | def character_set(self) -> FrozenSet[str]:
method character_list (line 169) | def character_list(self) -> Tuple[str, ...]:
method character_index (line 173) | def character_index(self, char: str) -> np.int32:
method characters (line 178) | def characters(self) -> str:
method is_np_flattenable (line 183) | def is_np_flattenable(self) -> bool:
FILE: gym/spaces/tuple.py
class Tuple (line 13) | class Tuple(Space[tuple], CollectionSequence):
method __init__ (line 26) | def __init__(
method is_np_flattenable (line 47) | def is_np_flattenable(self):
method seed (line 51) | def seed(
method sample (line 89) | def sample(
method contains (line 118) | def contains(self, x) -> bool:
method __repr__ (line 128) | def __repr__(self) -> str:
method to_jsonable (line 132) | def to_jsonable(self, sample_n: CollectionSequence) -> list:
method from_jsonable (line 140) | def from_jsonable(self, sample_n) -> list:
method __getitem__ (line 152) | def __getitem__(self, index: int) -> Space:
method __len__ (line 156) | def __len__(self) -> int:
method __eq__ (line 160) | def __eq__(self, other) -> bool:
FILE: gym/spaces/utils.py
function flatdim (line 30) | def flatdim(space: Space) -> int:
function _flatdim_box_multibinary (line 60) | def _flatdim_box_multibinary(space: Union[Box, MultiBinary]) -> int:
function _flatdim_discrete (line 65) | def _flatdim_discrete(space: Discrete) -> int:
function _flatdim_multidiscrete (line 70) | def _flatdim_multidiscrete(space: MultiDiscrete) -> int:
function _flatdim_tuple (line 75) | def _flatdim_tuple(space: Tuple) -> int:
function _flatdim_dict (line 84) | def _flatdim_dict(space: Dict) -> int:
function _flatdim_graph (line 93) | def _flatdim_graph(space: Graph):
function _flatdim_text (line 100) | def _flatdim_text(space: Text) -> int:
function flatten (line 109) | def flatten(space: Space[T], x: T) -> FlatType:
function _flatten_box_multibinary (line 140) | def _flatten_box_multibinary(space, x) -> np.ndarray:
function _flatten_discrete (line 145) | def _flatten_discrete(space, x) -> np.ndarray:
function _flatten_multidiscrete (line 152) | def _flatten_multidiscrete(space, x) -> np.ndarray:
function _flatten_tuple (line 162) | def _flatten_tuple(space, x) -> Union[tuple, np.ndarray]:
function _flatten_dict (line 171) | def _flatten_dict(space, x) -> Union[dict, np.ndarray]:
function _flatten_graph (line 178) | def _flatten_graph(space, x) -> GraphInstance:
function _flatten_text (line 203) | def _flatten_text(space: Text, x: str) -> np.ndarray:
function _flatten_sequence (line 213) | def _flatten_sequence(space, x) -> tuple:
function unflatten (line 218) | def unflatten(space: Space[T], x: FlatType) -> T:
function _unflatten_box_multibinary (line 239) | def _unflatten_box_multibinary(
function _unflatten_discrete (line 246) | def _unflatten_discrete(space: Discrete, x: np.ndarray) -> int:
function _unflatten_multidiscrete (line 251) | def _unflatten_multidiscrete(space: MultiDiscrete, x: np.ndarray) -> np....
function _unflatten_tuple (line 260) | def _unflatten_tuple(space: Tuple, x: Union[np.ndarray, tuple]) -> tuple:
function _unflatten_dict (line 278) | def _unflatten_dict(space: Dict, x: Union[np.ndarray, TypingDict]) -> dict:
function _unflatten_graph (line 295) | def _unflatten_graph(space: Graph, x: GraphInstance) -> GraphInstance:
function _unflatten_text (line 318) | def _unflatten_text(space: Text, x: np.ndarray) -> str:
function _unflatten_sequence (line 325) | def _unflatten_sequence(space: Sequence, x: tuple) -> tuple:
function flatten_space (line 330) | def flatten_space(space: Space) -> Union[Dict, Sequence, Tuple, Graph]:
function _flatten_space_box (line 392) | def _flatten_space_box(space: Box) -> Box:
function _flatten_space_binary (line 399) | def _flatten_space_binary(space: Union[Discrete, MultiBinary, MultiDiscr...
function _flatten_space_tuple (line 404) | def _flatten_space_tuple(space: Tuple) -> Union[Box, Tuple]:
function _flatten_space_dict (line 416) | def _flatten_space_dict(space: Dict) -> Union[Box, Dict]:
function _flatten_space_graph (line 432) | def _flatten_space_graph(space: Graph) -> Graph:
function _flatten_space_text (line 442) | def _flatten_space_text(space: Text) -> Box:
function _flatten_space_sequence (line 449) | def _flatten_space_sequence(space: Sequence) -> Sequence:
FILE: gym/utils/colorize.py
function colorize (line 19) | def colorize(
FILE: gym/utils/env_checker.py
function data_equivalence (line 33) | def data_equivalence(data_1, data_2) -> bool:
function check_reset_seed (line 62) | def check_reset_seed(env: gym.Env):
function check_reset_options (line 131) | def check_reset_options(env: gym.Env):
function check_reset_return_info_deprecation (line 159) | def check_reset_return_info_deprecation(env: gym.Env):
function check_seed_deprecation (line 176) | def check_seed_deprecation(env: gym.Env):
function check_reset_return_type (line 192) | def check_reset_return_type(env: gym.Env):
function check_space_limit (line 217) | def check_space_limit(space, space_type: str):
function check_env (line 255) | def check_env(env: gym.Env, warn: bool = None, skip_render_check: bool =...
FILE: gym/utils/ezpickle.py
class EzPickle (line 4) | class EzPickle:
method __init__ (line 20) | def __init__(self, *args, **kwargs):
method __getstate__ (line 25) | def __getstate__(self):
method __setstate__ (line 32) | def __setstate__(self, d):
FILE: gym/utils/passive_env_checker.py
function _check_box_observation_space (line 11) | def _check_box_observation_space(observation_space: spaces.Box):
function _check_box_action_space (line 50) | def _check_box_action_space(action_space: spaces.Box):
function check_space (line 69) | def check_space(
function check_obs (line 122) | def check_obs(obs, observation_space: spaces.Space, method_name: str):
function env_reset_passive_checker (line 170) | def env_reset_passive_checker(env, **kwargs):
function env_step_passive_checker (line 211) | def env_step_passive_checker(env, action):
function env_render_passive_checker (line 268) | def env_render_passive_checker(env, *args, **kwargs):
FILE: gym/utils/play.py
class MissingKeysToAction (line 33) | class MissingKeysToAction(Exception):
class PlayableGame (line 37) | class PlayableGame:
method __init__ (line 40) | def __init__(
method _get_relevant_keys (line 66) | def _get_relevant_keys(
method _get_video_size (line 83) | def _get_video_size(self, zoom: Optional[float] = None) -> Tuple[int, ...
method process_event (line 95) | def process_event(self, event: Event):
function display_arr (line 119) | def display_arr(
function play (line 137) | def play(
class PlayPlot (line 278) | class PlayPlot:
method __init__ (line 306) | def __init__(
method callback (line 344) | def callback(
FILE: gym/utils/save_video.py
function capped_cubic_video_schedule (line 16) | def capped_cubic_video_schedule(episode_id: int) -> bool:
function save_video (line 33) | def save_video(
FILE: gym/utils/seeding.py
function np_random (line 9) | def np_random(seed: Optional[int] = None) -> Tuple[np.random.Generator, ...
FILE: gym/utils/step_api_compatibility.py
function convert_to_terminated_truncated_step_api (line 24) | def convert_to_terminated_truncated_step_api(
function convert_to_done_step_api (line 76) | def convert_to_done_step_api(
function step_api_compatibility (line 131) | def step_api_compatibility(
FILE: gym/vector/__init__.py
function make (line 12) | def make(
FILE: gym/vector/async_vector_env.py
class AsyncState (line 35) | class AsyncState(Enum):
class AsyncVectorEnv (line 42) | class AsyncVectorEnv(VectorEnv):
method __init__ (line 59) | def __init__(
method reset_async (line 170) | def reset_async(
method reset_wait (line 213) | def reset_wait(
method step_async (line 263) | def step_async(self, actions: np.ndarray):
method step_wait (line 288) | def step_wait(
method call_async (line 348) | def call_async(self, name: str, *args, **kwargs):
method call_wait (line 372) | def call_wait(self, timeout: Optional[Union[int, float]] = None) -> list:
method set_attr (line 405) | def set_attr(self, name: str, values: Union[list, tuple, object]):
method close_extras (line 440) | def close_extras(
method _poll (line 483) | def _poll(self, timeout=None):
method _check_spaces (line 497) | def _check_spaces(self):
method _assert_is_running (line 518) | def _assert_is_running(self):
method _raise_if_errors (line 524) | def _raise_if_errors(self, successes):
method __del__ (line 543) | def __del__(self):
function _worker (line 549) | def _worker(index, env_fn, pipe, parent_pipe, shared_memory, error_queue):
function _worker_shared_memory (line 616) | def _worker_shared_memory(index, env_fn, pipe, parent_pipe, shared_memor...
FILE: gym/vector/sync_vector_env.py
class SyncVectorEnv (line 15) | class SyncVectorEnv(VectorEnv):
method __init__ (line 30) | def __init__(
method seed (line 74) | def seed(self, seed: Optional[Union[int, Sequence[int]]] = None):
method reset_wait (line 90) | def reset_wait(
method step_async (line 131) | def step_async(self, actions):
method step_wait (line 135) | def step_wait(self):
method call (line 171) | def call(self, name, *args, **kwargs) -> tuple:
method set_attr (line 192) | def set_attr(self, name: str, values: Union[list, tuple, Any]):
method close_extras (line 216) | def close_extras(self, **kwargs):
method _check_spaces (line 220) | def _check_spaces(self) -> bool:
FILE: gym/vector/utils/misc.py
class CloudpickleWrapper (line 8) | class CloudpickleWrapper:
method __init__ (line 11) | def __init__(self, fn: callable):
method __getstate__ (line 15) | def __getstate__(self):
method __setstate__ (line 21) | def __setstate__(self, ob):
method __call__ (line 27) | def __call__(self):
function clear_mpi_env_vars (line 33) | def clear_mpi_env_vars():
FILE: gym/vector/utils/numpy_utils.py
function concatenate (line 14) | def concatenate(
function _concatenate_base (line 49) | def _concatenate_base(space, items, out):
function _concatenate_tuple (line 54) | def _concatenate_tuple(space, items, out):
function _concatenate_dict (line 62) | def _concatenate_dict(space, items, out):
function _concatenate_custom (line 72) | def _concatenate_custom(space, items, out):
function create_empty_array (line 77) | def create_empty_array(
function _create_empty_array_base (line 114) | def _create_empty_array_base(space, n=1, fn=np.zeros):
function _create_empty_array_tuple (line 120) | def _create_empty_array_tuple(space, n=1, fn=np.zeros):
function _create_empty_array_dict (line 125) | def _create_empty_array_dict(space, n=1, fn=np.zeros):
function _create_empty_array_custom (line 135) | def _create_empty_array_custom(space, n=1, fn=np.zeros):
FILE: gym/vector/utils/shared_memory.py
function create_shared_memory (line 17) | def create_shared_memory(
function _create_base_shared_memory (line 48) | def _create_base_shared_memory(space, n: int = 1, ctx=mp):
function _create_tuple_shared_memory (line 56) | def _create_tuple_shared_memory(space, n: int = 1, ctx=mp):
function _create_dict_shared_memory (line 63) | def _create_dict_shared_memory(space, n=1, ctx=mp):
function read_from_shared_memory (line 73) | def read_from_shared_memory(
function _read_base_from_shared_memory (line 108) | def _read_base_from_shared_memory(space, shared_memory, n: int = 1):
function _read_tuple_from_shared_memory (line 115) | def _read_tuple_from_shared_memory(space, shared_memory, n: int = 1):
function _read_dict_from_shared_memory (line 123) | def _read_dict_from_shared_memory(space, shared_memory, n: int = 1):
function write_to_shared_memory (line 133) | def write_to_shared_memory(
function _write_base_to_shared_memory (line 164) | def _write_base_to_shared_memory(space, index, value, shared_memory):
function _write_tuple_to_shared_memory (line 174) | def _write_tuple_to_shared_memory(space, index, values, shared_memory):
function _write_dict_to_shared_memory (line 180) | def _write_dict_to_shared_memory(space, index, values, shared_memory):
FILE: gym/vector/utils/spaces.py
function batch_space (line 18) | def batch_space(space: Space, n: int = 1) -> Space:
function _batch_space_box (line 47) | def _batch_space_box(space, n=1):
function _batch_space_discrete (line 54) | def _batch_space_discrete(space, n=1):
function _batch_space_multidiscrete (line 72) | def _batch_space_multidiscrete(space, n=1):
function _batch_space_multibinary (line 84) | def _batch_space_multibinary(space, n=1):
function _batch_space_tuple (line 95) | def _batch_space_tuple(space, n=1):
function _batch_space_dict (line 103) | def _batch_space_dict(space, n=1):
function _batch_space_custom (line 116) | def _batch_space_custom(space, n=1):
function iterate (line 128) | def iterate(space: Space, items) -> Iterator:
function _iterate_discrete (line 164) | def _iterate_discrete(space, items):
function _iterate_base (line 171) | def _iterate_base(space, items):
function _iterate_tuple (line 179) | def _iterate_tuple(space, items):
function _iterate_dict (line 194) | def _iterate_dict(space, items):
function _iterate_custom (line 206) | def _iterate_custom(space, items):
FILE: gym/vector/vector_env.py
class VectorEnv (line 12) | class VectorEnv(gym.Env):
method __init__ (line 25) | def __init__(
method reset_async (line 51) | def reset_async(
method reset_wait (line 67) | def reset_wait(
method reset (line 88) | def reset(
method step_async (line 106) | def step_async(self, actions):
method step_wait (line 115) | def step_wait(self, **kwargs):
method step (line 127) | def step(self, actions):
method call_async (line 139) | def call_async(self, name, *args, **kwargs):
method call_wait (line 142) | def call_wait(self, **kwargs) -> List[Any]: # type: ignore
method call (line 145) | def call(self, name: str, *args, **kwargs) -> List[Any]:
method get_attr (line 159) | def get_attr(self, name: str):
method set_attr (line 170) | def set_attr(self, name: str, values: Union[list, tuple, object]):
method close_extras (line 180) | def close_extras(self, **kwargs):
method close (line 184) | def close(self, **kwargs):
method _add_info (line 208) | def _add_info(self, infos: dict, info: dict, env_num: int) -> dict:
method _init_info_arrays (line 235) | def _init_info_arrays(self, dtype: type) -> Tuple[np.ndarray, np.ndarr...
method __del__ (line 260) | def __del__(self):
method __repr__ (line 265) | def __repr__(self) -> str:
class VectorEnvWrapper (line 277) | class VectorEnvWrapper(VectorEnv):
method __init__ (line 288) | def __init__(self, env: VectorEnv):
method reset_async (line 294) | def reset_async(self, **kwargs):
method reset_wait (line 297) | def reset_wait(self, **kwargs):
method step_async (line 300) | def step_async(self, actions):
method step_wait (line 303) | def step_wait(self):
method close (line 306) | def close(self, **kwargs):
method close_extras (line 309) | def close_extras(self, **kwargs):
method call (line 312) | def call(self, name, *args, **kwargs):
method set_attr (line 315) | def set_attr(self, name, values):
method __getattr__ (line 319) | def __getattr__(self, name):
method unwrapped (line 325) | def unwrapped(self):
method __repr__ (line 328) | def __repr__(self):
method __del__ (line 331) | def __del__(self):
FILE: gym/wrappers/atari_preprocessing.py
class AtariPreprocessing (line 13) | class AtariPreprocessing(gym.Wrapper):
method __init__ (line 30) | def __init__(
method ale (line 115) | def ale(self):
method step (line 119) | def step(self, action):
method reset (line 148) | def reset(self, **kwargs):
method _get_obs (line 173) | def _get_obs(self):
FILE: gym/wrappers/autoreset.py
class AutoResetWrapper (line 5) | class AutoResetWrapper(gym.Wrapper):
method __init__ (line 27) | def __init__(self, env: gym.Env):
method step (line 35) | def step(self, action):
FILE: gym/wrappers/clip_action.py
class ClipAction (line 9) | class ClipAction(ActionWrapper):
method __init__ (line 22) | def __init__(self, env: gym.Env):
method action (line 31) | def action(self, action):
FILE: gym/wrappers/compatibility.py
class LegacyEnv (line 19) | class LegacyEnv(Protocol):
method reset (line 25) | def reset(self) -> Any:
method step (line 29) | def step(self, action: Any) -> Tuple[Any, float, bool, Dict]:
method render (line 33) | def render(self, mode: Optional[str] = "human") -> Any:
method close (line 37) | def close(self):
method seed (line 41) | def seed(self, seed: Optional[int] = None):
class EnvCompatibility (line 46) | class EnvCompatibility(gym.Env):
method __init__ (line 57) | def __init__(self, old_env: LegacyEnv, render_mode: Optional[str] = No...
method reset (line 75) | def reset(
method step (line 96) | def step(self, action: Any) -> Tuple[Any, float, bool, bool, Dict]:
method render (line 112) | def render(self) -> Any:
method close (line 120) | def close(self):
method __str__ (line 124) | def __str__(self):
method __repr__ (line 128) | def __repr__(self):
FILE: gym/wrappers/env_checker.py
class PassiveEnvChecker (line 13) | class PassiveEnvChecker(gym.Wrapper):
method __init__ (line 16) | def __init__(self, env):
method step (line 33) | def step(self, action: ActType):
method reset (line 41) | def reset(self, **kwargs):
method render (line 49) | def render(self, *args, **kwargs):
FILE: gym/wrappers/filter_observation.py
class FilterObservation (line 9) | class FilterObservation(gym.ObservationWrapper):
method __init__ (line 27) | def __init__(self, env: gym.Env, filter_keys: Sequence[str] = None):
method observation (line 71) | def observation(self, observation):
method _filter_observation (line 83) | def _filter_observation(self, observation):
FILE: gym/wrappers/flatten_observation.py
class FlattenObservation (line 6) | class FlattenObservation(gym.ObservationWrapper):
method __init__ (line 22) | def __init__(self, env: gym.Env):
method observation (line 31) | def observation(self, observation):
FILE: gym/wrappers/frame_stack.py
class LazyFrames (line 12) | class LazyFrames:
method __init__ (line 23) | def __init__(self, frames: list, lz4_compress: bool = False):
method __array__ (line 48) | def __array__(self, dtype=None):
method __len__ (line 62) | def __len__(self):
method __getitem__ (line 70) | def __getitem__(self, int_or_slice: Union[int, slice]):
method __eq__ (line 86) | def __eq__(self, other):
method _check_decompress (line 90) | def _check_decompress(self, frame):
class FrameStack (line 100) | class FrameStack(gym.ObservationWrapper):
method __init__ (line 125) | def __init__(
method observation (line 152) | def observation(self, observation):
method step (line 164) | def step(self, action):
method reset (line 177) | def reset(self, **kwargs):
FILE: gym/wrappers/gray_scale_observation.py
class GrayScaleObservation (line 8) | class GrayScaleObservation(gym.ObservationWrapper):
method __init__ (line 23) | def __init__(self, env: gym.Env, keep_dim: bool = False):
method observation (line 50) | def observation(self, observation):
FILE: gym/wrappers/human_rendering.py
class HumanRendering (line 8) | class HumanRendering(gym.Wrapper):
method __init__ (line 42) | def __init__(self, env):
method render_mode (line 62) | def render_mode(self):
method step (line 66) | def step(self, *args, **kwargs):
method reset (line 72) | def reset(self, *args, **kwargs):
method render (line 78) | def render(self):
method _render_frame (line 82) | def _render_frame(self):
method close (line 125) | def close(self):
FILE: gym/wrappers/monitoring/video_recorder.py
class VideoRecorder (line 11) | class VideoRecorder:
method __init__ (line 20) | def __init__(
method functional (line 105) | def functional(self):
method capture_frame (line 109) | def capture_frame(self):
method close (line 139) | def close(self):
method write_metadata (line 170) | def write_metadata(self):
method __del__ (line 175) | def __del__(self):
FILE: gym/wrappers/normalize.py
class RunningMeanStd (line 8) | class RunningMeanStd:
method __init__ (line 12) | def __init__(self, epsilon=1e-4, shape=()):
method update (line 18) | def update(self, x):
method update_from_moments (line 25) | def update_from_moments(self, batch_mean, batch_var, batch_count):
function update_mean_var_count_from_moments (line 32) | def update_mean_var_count_from_moments(
class NormalizeObservation (line 49) | class NormalizeObservation(gym.core.Wrapper):
method __init__ (line 57) | def __init__(self, env: gym.Env, epsilon: float = 1e-8):
method step (line 73) | def step(self, action):
method reset (line 82) | def reset(self, **kwargs):
method normalize (line 91) | def normalize(self, obs):
class NormalizeReward (line 97) | class NormalizeReward(gym.core.Wrapper):
method __init__ (line 107) | def __init__(
method step (line 128) | def step(self, action):
method normalize (line 141) | def normalize(self, rews):
FILE: gym/wrappers/order_enforcing.py
class OrderEnforcing (line 6) | class OrderEnforcing(gym.Wrapper):
method __init__ (line 22) | def __init__(self, env: gym.Env, disable_render_order_enforcing: bool ...
method step (line 33) | def step(self, action):
method reset (line 39) | def reset(self, **kwargs):
method render (line 44) | def render(self, *args, **kwargs):
method has_reset (line 54) | def has_reset(self):
FILE: gym/wrappers/pixel_observation.py
class PixelObservationWrapper (line 15) | class PixelObservationWrapper(gym.ObservationWrapper):
method __init__ (line 49) | def __init__(
method observation (line 165) | def observation(self, observation):
method _add_pixel_observation (line 177) | def _add_pixel_observation(self, wrapped_observation):
method render (line 195) | def render(self, *args, **kwargs):
method _render (line 203) | def _render(self, *args, **kwargs):
FILE: gym/wrappers/record_episode_statistics.py
function add_vector_episode_statistics (line 11) | def add_vector_episode_statistics(
class RecordEpisodeStatistics (line 40) | class RecordEpisodeStatistics(gym.Wrapper):
method __init__ (line 79) | def __init__(self, env: gym.Env, deque_size: int = 100):
method reset (line 96) | def reset(self, **kwargs):
method step (line 103) | def step(self, action):
FILE: gym/wrappers/record_video.py
function capped_cubic_video_schedule (line 10) | def capped_cubic_video_schedule(episode_id: int) -> bool:
class RecordVideo (line 27) | class RecordVideo(gym.Wrapper):
method __init__ (line 40) | def __init__(
method reset (line 92) | def reset(self, **kwargs):
method start_video_recorder (line 109) | def start_video_recorder(self):
method _video_enabled (line 128) | def _video_enabled(self):
method step (line 134) | def step(self, action):
method close_video_recorder (line 176) | def close_video_recorder(self):
method render (line 184) | def render(self, *args, **kwargs):
method close (line 204) | def close(self):
method __del__ (line 209) | def __del__(self):
FILE: gym/wrappers/render_collection.py
class RenderCollection (line 5) | class RenderCollection(gym.Wrapper):
method __init__ (line 8) | def __init__(self, env: gym.Env, pop_frames: bool = True, reset_clean:...
method render_mode (line 26) | def render_mode(self):
method step (line 30) | def step(self, *args, **kwargs):
method reset (line 36) | def reset(self, *args, **kwargs):
method render (line 46) | def render(self):
FILE: gym/wrappers/rescale_action.py
class RescaleAction (line 10) | class RescaleAction(gym.ActionWrapper):
method __init__ (line 30) | def __init__(
method action (line 62) | def action(self, action):
FILE: gym/wrappers/resize_observation.py
class ResizeObservation (line 11) | class ResizeObservation(gym.ObservationWrapper):
method __init__ (line 28) | def __init__(self, env: gym.Env, shape: Union[tuple, int]):
method observation (line 48) | def observation(self, observation):
FILE: gym/wrappers/step_api_compatibility.py
class StepAPICompatibility (line 10) | class StepAPICompatibility(gym.Wrapper):
method __init__ (line 31) | def __init__(self, env: gym.Env, output_truncation_bool: bool = True):
method step (line 45) | def step(self, action):
FILE: gym/wrappers/time_aware_observation.py
class TimeAwareObservation (line 8) | class TimeAwareObservation(gym.ObservationWrapper):
method __init__ (line 24) | def __init__(self, env: gym.Env):
method observation (line 38) | def observation(self, observation):
method step (line 49) | def step(self, action):
method reset (line 61) | def reset(self, **kwargs):
FILE: gym/wrappers/time_limit.py
class TimeLimit (line 7) | class TimeLimit(gym.Wrapper):
method __init__ (line 20) | def __init__(
method step (line 39) | def step(self, action):
method reset (line 58) | def reset(self, **kwargs):
FILE: gym/wrappers/transform_observation.py
class TransformObservation (line 7) | class TransformObservation(gym.ObservationWrapper):
method __init__ (line 23) | def __init__(self, env: gym.Env, f: Callable[[Any], Any]):
method observation (line 34) | def observation(self, observation):
FILE: gym/wrappers/transform_reward.py
class TransformReward (line 8) | class TransformReward(RewardWrapper):
method __init__ (line 24) | def __init__(self, env: gym.Env, f: Callable[[float], float]):
method reward (line 35) | def reward(self, reward):
FILE: gym/wrappers/vector_list_info.py
class VectorListInfo (line 8) | class VectorListInfo(gym.Wrapper):
method __init__ (line 32) | def __init__(self, env):
method step (line 43) | def step(self, action):
method reset (line 50) | def reset(self, **kwargs):
method _convert_info_to_list (line 56) | def _convert_info_to_list(self, infos: dict) -> List[dict]:
method _process_episode_statistics (line 80) | def _process_episode_statistics(self, infos: dict, list_info: list) ->...
FILE: tests/envs/test_action_dim_check.py
function test_mujoco_action_dimensions (line 15) | def test_mujoco_action_dimensions(env_spec: EnvSpec):
function test_discrete_actions_out_of_bound (line 62) | def test_discrete_actions_out_of_bound(env: gym.Env):
function test_box_actions_out_of_bound (line 91) | def test_box_actions_out_of_bound(env: gym.Env):
FILE: tests/envs/test_compatibility.py
class LegacyEnvExplicit (line 11) | class LegacyEnvExplicit(LegacyEnv, gym.Env):
method __init__ (line 18) | def __init__(self):
method reset (line 21) | def reset(self):
method step (line 24) | def step(self, action):
method render (line 27) | def render(self, mode="human"):
method close (line 33) | def close(self):
method seed (line 36) | def seed(self, seed=None):
class LegacyEnvImplicit (line 40) | class LegacyEnvImplicit(gym.Env):
method __init__ (line 47) | def __init__(self):
method reset (line 50) | def reset(self): # type: ignore
method step (line 53) | def step(self, action: Any) -> Tuple[int, float, bool, Dict]:
method render (line 56) | def render(self, mode: Optional[str] = "human") -> Any:
method close (line 62) | def close(self):
method seed (line 65) | def seed(self, seed: Optional[int] = None):
function test_explicit (line 69) | def test_explicit():
function test_implicit (line 82) | def test_implicit():
function test_make_compatibility_in_spec (line 97) | def test_make_compatibility_in_spec():
function test_make_compatibility_in_make (line 116) | def test_make_compatibility_in_make():
FILE: tests/envs/test_env_implementation.py
function test_lunar_lander_heuristics (line 13) | def test_lunar_lander_heuristics():
function test_carracing_domain_randomize (line 20) | def test_carracing_domain_randomize():
function test_bipedal_walker_hardcore_creation (line 60) | def test_bipedal_walker_hardcore_creation(seed: int):
function test_frozenlake_dfs_map_generation (line 99) | def test_frozenlake_dfs_map_generation(map_size: int):
function test_taxi_action_mask (line 129) | def test_taxi_action_mask():
function test_taxi_encode_decode (line 139) | def test_taxi_encode_decode():
function test_customizable_resets (line 157) | def test_customizable_resets(env_name: str, low_high: Optional[list]):
function test_customizable_pendulum_resets (line 180) | def test_customizable_pendulum_resets(low_high: Optional[list]):
function test_invalid_customizable_resets (line 209) | def test_invalid_customizable_resets(env_name: str, low_high: list):
FILE: tests/envs/test_envs.py
function test_envs_pass_env_checker (line 41) | def test_envs_pass_env_checker(spec):
function test_env_determinism_rollout (line 63) | def test_env_determinism_rollout(env_spec: EnvSpec):
function check_rendered (line 118) | def check_rendered(rendered_frame, mode: str):
function test_render_modes (line 161) | def test_render_modes(spec):
function test_pickle_env (line 192) | def test_pickle_env(env: gym.Env):
FILE: tests/envs/test_make.py
function test_make (line 49) | def test_make():
function test_make_deprecated (line 56) | def test_make_deprecated():
function test_make_max_episode_steps (line 67) | def test_make_max_episode_steps():
function test_gym_make_autoreset (line 91) | def test_gym_make_autoreset():
function test_make_disable_env_checker (line 106) | def test_make_disable_env_checker():
function test_apply_api_compatibility (line 135) | def test_apply_api_compatibility():
function test_passive_checker_wrapper_warnings (line 168) | def test_passive_checker_wrapper_warnings(spec):
function test_make_order_enforcing (line 182) | def test_make_order_enforcing():
function test_make_render_mode (line 203) | def test_make_render_mode():
function test_make_kwargs (line 288) | def test_make_kwargs():
function test_import_module_during_make (line 303) | def test_import_module_during_make():
FILE: tests/envs/test_mujoco.py
function verify_environments_match (line 12) | def verify_environments_match(
function test_obs_space_mujoco_environments (line 61) | def test_obs_space_mujoco_environments(env_spec: EnvSpec):
function test_mujoco_v2_to_v3_conversion (line 113) | def test_mujoco_v2_to_v3_conversion(env_name: str):
function test_mujoco_incompatible_v3_to_v2 (line 119) | def test_mujoco_incompatible_v3_to_v2(env_name: str):
FILE: tests/envs/test_register.py
function register_testing_envs (line 11) | def register_testing_envs():
function test_register (line 63) | def test_register(
function test_register_error (line 88) | def test_register_error(env_id):
function test_env_suggestions (line 108) | def test_env_suggestions(register_testing_envs, env_id_input, env_id_sug...
function test_env_version_suggestions (line 126) | def test_env_version_suggestions(
function test_register_versioned_unversioned (line 143) | def test_register_versioned_unversioned():
function test_make_latest_versioned_env (line 176) | def test_make_latest_versioned_env(register_testing_envs):
function test_namespace (line 189) | def test_namespace():
FILE: tests/envs/test_spec.py
function test_spec (line 10) | def test_spec():
function test_spec_kwargs (line 16) | def test_spec_kwargs():
function test_spec_missing_lookup (line 22) | def test_spec_missing_lookup():
function test_spec_malformed_lookup (line 51) | def test_spec_malformed_lookup():
function test_spec_versioned_lookups (line 59) | def test_spec_versioned_lookups():
function test_spec_default_lookups (line 81) | def test_spec_default_lookups():
FILE: tests/envs/utils.py
function try_make_env (line 11) | def try_make_env(env_spec: EnvSpec) -> Optional[gym.Env]:
function assert_equals (line 66) | def assert_equals(a, b, prefix=None):
FILE: tests/envs/utils_envs.py
class RegisterDuringMakeEnv (line 4) | class RegisterDuringMakeEnv(gym.Env):
method __init__ (line 7) | def __init__(self):
class ArgumentEnv (line 12) | class ArgumentEnv(gym.Env):
method __init__ (line 16) | def __init__(self, arg1, arg2, arg3):
class NoHuman (line 23) | class NoHuman(gym.Env):
method __init__ (line 28) | def __init__(self, render_mode=None):
class NoHumanOldAPI (line 33) | class NoHumanOldAPI(gym.Env):
method __init__ (line 38) | def __init__(self):
class NoHumanNoRGB (line 42) | class NoHumanNoRGB(gym.Env):
method __init__ (line 47) | def __init__(self, render_mode=None):
FILE: tests/spaces/test_box.py
function test_shape_inference (line 36) | def test_shape_inference(box, expected_shape):
function test_low_high_values (line 60) | def test_low_high_values(value, valid: bool):
function test_init_errors (line 142) | def test_init_errors(low, high, kwargs, error, message):
function test_dtype_check (line 148) | def test_dtype_check():
function test_infinite_space (line 199) | def test_infinite_space(space):
function test_legacy_state_pickling (line 264) | def test_legacy_state_pickling():
function test_get_inf (line 286) | def test_get_inf():
function test_sample_mask (line 309) | def test_sample_mask():
FILE: tests/spaces/test_dict.py
function test_dict_init (line 9) | def test_dict_init():
function test_dict_seeding (line 56) | def test_dict_seeding():
function test_int_seeding (line 83) | def test_int_seeding():
function test_none_seeding (line 106) | def test_none_seeding():
function test_bad_seed (line 111) | def test_bad_seed():
function test_mapping (line 116) | def test_mapping():
function test_iterator (line 135) | def test_iterator():
FILE: tests/spaces/test_discrete.py
function test_space_legacy_pickling (line 6) | def test_space_legacy_pickling():
function test_sample_mask (line 35) | def test_sample_mask():
FILE: tests/spaces/test_graph.py
function test_node_space_sample (line 9) | def test_node_space_sample():
function test_edge_space_sample (line 49) | def test_edge_space_sample():
function test_not_contains (line 135) | def test_not_contains(sample):
FILE: tests/spaces/test_multibinary.py
function test_sample (line 6) | def test_sample():
FILE: tests/spaces/test_multidiscrete.py
function test_multidiscrete_as_tuple (line 7) | def test_multidiscrete_as_tuple():
function test_multidiscrete_subspace_reproducibility (line 31) | def test_multidiscrete_subspace_reproducibility():
function test_multidiscrete_length (line 57) | def test_multidiscrete_length():
FILE: tests/spaces/test_sequence.py
function test_sample (line 9) | def test_sample():
FILE: tests/spaces/test_space.py
function test_not_implemented_errors (line 22) | def test_not_implemented_errors(func):
FILE: tests/spaces/test_spaces.py
function test_roundtripping (line 36) | def test_roundtripping(space: Space):
function test_space_equality (line 60) | def test_space_equality(space_1, space_2):
function test_sample (line 91) | def test_sample(space: Space, n_trials: int = 1_000):
function test_space_sample_mask (line 234) | def test_space_sample_mask(space: Space, mask, n_trials: int = 100):
function test_seed_reproducibility (line 385) | def test_seed_reproducibility(space):
function test_seed_np_random (line 425) | def test_seed_np_random(space_cls, kwarg):
function test_sample_contains (line 437) | def test_sample_contains(space):
function test_repr (line 453) | def test_repr(space):
function test_space_pickling (line 458) | def test_space_pickling(space):
FILE: tests/spaces/test_text.py
function test_sample_mask (line 9) | def test_sample_mask():
FILE: tests/spaces/test_tuple.py
function test_sequence_inheritance (line 9) | def test_sequence_inheritance():
function test_seeds (line 64) | def test_seeds(space, seed, expected_len):
function test_bad_space_calls (line 86) | def test_bad_space_calls(space_fn):
function test_contains_promotion (line 91) | def test_contains_promotion():
function test_bad_seed (line 103) | def test_bad_seed():
FILE: tests/spaces/test_utils.py
function test_flatdim (line 59) | def test_flatdim(space: gym.spaces.Space, flatdim: Optional[int]):
function test_flatten_space (line 72) | def test_flatten_space(space):
function test_flatten (line 100) | def test_flatten(space):
function test_flat_space_contains_flat_points (line 115) | def test_flat_space_contains_flat_points(space):
function test_flatten_roundtripping (line 125) | def test_flatten_roundtripping(space):
FILE: tests/test_core.py
class ArgumentEnv (line 10) | class ArgumentEnv(core.Env):
method __init__ (line 15) | def __init__(self, arg):
class UnittestEnv (line 20) | class UnittestEnv(core.Env):
method reset (line 24) | def reset(self, *, seed: Optional[int] = None, options: Optional[dict]...
method step (line 28) | def step(self, action):
class UnknownSpacesEnv (line 33) | class UnknownSpacesEnv(core.Env):
method reset (line 40) | def reset(self, *, seed: Optional[int] = None, options: Optional[dict]...
method step (line 48) | def step(self, action):
class OldStyleEnv (line 53) | class OldStyleEnv(core.Env):
method __init__ (line 56) | def __init__(self):
method reset (line 59) | def reset(self):
method step (line 63) | def step(self, action):
class NewPropertyWrapper (line 67) | class NewPropertyWrapper(core.Wrapper):
method __init__ (line 68) | def __init__(
function test_env_instantiation (line 88) | def test_env_instantiation():
function test_wrapper_property_forwarding (line 116) | def test_wrapper_property_forwarding(class_, props):
function test_compatibility_with_old_style_env (line 134) | def test_compatibility_with_old_style_env():
FILE: tests/testing_env.py
function basic_reset_fn (line 11) | def basic_reset_fn(
function new_step_fn (line 23) | def new_step_fn(self, action: ActType) -> Tuple[ObsType, float, bool, bo...
function old_step_fn (line 28) | def old_step_fn(self, action: ActType) -> Tuple[ObsType, float, bool, di...
function basic_render_fn (line 33) | def basic_render_fn(self):
class GenericTestEnv (line 39) | class GenericTestEnv(gym.Env):
method __init__ (line 42) | def __init__(
method reset (line 69) | def reset(
method step (line 78) | def step(self, action: ActType) -> Tuple[ObsType, float, bool, dict]:
method render (line 81) | def render(self):
FILE: tests/utils/test_env_checker.py
function test_no_error_warnings (line 48) | def test_no_error_warnings(env):
function _no_super_reset (line 56) | def _no_super_reset(self, seed=None, options=None):
function _super_reset_fixed (line 63) | def _super_reset_fixed(self, seed=None, options=None):
function _reset_default_seed (line 71) | def _reset_default_seed(self: GenericTestEnv, seed="Error", options=None):
function test_check_reset_seed (line 109) | def test_check_reset_seed(test, func: callable, message: str):
function _deprecated_return_info (line 121) | def _deprecated_return_info(
function _reset_var_keyword_kwargs (line 131) | def _reset_var_keyword_kwargs(self, kwargs):
function _reset_return_info_type (line 135) | def _reset_return_info_type(self, seed=None, options=None):
function _reset_return_info_length (line 141) | def _reset_return_info_length(self, seed=None, options=None):
function _return_info_obs_outside (line 145) | def _return_info_obs_outside(self, seed=None, options=None):
function _return_info_not_dict (line 149) | def _return_info_not_dict(self, seed=None, options=None):
function test_check_reset_return_type (line 178) | def test_check_reset_return_type(test, func: callable, message: str):
function test_check_reset_return_info_deprecation (line 197) | def test_check_reset_return_info_deprecation(test, func: callable, messa...
function test_check_seed_deprecation (line 204) | def test_check_seed_deprecation():
function test_check_reset_options (line 231) | def test_check_reset_options():
function test_check_env (line 259) | def test_check_env(env: gym.Env, message: str):
FILE: tests/utils/test_passive_env_checker.py
function _modify_space (line 21) | def _modify_space(space: spaces.Space, attribute: str, value):
function test_check_observation_space (line 108) | def test_check_observation_space(test, space, message: str):
function test_check_action_space (line 175) | def test_check_action_space(
function test_check_obs (line 232) | def test_check_obs(test, obs, obs_space: spaces.Space, message: str):
function _reset_no_seed (line 246) | def _reset_no_seed(self, options=None):
function _reset_seed_default (line 250) | def _reset_seed_default(self, seed="error", options=None):
function _reset_no_option (line 254) | def _reset_no_option(self, seed=None):
function _make_reset_results (line 258) | def _make_reset_results(results):
function test_passive_env_reset_checker (line 300) | def test_passive_env_reset_checker(test, func: callable, message: str, k...
function _modified_step (line 314) | def _modified_step(
function test_passive_env_step_checker (line 378) | def test_passive_env_step_checker(
function test_passive_render_checker (line 447) | def test_passive_render_checker(test, env: GenericTestEnv, message: str):
FILE: tests/utils/test_play.py
class KeysToActionWrapper (line 27) | class KeysToActionWrapper(gym.Wrapper):
method __init__ (line 28) | def __init__(self, env, keys_to_action):
method get_keys_to_action (line 32) | def get_keys_to_action(self):
class PlayStatus (line 36) | class PlayStatus:
method __init__ (line 37) | def __init__(self, callback: Callable):
method callback (line 42) | def callback(self, obs_t, obs_tp1, action, rew, terminated, truncated,...
function dummy_keys_to_action (line 50) | def dummy_keys_to_action():
function dummy_keys_to_action_str (line 54) | def dummy_keys_to_action_str():
function close_pygame (line 60) | def close_pygame():
function test_play_relevant_keys (line 65) | def test_play_relevant_keys():
function test_play_relevant_keys_no_mapping (line 71) | def test_play_relevant_keys_no_mapping():
function test_play_relevant_keys_with_env_attribute (line 78) | def test_play_relevant_keys_with_env_attribute():
function test_video_size_no_zoom (line 86) | def test_video_size_no_zoom():
function test_video_size_zoom (line 92) | def test_video_size_zoom():
function test_keyboard_quit_event (line 99) | def test_keyboard_quit_event():
function test_pygame_quit_event (line 108) | def test_pygame_quit_event():
function test_keyboard_relevant_keydown_event (line 117) | def test_keyboard_relevant_keydown_event():
function test_keyboard_irrelevant_keydown_event (line 125) | def test_keyboard_irrelevant_keydown_event():
function test_keyboard_keyup_event (line 133) | def test_keyboard_keyup_event():
function test_play_loop_real_env (line 143) | def test_play_loop_real_env():
function test_play_no_keys (line 207) | def test_play_no_keys():
FILE: tests/utils/test_save_video.py
function test_record_video_using_default_trigger (line 10) | def test_record_video_using_default_trigger():
function modulo_step_trigger (line 42) | def modulo_step_trigger(mod: int):
function test_record_video_step_trigger (line 49) | def test_record_video_step_trigger():
function test_record_video_within_vector (line 79) | def test_record_video_within_vector():
FILE: tests/utils/test_seeding.py
function test_invalid_seeds (line 7) | def test_invalid_seeds():
function test_valid_seeds (line 17) | def test_valid_seeds():
function test_rng_pickle (line 23) | def test_rng_pickle():
FILE: tests/utils/test_step_api_compatibility.py
function test_to_done_step_api (line 57) | def test_to_done_step_api(
function test_to_terminated_truncated_step_api (line 115) | def test_to_terminated_truncated_step_api(
function test_edge_case (line 148) | def test_edge_case():
FILE: tests/vector/test_async_vector_env.py
function test_create_async_vector_env (line 19) | def test_create_async_vector_env(shared_memory):
function test_reset_async_vector_env (line 28) | def test_reset_async_vector_env(shared_memory):
function test_step_async_vector_env (line 59) | def test_step_async_vector_env(shared_memory, use_single_action_space):
function test_call_async_vector_env (line 99) | def test_call_async_vector_env(shared_memory):
function test_set_attr_async_vector_env (line 125) | def test_set_attr_async_vector_env(shared_memory):
function test_copy_async_vector_env (line 137) | def test_copy_async_vector_env(shared_memory):
function test_no_copy_async_vector_env (line 149) | def test_no_copy_async_vector_env(shared_memory):
function test_reset_timeout_async_vector_env (line 161) | def test_reset_timeout_async_vector_env(shared_memory):
function test_step_timeout_async_vector_env (line 173) | def test_step_timeout_async_vector_env(shared_memory):
function test_reset_out_of_order_async_vector_env (line 185) | def test_reset_out_of_order_async_vector_env(shared_memory):
function test_step_out_of_order_async_vector_env (line 221) | def test_step_out_of_order_async_vector_env(shared_memory):
function test_already_closed_async_vector_env (line 256) | def test_already_closed_async_vector_env(shared_memory):
function test_check_spaces_async_vector_env (line 265) | def test_check_spaces_async_vector_env(shared_memory):
function test_custom_space_async_vector_env (line 275) | def test_custom_space_async_vector_env():
function test_custom_space_async_vector_env_shared_memory (line 304) | def test_custom_space_async_vector_env_shared_memory():
FILE: tests/vector/test_numpy_utils.py
function test_concatenate (line 15) | def test_concatenate(space):
function test_create_empty_array (line 58) | def test_create_empty_array(space, n):
function test_create_empty_array_zeros (line 88) | def test_create_empty_array_zeros(space, n):
function test_create_empty_array_none_shape_ones (line 118) | def test_create_empty_array_none_shape_ones(space):
FILE: tests/vector/test_shared_memory.py
function test_create_shared_memory (line 51) | def test_create_shared_memory(space, expected_type, n, ctx):
function test_create_shared_memory_custom_space (line 82) | def test_create_shared_memory_custom_space(n, ctx, space):
function _write_shared_memory (line 88) | def _write_shared_memory(space, i, shared_memory, sample):
function test_write_to_shared_memory (line 95) | def test_write_to_shared_memory(space):
function _process_write (line 130) | def _process_write(space, i, shared_memory, sample):
function test_read_from_shared_memory (line 137) | def test_read_from_shared_memory(space):
FILE: tests/vector/test_spaces.py
function test_batch_space (line 96) | def test_batch_space(space, expected_batch_space_4):
function test_batch_space_custom_space (line 106) | def test_batch_space_custom_space(space, expected_batch_space_4):
function test_iterate (line 116) | def test_iterate(space, batch_space):
function test_iterate_custom_space (line 130) | def test_iterate_custom_space(space, batch_space):
function test_rng_different_at_each_index (line 146) | def test_rng_different_at_each_index(space: Space, n: int, base_seed: int):
function test_deterministic (line 169) | def test_deterministic(space: Space, n: int, base_seed: int):
FILE: tests/vector/test_sync_vector_env.py
function test_create_sync_vector_env (line 16) | def test_create_sync_vector_env():
function test_reset_sync_vector_env (line 24) | def test_reset_sync_vector_env():
function test_step_sync_vector_env (line 40) | def test_step_sync_vector_env(use_single_action_space):
function test_call_sync_vector_env (line 79) | def test_call_sync_vector_env():
function test_set_attr_sync_vector_env (line 104) | def test_set_attr_sync_vector_env():
function test_check_spaces_sync_vector_env (line 115) | def test_check_spaces_sync_vector_env():
function test_custom_space_sync_vector_env (line 125) | def test_custom_space_sync_vector_env():
function test_sync_vector_env_seed (line 154) | def test_sync_vector_env_seed():
function test_sync_vector_determinism (line 168) | def test_sync_vector_determinism(spec: EnvSpec, seed: int = 123, n: int ...
FILE: tests/vector/test_vector_env.py
function test_vector_env_equal (line 15) | def test_vector_env_equal(shared_memory):
function test_custom_space_vector_env (line 56) | def test_custom_space_vector_env():
function test_final_obs_info (line 75) | def test_final_obs_info(vectoriser):
FILE: tests/vector/test_vector_env_info.py
function test_vector_env_info (line 15) | def test_vector_env_info(asynchronous):
function test_vector_env_info_concurrent_termination (line 40) | def test_vector_env_info_concurrent_termination(concurrent_ends):
FILE: tests/vector/test_vector_env_wrapper.py
class DummyWrapper (line 6) | class DummyWrapper(VectorEnvWrapper):
method __init__ (line 7) | def __init__(self, env):
method reset_async (line 11) | def reset_async(self, **kwargs):
function test_vector_env_wrapper_inheritance (line 16) | def test_vector_env_wrapper_inheritance():
function test_vector_env_wrapper_attributes (line 23) | def test_vector_env_wrapper_attributes():
FILE: tests/vector/test_vector_make.py
function test_vector_make_id (line 10) | def test_vector_make_id():
function test_vector_make_num_envs (line 18) | def test_vector_make_num_envs(num_envs):
function test_vector_make_asynchronous (line 24) | def test_vector_make_asynchronous():
function test_vector_make_wrappers (line 34) | def test_vector_make_wrappers():
function test_vector_make_disable_env_checker (line 64) | def test_vector_make_disable_env_checker():
FILE: tests/vector/utils.py
class UnittestSlowEnv (line 53) | class UnittestSlowEnv(gym.Env):
method __init__ (line 54) | def __init__(self, slow_reset=0.3):
method reset (line 62) | def reset(self, *, seed: Optional[int] = None, options: Optional[dict]...
method step (line 68) | def step(self, action):
class CustomSpace (line 75) | class CustomSpace(gym.Space):
method sample (line 78) | def sample(self):
method contains (line 81) | def contains(self, x):
method __eq__ (line 84) | def __eq__(self, other):
class CustomSpaceEnv (line 94) | class CustomSpaceEnv(gym.Env):
method __init__ (line 95) | def __init__(self):
method reset (line 100) | def reset(self, *, seed: Optional[int] = None, options: Optional[dict]...
method step (line 104) | def step(self, action):
function make_env (line 110) | def make_env(env_name, seed, **kwargs):
function make_slow_env (line 120) | def make_slow_env(slow_reset, seed):
function make_custom_space_env (line 129) | def make_custom_space_env(seed):
function assert_rng_equal (line 138) | def assert_rng_equal(rng_1: RandomNumberGenerator, rng_2: RandomNumberGe...
FILE: tests/wrappers/test_atari_preprocessing.py
class AleTesting (line 9) | class AleTesting:
method lives (line 15) | def lives(self) -> int:
method getScreenGrayscale (line 19) | def getScreenGrayscale(self, buffer: np.ndarray):
method getScreenRGB (line 23) | def getScreenRGB(self, buffer: np.ndarray):
class AtariTestingEnv (line 28) | class AtariTestingEnv(GenericTestEnv):
method __init__ (line 31) | def __init__(self):
method get_action_meanings (line 41) | def get_action_meanings(self):
function test_atari_preprocessing_grayscale (line 83) | def test_atari_preprocessing_grayscale(env, obs_shape):
function test_atari_preprocessing_scale (line 104) | def test_atari_preprocessing_scale(grayscale, scaled, max_test_steps=10):
FILE: tests/wrappers/test_autoreset.py
class DummyResetEnv (line 13) | class DummyResetEnv(gym.Env):
method __init__ (line 23) | def __init__(self):
method step (line 31) | def step(self, action: int):
method reset (line 42) | def reset(self, *, seed: Optional[int] = None, options: Optional[dict]...
function unwrap_env (line 48) | def unwrap_env(env) -> Generator[gym.Wrapper, None, None]:
function test_make_autoreset_true (line 58) | def test_make_autoreset_true(spec):
function test_gym_make_autoreset (line 83) | def test_gym_make_autoreset(spec):
function test_autoreset_wrapper_autoreset (line 98) | def test_autoreset_wrapper_autoreset():
FILE: tests/wrappers/test_clip_action.py
function test_clip_action (line 7) | def test_clip_action():
FILE: tests/wrappers/test_filter_observation.py
class FakeEnvironment (line 11) | class FakeEnvironment(gym.Env):
method __init__ (line 12) | def __init__(
method render (line 24) | def render(self, mode="human"):
method reset (line 28) | def reset(self, *, seed: Optional[int] = None, options: Optional[dict]...
method step (line 33) | def step(self, action):
class TestFilterObservation (line 54) | class TestFilterObservation:
method test_filter_observation (line 58) | def test_filter_observation(self, observation_keys, filter_keys):
method test_raises_with_incorrect_arguments (line 81) | def test_raises_with_incorrect_arguments(
FILE: tests/wrappers/test_flatten.py
class FakeEnvironment (line 14) | class FakeEnvironment(gym.Env):
method __init__ (line 15) | def __init__(self, observation_space):
method reset (line 18) | def reset(self, *, seed: Optional[int] = None, options: Optional[dict]...
class TestFlattenEnvironment (line 62) | class TestFlattenEnvironment:
method test_flattened_environment (line 64) | def test_flattened_environment(self, observation_space, ordered_values):
method test_flatten_unflatten (line 78) | def test_flatten_unflatten(self, observation_space, ordered_values):
method _check_observations (line 89) | def _check_observations(self, original, flattened, unflattened, ordere...
FILE: tests/wrappers/test_flatten_observation.py
function test_flatten_observation (line 10) | def test_flatten_observation(env_id):
FILE: tests/wrappers/test_frame_stack.py
function test_frame_stack (line 27) | def test_frame_stack(env_id, num_stack, lz4_compress):
FILE: tests/wrappers/test_gray_scale_observation.py
function test_gray_scale_observation (line 10) | def test_gray_scale_observation(env_id, keep_dim):
FILE: tests/wrappers/test_human_rendering.py
function test_human_rendering (line 9) | def test_human_rendering():
FILE: tests/wrappers/test_nested_dict.py
class FakeEnvironment (line 12) | class FakeEnvironment(gym.Env):
method __init__ (line 13) | def __init__(self, observation_space, render_mode=None):
method render (line 19) | def render(self, mode="human"):
method reset (line 23) | def reset(self, *, seed: Optional[int] = None, options: Optional[dict]...
method step (line 28) | def step(self, action):
class TestNestedDictWrapper (line 100) | class TestNestedDictWrapper:
method test_nested_dicts_size (line 102) | def test_nested_dicts_size(self, observation_space, flat_shape):
method test_nested_dicts_ravel (line 115) | def test_nested_dicts_ravel(self, observation_space, flat_shape):
FILE: tests/wrappers/test_normalize.py
class DummyRewardEnv (line 10) | class DummyRewardEnv(gym.Env):
method __init__ (line 13) | def __init__(self, return_reward_idx=0):
method step (line 22) | def step(self, action):
method reset (line 32) | def reset(self, *, seed: Optional[int] = None, options: Optional[dict]...
function make_env (line 38) | def make_env(return_reward_idx):
function test_normalize_observation (line 46) | def test_normalize_observation():
function test_normalize_reset_info (line 56) | def test_normalize_reset_info():
function test_normalize_return (line 64) | def test_normalize_return():
function test_normalize_observation_vector_env (line 82) | def test_normalize_observation_vector_env():
function test_normalize_return_vector_env (line 107) | def test_normalize_return_vector_env():
FILE: tests/wrappers/test_order_enforcing.py
function test_gym_make_order_enforcing (line 14) | def test_gym_make_order_enforcing(spec):
function test_order_enforcing (line 21) | def test_order_enforcing():
FILE: tests/wrappers/test_passive_env_checker.py
function test_passive_checker_wrapper_warnings (line 19) | def test_passive_checker_wrapper_warnings(env):
function test_initialise_failures (line 54) | def test_initialise_failures(env, message):
function _reset_failure (line 61) | def _reset_failure(self, seed=None, options=None):
function _step_failure (line 65) | def _step_failure(self, action):
function test_api_failures (line 69) | def test_api_failures():
FILE: tests/wrappers/test_pixel_observation.py
class FakeEnvironment (line 12) | class FakeEnvironment(gym.Env):
method __init__ (line 13) | def __init__(self, render_mode="single_rgb_array"):
method render (line 17) | def render(self, mode="human", width=32, height=32):
method reset (line 21) | def reset(self, *, seed: Optional[int] = None, options: Optional[dict]...
method step (line 26) | def step(self, action):
class FakeArrayObservationEnvironment (line 33) | class FakeArrayObservationEnvironment(FakeEnvironment):
method __init__ (line 34) | def __init__(self, *args, **kwargs):
class FakeDictObservationEnvironment (line 41) | class FakeDictObservationEnvironment(FakeEnvironment):
method __init__ (line 42) | def __init__(self, *args, **kwargs):
function test_dict_observation (line 52) | def test_dict_observation(pixels_only):
function test_single_array_observation (line 94) | def test_single_array_observation(pixels_only):
FILE: tests/wrappers/test_record_episode_statistics.py
function test_record_episode_statistics (line 11) | def test_record_episode_statistics(env_id, deque_size):
function test_record_episode_statistics_reset_info (line 30) | def test_record_episode_statistics_reset_info():
function test_record_episode_statistics_with_vectorenv (line 42) | def test_record_episode_statistics_with_vectorenv(num_envs, asynchronous):
function test_wrong_wrapping_order (line 70) | def test_wrong_wrapping_order():
function test_add_vector_episode_statistics (line 79) | def test_add_vector_episode_statistics():
FILE: tests/wrappers/test_record_video.py
function test_record_video_using_default_trigger (line 8) | def test_record_video_using_default_trigger():
function test_record_video_reset (line 28) | def test_record_video_reset():
function test_record_video_step_trigger (line 40) | def test_record_video_step_trigger():
function make_env (line 57) | def make_env(gym_id, seed, **kwargs):
function test_record_video_within_vector (line 70) | def test_record_video_within_vector():
FILE: tests/wrappers/test_rescale_action.py
function test_rescale_action (line 8) | def test_rescale_action():
FILE: tests/wrappers/test_resize_observation.py
function test_resize_observation (line 10) | def test_resize_observation(env_id, shape):
FILE: tests/wrappers/test_step_compatibility.py
class OldStepEnv (line 8) | class OldStepEnv(gym.Env):
method __init__ (line 9) | def __init__(self):
method step (line 13) | def step(self, action):
class NewStepEnv (line 21) | class NewStepEnv(gym.Env):
method __init__ (line 22) | def __init__(self):
method step (line 26) | def step(self, action):
function test_step_compatibility_to_new_api (line 37) | def test_step_compatibility_to_new_api(env, output_truncation_bool):
function test_step_compatibility_to_old_api (line 49) | def test_step_compatibility_to_old_api(env):
function test_step_compatibility_in_make (line 58) | def test_step_compatibility_in_make(apply_api_compatibility):
FILE: tests/wrappers/test_time_aware_observation.py
function test_time_aware_observation (line 9) | def test_time_aware_observation(env_id):
FILE: tests/wrappers/test_time_limit.py
function test_time_limit_reset_info (line 8) | def test_time_limit_reset_info():
function test_time_limit_wrapper (line 18) | def test_time_limit_wrapper(double_wrap):
function test_termination_on_last_step (line 39) | def test_termination_on_last_step(double_wrap):
FILE: tests/wrappers/test_transform_observation.py
function test_transform_observation (line 9) | def test_transform_observation(env_id):
FILE: tests/wrappers/test_transform_reward.py
function test_transform_reward (line 9) | def test_transform_reward(env_id):
FILE: tests/wrappers/test_vector_list_info.py
function test_usage_in_vector_env (line 12) | def test_usage_in_vector_env():
function test_info_to_list (line 22) | def test_info_to_list():
function test_info_to_list_statistics (line 40) | def test_info_to_list_statistics():
FILE: tests/wrappers/test_video_recorder.py
class BrokenRecordableEnv (line 12) | class BrokenRecordableEnv(gym.Env):
method __init__ (line 15) | def __init__(self, render_mode="rgb_array_list"):
method render (line 18) | def render(self):
class UnrecordableEnv (line 22) | class UnrecordableEnv(gym.Env):
method __init__ (line 25) | def __init__(self, render_mode=None):
method render (line 28) | def render(self):
function test_record_simple (line 32) | def test_record_simple():
function test_autoclose (line 48) | def test_autoclose():
function test_no_frames (line 73) | def test_no_frames():
function test_record_unrecordable_method (line 81) | def test_record_unrecordable_method():
function test_record_breaking_render_method (line 94) | def test_record_breaking_render_method():
function test_text_envs (line 109) | def test_text_envs():
FILE: tests/wrappers/utils.py
function has_wrapper (line 4) | def has_wrapper(wrapped_env: gym.Env, wrapper_type: type) -> bool:
Condensed preview — 219 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,248K chars).
[
{
"path": ".github/ISSUE_TEMPLATE/bug.md",
"chars": 875,
"preview": "---\nname: Bug Report\nabout: Submit a bug report\ntitle: \"[Bug Report] Bug title\"\n\n---\n\nIf you are submitting a bug report"
},
{
"path": ".github/ISSUE_TEMPLATE/proposal.md",
"chars": 810,
"preview": "---\nname: Proposal\nabout: Propose changes that are not fixes bugs\ntitle: \"[Proposal] Proposal title\"\n---\n\n\n\n### Proposal"
},
{
"path": ".github/ISSUE_TEMPLATE/question.md",
"chars": 594,
"preview": "---\nname: Question\nabout: Ask a question\ntitle: \"[Question] Question title\"\n---\n\n\n### Question\n\nIf you're a beginner and"
},
{
"path": ".github/PULL_REQUEST_TEMPLATE.md",
"chars": 1596,
"preview": "# Description\n\nPlease include a summary of the change and which issue is fixed. Please also include relevant motivation "
},
{
"path": ".github/stale.yml",
"chars": 2004,
"preview": "# Configuration for probot-stale - https://github.com/probot/stale\n\n# Number of days of inactivity before an Issue or Pu"
},
{
"path": ".github/workflows/build.yml",
"chars": 507,
"preview": "name: build\non: [pull_request, push]\n\npermissions:\n contents: read # to fetch code (actions/checkout)\n\njobs:\n build:\n "
},
{
"path": ".github/workflows/pre-commit.yml",
"chars": 520,
"preview": "# https://pre-commit.com\n# This GitHub Action assumes that the repo contains a valid .pre-commit-config.yaml file.\nname:"
},
{
"path": ".gitignore",
"chars": 411,
"preview": "*.swp\n*.pyc\n*.py~\n.DS_Store\n.cache\n.pytest_cache/\n\n# Setuptools distribution and build folders.\n/dist/\n/build\n\n# Virtual"
},
{
"path": ".pre-commit-config.yaml",
"chars": 1536,
"preview": "---\nrepos:\n - repo: https://github.com/python/black\n rev: 22.3.0\n hooks:\n - id: black\n - repo: https://gith"
},
{
"path": "CODE_OF_CONDUCT.rst",
"chars": 673,
"preview": "OpenAI Gym is dedicated to providing a harassment-free experience for\neveryone, regardless of gender, gender identity an"
},
{
"path": "CONTRIBUTING.md",
"chars": 3771,
"preview": "# Gym Contribution Guidelines\n\nAt this time we are currently accepting the current forms of contributions:\n\n- Bug report"
},
{
"path": "LICENSE.md",
"chars": 1464,
"preview": "The MIT License\n\nCopyright (c) 2016 OpenAI (https://openai.com)\n\nPermission is hereby granted, free of charge, to any pe"
},
{
"path": "README.md",
"chars": 4854,
"preview": "[](https://pr"
},
{
"path": "bin/docker_entrypoint",
"chars": 484,
"preview": "#!/bin/bash\n# This script is the entrypoint for our Docker image.\n\nset -ex\n\n# Set up display; otherwise rendering will f"
},
{
"path": "gym/__init__.py",
"chars": 1177,
"preview": "\"\"\"Root __init__ of the gym module setting the __all__ of gym modules.\"\"\"\n# isort: skip_file\n\nfrom gym import error\nfrom"
},
{
"path": "gym/core.py",
"chars": 20614,
"preview": "\"\"\"Core API for Environment, Wrapper, ActionWrapper, RewardWrapper and ObservationWrapper.\"\"\"\nimport sys\nfrom typing imp"
},
{
"path": "gym/envs/__init__.py",
"chars": 6961,
"preview": "from gym.envs.registration import load_env_plugins as _load_env_plugins\nfrom gym.envs.registration import make, register"
},
{
"path": "gym/envs/box2d/__init__.py",
"chars": 202,
"preview": "from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore\nfrom gym.envs.box2d.car_racing import Car"
},
{
"path": "gym/envs/box2d/bipedal_walker.py",
"chars": 31174,
"preview": "__credits__ = [\"Andrea PIERRÉ\"]\n\nimport math\nfrom typing import TYPE_CHECKING, List, Optional\n\nimport numpy as np\n\nimpor"
},
{
"path": "gym/envs/box2d/car_dynamics.py",
"chars": 12147,
"preview": "\"\"\"\nTop-down car dynamics simulation.\n\nSome ideas are taken from this great tutorial http://www.iforce2d.net/b2dtut/top-"
},
{
"path": "gym/envs/box2d/car_racing.py",
"chars": 28931,
"preview": "__credits__ = [\"Andrea PIERRÉ\"]\n\nimport math\nfrom typing import Optional, Union\n\nimport numpy as np\n\nimport gym\nfrom gym"
},
{
"path": "gym/envs/box2d/lunar_lander.py",
"chars": 29801,
"preview": "__credits__ = [\"Andrea PIERRÉ\"]\n\nimport math\nimport warnings\nfrom typing import TYPE_CHECKING, Optional\n\nimport numpy as"
},
{
"path": "gym/envs/classic_control/__init__.py",
"chars": 324,
"preview": "from gym.envs.classic_control.acrobot import AcrobotEnv\nfrom gym.envs.classic_control.cartpole import CartPoleEnv\nfrom g"
},
{
"path": "gym/envs/classic_control/acrobot.py",
"chars": 16810,
"preview": "\"\"\"classic Acrobot task\"\"\"\nfrom typing import Optional\n\nimport numpy as np\nfrom numpy import cos, pi, sin\n\nfrom gym impo"
},
{
"path": "gym/envs/classic_control/cartpole.py",
"chars": 11570,
"preview": "\"\"\"\nClassic cart-pole system implemented by Rich Sutton et al.\nCopied from http://incompleteideas.net/sutton/book/code/p"
},
{
"path": "gym/envs/classic_control/continuous_mountain_car.py",
"chars": 10546,
"preview": "\"\"\"\n@author: Olivier Sigaud\n\nA merge between two sources:\n\n* Adaptation of the MountainCar Environment from the \"FAReinf"
},
{
"path": "gym/envs/classic_control/mountain_car.py",
"chars": 9826,
"preview": "\"\"\"\nhttp://incompleteideas.net/MountainCar/MountainCar1.cp\npermalink: https://perma.cc/6Z2N-PFWC\n\"\"\"\nimport math\nfrom ty"
},
{
"path": "gym/envs/classic_control/pendulum.py",
"chars": 9519,
"preview": "__credits__ = [\"Carlos Luis\"]\n\nfrom os import path\nfrom typing import Optional\n\nimport numpy as np\n\nimport gym\nfrom gym "
},
{
"path": "gym/envs/classic_control/utils.py",
"chars": 1415,
"preview": "\"\"\"\nUtility functions used for classic control environments.\n\"\"\"\n\nfrom typing import Optional, SupportsFloat, Tuple\n\n\nde"
},
{
"path": "gym/envs/mujoco/__init__.py",
"chars": 662,
"preview": "from gym.envs.mujoco.mujoco_env import MujocoEnv, MuJocoPyEnv # isort:skip\n\nfrom gym.envs.mujoco.ant import AntEnv\nfrom"
},
{
"path": "gym/envs/mujoco/ant.py",
"chars": 2400,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MuJocoPyEnv\nfrom gym.spaces import Box\n\n\nclass Ant"
},
{
"path": "gym/envs/mujoco/ant_v3.py",
"chars": 5705,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MuJocoPyEnv\nfrom gym.spaces import Box\n\nDEFAULT_CA"
},
{
"path": "gym/envs/mujoco/ant_v4.py",
"chars": 19981,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MujocoEnv\nfrom gym.spaces import Box\n\nDEFAULT_CAME"
},
{
"path": "gym/envs/mujoco/assets/ant.xml",
"chars": 4934,
"preview": "<mujoco model=\"ant\">\n <compiler angle=\"degree\" coordinate=\"local\" inertiafromgeom=\"true\"/>\n <option integrator=\"RK4\" t"
},
{
"path": "gym/envs/mujoco/assets/half_cheetah.xml",
"chars": 5616,
"preview": "<!-- Cheetah Model\n\n The state space is populated with joints in the order that they are\n defined in this file. Th"
},
{
"path": "gym/envs/mujoco/assets/hopper.xml",
"chars": 3028,
"preview": "<mujoco model=\"hopper\">\n <compiler angle=\"degree\" coordinate=\"global\" inertiafromgeom=\"true\"/>\n <default>\n <joint a"
},
{
"path": "gym/envs/mujoco/assets/humanoid.xml",
"chars": 8866,
"preview": "<mujoco model=\"humanoid\">\r\n <compiler angle=\"degree\" inertiafromgeom=\"true\"/>\r\n <default>\r\n <joint armature"
},
{
"path": "gym/envs/mujoco/assets/humanoidstandup.xml",
"chars": 8854,
"preview": "<mujoco model=\"humanoidstandup\">\r\n <compiler angle=\"degree\" inertiafromgeom=\"true\"/>\r\n <default>\r\n <joint a"
},
{
"path": "gym/envs/mujoco/assets/inverted_double_pendulum.xml",
"chars": 1953,
"preview": "<!-- Cartpole Model\n\n The state space is populated with joints in the order that they are\n defined in this file. T"
},
{
"path": "gym/envs/mujoco/assets/inverted_pendulum.xml",
"chars": 1378,
"preview": "<mujoco model=\"inverted pendulum\">\r\n\t<compiler inertiafromgeom=\"true\"/>\r\n\t<default>\r\n\t\t<joint armature=\"0\" damping=\"1\" l"
},
{
"path": "gym/envs/mujoco/assets/point.xml",
"chars": 1815,
"preview": "<mujoco>\n <compiler angle=\"degree\" coordinate=\"local\" inertiafromgeom=\"true\"/>\n <option integrator=\"RK4\" timestep=\"0.0"
},
{
"path": "gym/envs/mujoco/assets/pusher.xml",
"chars": 5368,
"preview": "<mujoco model=\"arm3d\">\n <compiler inertiafromgeom=\"true\" angle=\"radian\" coordinate=\"local\"/>\n <option timestep=\"0.01\" "
},
{
"path": "gym/envs/mujoco/assets/reacher.xml",
"chars": 2362,
"preview": "<mujoco model=\"reacher\">\r\n\t<compiler angle=\"radian\" inertiafromgeom=\"true\"/>\r\n\t<default>\r\n\t\t<joint armature=\"1\" damping="
},
{
"path": "gym/envs/mujoco/assets/swimmer.xml",
"chars": 2361,
"preview": "<mujoco model=\"swimmer\">\n <compiler angle=\"degree\" coordinate=\"local\" inertiafromgeom=\"true\"/>\n <option collision=\"pre"
},
{
"path": "gym/envs/mujoco/assets/walker2d.xml",
"chars": 4285,
"preview": "<mujoco model=\"walker2d\">\n <compiler angle=\"degree\" coordinate=\"global\" inertiafromgeom=\"true\"/>\n <default>\n <joint"
},
{
"path": "gym/envs/mujoco/half_cheetah.py",
"chars": 1840,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MuJocoPyEnv\nfrom gym.spaces import Box\n\n\nclass Hal"
},
{
"path": "gym/envs/mujoco/half_cheetah_v3.py",
"chars": 3669,
"preview": "__credits__ = [\"Rushiv Arora\"]\n\nimport numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MuJocoPyEnv\nfrom g"
},
{
"path": "gym/envs/mujoco/half_cheetah_v4.py",
"chars": 13251,
"preview": "__credits__ = [\"Rushiv Arora\"]\n\nimport numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MujocoEnv\nfrom gym"
},
{
"path": "gym/envs/mujoco/hopper.py",
"chars": 2026,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MuJocoPyEnv\nfrom gym.spaces import Box\n\n\nclass Hop"
},
{
"path": "gym/envs/mujoco/hopper_v3.py",
"chars": 5316,
"preview": "__credits__ = [\"Rushiv Arora\"]\n\nimport numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MuJocoPyEnv\nfrom g"
},
{
"path": "gym/envs/mujoco/hopper_v4.py",
"chars": 16025,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MujocoEnv\nfrom gym.spaces import Box\n\nDEFAULT_CAME"
},
{
"path": "gym/envs/mujoco/humanoid.py",
"chars": 2800,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MuJocoPyEnv\nfrom gym.spaces import Box\n\n\ndef mass_"
},
{
"path": "gym/envs/mujoco/humanoid_v3.py",
"chars": 6299,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MuJocoPyEnv\nfrom gym.spaces import Box\n\nDEFAULT_CA"
},
{
"path": "gym/envs/mujoco/humanoid_v4.py",
"chars": 27951,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MujocoEnv\nfrom gym.spaces import Box\n\nDEFAULT_CAME"
},
{
"path": "gym/envs/mujoco/humanoidstandup.py",
"chars": 2449,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MuJocoPyEnv\nfrom gym.spaces import Box\n\n\nclass Hum"
},
{
"path": "gym/envs/mujoco/humanoidstandup_v4.py",
"chars": 21674,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MujocoEnv\nfrom gym.spaces import Box\n\n\nclass Human"
},
{
"path": "gym/envs/mujoco/inverted_double_pendulum.py",
"chars": 2125,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MuJocoPyEnv\nfrom gym.spaces import Box\n\n\nclass Inv"
},
{
"path": "gym/envs/mujoco/inverted_double_pendulum_v4.py",
"chars": 9332,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MujocoEnv\nfrom gym.spaces import Box\n\n\nclass Inver"
},
{
"path": "gym/envs/mujoco/inverted_pendulum.py",
"chars": 1596,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MuJocoPyEnv\nfrom gym.spaces import Box\n\n\nclass Inv"
},
{
"path": "gym/envs/mujoco/inverted_pendulum_v4.py",
"chars": 5736,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MujocoEnv\nfrom gym.spaces import Box\n\n\nclass Inver"
},
{
"path": "gym/envs/mujoco/mujoco_env.py",
"chars": 14450,
"preview": "from os import path\nfrom typing import Optional, Union\n\nimport numpy as np\n\nimport gym\nfrom gym import error, logger, sp"
},
{
"path": "gym/envs/mujoco/mujoco_rendering.py",
"chars": 19629,
"preview": "import collections\nimport os\nimport time\nfrom threading import Lock\n\nimport glfw\nimport imageio\nimport mujoco\nimport num"
},
{
"path": "gym/envs/mujoco/pusher.py",
"chars": 2504,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MuJocoPyEnv\nfrom gym.spaces import Box\n\n\nclass Pus"
},
{
"path": "gym/envs/mujoco/pusher_v4.py",
"chars": 12160,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MujocoEnv\nfrom gym.spaces import Box\n\n\nclass Pushe"
},
{
"path": "gym/envs/mujoco/reacher.py",
"chars": 2190,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MuJocoPyEnv\nfrom gym.spaces import Box\n\n\nclass Rea"
},
{
"path": "gym/envs/mujoco/reacher_v4.py",
"chars": 10039,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MujocoEnv\nfrom gym.spaces import Box\n\n\nclass Reach"
},
{
"path": "gym/envs/mujoco/swimmer.py",
"chars": 1676,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MuJocoPyEnv\nfrom gym.spaces import Box\n\n\nclass Swi"
},
{
"path": "gym/envs/mujoco/swimmer_v3.py",
"chars": 3877,
"preview": "__credits__ = [\"Rushiv Arora\"]\n\nimport numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MuJocoPyEnv\nfrom g"
},
{
"path": "gym/envs/mujoco/swimmer_v4.py",
"chars": 11889,
"preview": "__credits__ = [\"Rushiv Arora\"]\n\nimport numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MujocoEnv\nfrom gym"
},
{
"path": "gym/envs/mujoco/walker2d.py",
"chars": 1897,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MuJocoPyEnv\nfrom gym.spaces import Box\n\n\nclass Wal"
},
{
"path": "gym/envs/mujoco/walker2d_v3.py",
"chars": 4942,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MuJocoPyEnv\nfrom gym.spaces import Box\n\nDEFAULT_CA"
},
{
"path": "gym/envs/mujoco/walker2d_v4.py",
"chars": 16500,
"preview": "import numpy as np\n\nfrom gym import utils\nfrom gym.envs.mujoco import MujocoEnv\nfrom gym.spaces import Box\n\nDEFAULT_CAME"
},
{
"path": "gym/envs/registration.py",
"chars": 26208,
"preview": "import contextlib\nimport copy\nimport difflib\nimport importlib\nimport importlib.util\nimport re\nimport sys\nimport warnings"
},
{
"path": "gym/envs/toy_text/__init__.py",
"chars": 211,
"preview": "from gym.envs.toy_text.blackjack import BlackjackEnv\nfrom gym.envs.toy_text.cliffwalking import CliffWalkingEnv\nfrom gym"
},
{
"path": "gym/envs/toy_text/blackjack.py",
"chars": 10806,
"preview": "import os\nfrom typing import Optional\n\nimport numpy as np\n\nimport gym\nfrom gym import spaces\nfrom gym.error import Depen"
},
{
"path": "gym/envs/toy_text/cliffwalking.py",
"chars": 10940,
"preview": "from contextlib import closing\nfrom io import StringIO\nfrom os import path\nfrom typing import Optional\n\nimport numpy as "
},
{
"path": "gym/envs/toy_text/frozen_lake.py",
"chars": 13715,
"preview": "from contextlib import closing\nfrom io import StringIO\nfrom os import path\nfrom typing import List, Optional\n\nimport num"
},
{
"path": "gym/envs/toy_text/taxi.py",
"chars": 18318,
"preview": "from contextlib import closing\nfrom io import StringIO\nfrom os import path\nfrom typing import Optional\n\nimport numpy as "
},
{
"path": "gym/envs/toy_text/utils.py",
"chars": 295,
"preview": "import numpy as np\n\n\ndef categorical_sample(prob_n, np_random: np.random.Generator):\n \"\"\"Sample from categorical dist"
},
{
"path": "gym/error.py",
"chars": 5545,
"preview": "\"\"\"Set of Error classes for gym.\"\"\"\nimport warnings\n\n\nclass Error(Exception):\n \"\"\"Error superclass.\"\"\"\n\n\n# Local erro"
},
{
"path": "gym/logger.py",
"chars": 1774,
"preview": "\"\"\"Set of functions for logging messages.\"\"\"\nimport sys\nimport warnings\nfrom typing import Optional, Type\n\nfrom gym.util"
},
{
"path": "gym/py.typed",
"chars": 0,
"preview": ""
},
{
"path": "gym/spaces/__init__.py",
"chars": 1266,
"preview": "\"\"\"This module implements various spaces.\n\nSpaces describe mathematical sets and are used in Gym to specify valid action"
},
{
"path": "gym/spaces/box.py",
"chars": 12732,
"preview": "\"\"\"Implementation of a space that represents closed boxes in euclidean space.\"\"\"\nfrom typing import Dict, List, Optional"
},
{
"path": "gym/spaces/dict.py",
"chars": 10427,
"preview": "\"\"\"Implementation of a space that represents the cartesian product of other spaces as a dictionary.\"\"\"\nfrom collections "
},
{
"path": "gym/spaces/discrete.py",
"chars": 4476,
"preview": "\"\"\"Implementation of a space consisting of finitely many elements.\"\"\"\nfrom typing import Optional, Union\n\nimport numpy a"
},
{
"path": "gym/spaces/graph.py",
"chars": 9771,
"preview": "\"\"\"Implementation of a space that represents graph information where nodes and edges can be represented with euclidean s"
},
{
"path": "gym/spaces/multi_binary.py",
"chars": 4569,
"preview": "\"\"\"Implementation of a space that consists of binary np.ndarrays of a fixed shape.\"\"\"\nfrom typing import Optional, Seque"
},
{
"path": "gym/spaces/multi_discrete.py",
"chars": 7519,
"preview": "\"\"\"Implementation of a space that represents the cartesian product of `Discrete` spaces.\"\"\"\nfrom typing import Iterable,"
},
{
"path": "gym/spaces/sequence.py",
"chars": 5487,
"preview": "\"\"\"Implementation of a space that represents finite-length sequences.\"\"\"\nfrom collections.abc import Sequence as Collect"
},
{
"path": "gym/spaces/space.py",
"chars": 5653,
"preview": "\"\"\"Implementation of the `Space` metaclass.\"\"\"\n\nfrom typing import (\n Any,\n Generic,\n Iterable,\n List,\n M"
},
{
"path": "gym/spaces/text.py",
"chars": 7660,
"preview": "\"\"\"Implementation of a space that represents textual strings.\"\"\"\nfrom typing import Any, Dict, FrozenSet, Optional, Set,"
},
{
"path": "gym/spaces/tuple.py",
"chars": 6382,
"preview": "\"\"\"Implementation of a space that represents the cartesian product of other spaces.\"\"\"\nfrom collections.abc import Seque"
},
{
"path": "gym/spaces/utils.py",
"chars": 14940,
"preview": "\"\"\"Implementation of utility functions that can be applied to spaces.\n\nThese functions mostly take care of flattening an"
},
{
"path": "gym/utils/__init__.py",
"chars": 420,
"preview": "\"\"\"A set of common utilities used within the environments.\n\nThese are not intended as API functions, and will not remain"
},
{
"path": "gym/utils/colorize.py",
"chars": 974,
"preview": "\"\"\"A set of common utilities used within the environments.\n\nThese are not intended as API functions, and will not remain"
},
{
"path": "gym/utils/env_checker.py",
"chars": 12606,
"preview": "\"\"\"A set of functions for checking an environment details.\n\nThis file is originally from the Stable Baselines3 repositor"
},
{
"path": "gym/utils/ezpickle.py",
"chars": 1354,
"preview": "\"\"\"Class for pickling and unpickling objects via their constructor arguments.\"\"\"\n\n\nclass EzPickle:\n \"\"\"Objects that a"
},
{
"path": "gym/utils/passive_env_checker.py",
"chars": 15046,
"preview": "\"\"\"A set of functions for passively checking environment implementations.\"\"\"\nimport inspect\nfrom functools import partia"
},
{
"path": "gym/utils/play.py",
"chars": 15289,
"preview": "\"\"\"Utilities of visualising an environment.\"\"\"\nfrom collections import deque\nfrom typing import Callable, Dict, List, Op"
},
{
"path": "gym/utils/save_video.py",
"chars": 4191,
"preview": "\"\"\"Utility functions to save rendering videos.\"\"\"\nimport os\nfrom typing import Callable, Optional\n\nimport gym\nfrom gym i"
},
{
"path": "gym/utils/seeding.py",
"chars": 911,
"preview": "\"\"\"Set of random number generator functions: seeding, generator, hashing seeds.\"\"\"\nfrom typing import Any, Optional, Tup"
},
{
"path": "gym/utils/step_api_compatibility.py",
"chars": 6522,
"preview": "\"\"\"Contains methods for step compatibility, from old-to-new and new-to-old API.\"\"\"\nfrom typing import Tuple, Union\n\nimpo"
},
{
"path": "gym/vector/__init__.py",
"chars": 3023,
"preview": "\"\"\"Module for vector environments.\"\"\"\nfrom typing import Iterable, List, Optional, Union\n\nimport gym\nfrom gym.vector.asy"
},
{
"path": "gym/vector/async_vector_env.py",
"chars": 27608,
"preview": "\"\"\"An async vector environment.\"\"\"\nimport multiprocessing as mp\nimport sys\nimport time\nfrom copy import deepcopy\nfrom en"
},
{
"path": "gym/vector/sync_vector_env.py",
"chars": 8760,
"preview": "\"\"\"A synchronous vector environment.\"\"\"\nfrom copy import deepcopy\nfrom typing import Any, Callable, Iterator, List, Opti"
},
{
"path": "gym/vector/utils/__init__.py",
"chars": 727,
"preview": "\"\"\"Module for gym vector utils.\"\"\"\nfrom gym.vector.utils.misc import CloudpickleWrapper, clear_mpi_env_vars\nfrom gym.vec"
},
{
"path": "gym/vector/utils/misc.py",
"chars": 1587,
"preview": "\"\"\"Miscellaneous utilities.\"\"\"\nimport contextlib\nimport os\n\n__all__ = [\"CloudpickleWrapper\", \"clear_mpi_env_vars\"]\n\n\ncla"
},
{
"path": "gym/vector/utils/numpy_utils.py",
"chars": 4477,
"preview": "\"\"\"Numpy utility functions: concatenate space samples and create empty array.\"\"\"\nfrom collections import OrderedDict\nfro"
},
{
"path": "gym/vector/utils/shared_memory.py",
"chars": 6522,
"preview": "\"\"\"Utility functions for vector environments to share memory between processes.\"\"\"\nimport multiprocessing as mp\nfrom col"
},
{
"path": "gym/vector/utils/spaces.py",
"chars": 6434,
"preview": "\"\"\"Utility functions for gym spaces: batch space and iterator.\"\"\"\nfrom collections import OrderedDict\nfrom copy import d"
},
{
"path": "gym/vector/vector_env.py",
"chars": 11487,
"preview": "\"\"\"Base class for vectorized environments.\"\"\"\nfrom typing import Any, List, Optional, Tuple, Union\n\nimport numpy as np\n\n"
},
{
"path": "gym/version.py",
"chars": 19,
"preview": "VERSION = \"0.26.2\"\n"
},
{
"path": "gym/wrappers/README.md",
"chars": 1056,
"preview": "# Wrappers\n\nWrappers are used to transform an environment in a modular way:\n\n```python\nenv = gym.make('Pong-v0')\nenv = M"
},
{
"path": "gym/wrappers/__init__.py",
"chars": 1356,
"preview": "\"\"\"Module of wrapper classes.\"\"\"\nfrom gym import error\nfrom gym.wrappers.atari_preprocessing import AtariPreprocessing\nf"
},
{
"path": "gym/wrappers/atari_preprocessing.py",
"chars": 7860,
"preview": "\"\"\"Implementation of Atari 2600 Preprocessing following the guidelines of Machado et al., 2018.\"\"\"\nimport numpy as np\n\ni"
},
{
"path": "gym/wrappers/autoreset.py",
"chars": 3131,
"preview": "\"\"\"Wrapper that autoreset environments when `terminated=True` or `truncated=True`.\"\"\"\nimport gym\n\n\nclass AutoResetWrappe"
},
{
"path": "gym/wrappers/clip_action.py",
"chars": 1155,
"preview": "\"\"\"Wrapper for clipping actions within a valid bound.\"\"\"\nimport numpy as np\n\nimport gym\nfrom gym import ActionWrapper\nfr"
},
{
"path": "gym/wrappers/compatibility.py",
"chars": 4288,
"preview": "\"\"\"A compatibility wrapper converting an old-style environment into a valid environment.\"\"\"\nimport sys\nfrom typing impor"
},
{
"path": "gym/wrappers/env_checker.py",
"chars": 2306,
"preview": "\"\"\"A passive environment checker wrapper for an environment's observation and action space along with the reset, step an"
},
{
"path": "gym/wrappers/filter_observation.py",
"chars": 3435,
"preview": "\"\"\"A wrapper for filtering dictionary observations by their keys.\"\"\"\nimport copy\nfrom typing import Sequence\n\nimport gym"
},
{
"path": "gym/wrappers/flatten_observation.py",
"chars": 1092,
"preview": "\"\"\"Wrapper for flattening observations of an environment.\"\"\"\nimport gym\nimport gym.spaces as spaces\n\n\nclass FlattenObser"
},
{
"path": "gym/wrappers/frame_stack.py",
"chars": 6322,
"preview": "\"\"\"Wrapper that stacks frames.\"\"\"\nfrom collections import deque\nfrom typing import Union\n\nimport numpy as np\n\nimport gym"
},
{
"path": "gym/wrappers/gray_scale_observation.py",
"chars": 2079,
"preview": "\"\"\"Wrapper that converts a color observation to grayscale.\"\"\"\nimport numpy as np\n\nimport gym\nfrom gym.spaces import Box\n"
},
{
"path": "gym/wrappers/human_rendering.py",
"chars": 5051,
"preview": "\"\"\"A wrapper that adds human-renering functionality to an environment.\"\"\"\nimport numpy as np\n\nimport gym\nfrom gym.error "
},
{
"path": "gym/wrappers/monitoring/__init__.py",
"chars": 44,
"preview": "\"\"\"Module for monitoring.video_recorder.\"\"\"\n"
},
{
"path": "gym/wrappers/monitoring/video_recorder.py",
"chars": 6362,
"preview": "\"\"\"A wrapper for video recording environments by rolling it out, frame by frame.\"\"\"\nimport json\nimport os\nimport os.path"
},
{
"path": "gym/wrappers/normalize.py",
"chars": 5712,
"preview": "\"\"\"Set of wrappers for normalizing actions and observations.\"\"\"\nimport numpy as np\n\nimport gym\n\n\n# taken from https://gi"
},
{
"path": "gym/wrappers/order_enforcing.py",
"chars": 2158,
"preview": "\"\"\"Wrapper to enforce the proper ordering of environment operations.\"\"\"\nimport gym\nfrom gym.error import ResetNeeded\n\n\nc"
},
{
"path": "gym/wrappers/pixel_observation.py",
"chars": 8049,
"preview": "\"\"\"Wrapper for augmenting observations by pixel values.\"\"\"\nimport collections\nimport copy\nfrom collections.abc import Mu"
},
{
"path": "gym/wrappers/record_episode_statistics.py",
"chars": 5650,
"preview": "\"\"\"Wrapper that tracks the cumulative rewards and episode lengths.\"\"\"\nimport time\nfrom collections import deque\nfrom typ"
},
{
"path": "gym/wrappers/record_video.py",
"chars": 8310,
"preview": "\"\"\"Wrapper for recording videos.\"\"\"\nimport os\nfrom typing import Callable, Optional\n\nimport gym\nfrom gym import logger\nf"
},
{
"path": "gym/wrappers/render_collection.py",
"chars": 1804,
"preview": "\"\"\"A wrapper that adds render collection mode to an environment.\"\"\"\nimport gym\n\n\nclass RenderCollection(gym.Wrapper):\n "
},
{
"path": "gym/wrappers/rescale_action.py",
"chars": 3100,
"preview": "\"\"\"Wrapper for rescaling actions to within a max and min action.\"\"\"\nfrom typing import Union\n\nimport numpy as np\n\nimport"
},
{
"path": "gym/wrappers/resize_observation.py",
"chars": 2399,
"preview": "\"\"\"Wrapper for resizing observations.\"\"\"\nfrom typing import Union\n\nimport numpy as np\n\nimport gym\nfrom gym.error import "
},
{
"path": "gym/wrappers/step_api_compatibility.py",
"chars": 2649,
"preview": "\"\"\"Implementation of StepAPICompatibility wrapper class for transforming envs between new and old step API.\"\"\"\nimport gy"
},
{
"path": "gym/wrappers/time_aware_observation.py",
"chars": 2402,
"preview": "\"\"\"Wrapper for adding time aware observations to environment observation.\"\"\"\nimport numpy as np\n\nimport gym\nfrom gym.spa"
},
{
"path": "gym/wrappers/time_limit.py",
"chars": 2526,
"preview": "\"\"\"Wrapper for limiting the time steps of an environment.\"\"\"\nfrom typing import Optional\n\nimport gym\n\n\nclass TimeLimit(g"
},
{
"path": "gym/wrappers/transform_observation.py",
"chars": 1672,
"preview": "\"\"\"Wrapper for transforming observations.\"\"\"\nfrom typing import Any, Callable\n\nimport gym\n\n\nclass TransformObservation(g"
},
{
"path": "gym/wrappers/transform_reward.py",
"chars": 1332,
"preview": "\"\"\"Wrapper for transforming the reward.\"\"\"\nfrom typing import Callable\n\nimport gym\nfrom gym import RewardWrapper\n\n\nclass"
},
{
"path": "gym/wrappers/vector_list_info.py",
"chars": 3821,
"preview": "\"\"\"Wrapper that converts the info format for vec envs into the list format.\"\"\"\n\nfrom typing import List\n\nimport gym\n\n\ncl"
},
{
"path": "py.Dockerfile",
"chars": 1016,
"preview": "# A Dockerfile that sets up a full Gym install with test dependencies\nARG PYTHON_VERSION\nFROM python:$PYTHON_VERSION\n\nSH"
},
{
"path": "pyproject.toml",
"chars": 1263,
"preview": "[tool.pyright]\n\ninclude = [\n \"gym/**\",\n \"tests/**\"\n]\n\nexclude = [\n \"**/node_modules\",\n \"**/__pycache__\",\n]\n\n"
},
{
"path": "requirements.txt",
"chars": 334,
"preview": "numpy>=1.18.0\ncloudpickle>=1.2.0\nimportlib_metadata>=4.8.0; python_version < '3.10'\ngym_notices>=0.0.4\ndataclasses==0.8;"
},
{
"path": "setup.py",
"chars": 3048,
"preview": "\"\"\"Setups the project.\"\"\"\nimport itertools\nimport re\n\nfrom setuptools import find_packages, setup\n\nwith open(\"gym/versio"
},
{
"path": "test_requirements.txt",
"chars": 140,
"preview": "box2d-py==2.3.5\nlz4>=3.1.0\nopencv-python>=3.0\nmujoco==2.2.0\nmatplotlib>=3.0\nimageio>=2.14.1\npygame==2.1.0\nmujoco_py<2.2,"
},
{
"path": "tests/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "tests/envs/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "tests/envs/test_action_dim_check.py",
"chars": 4045,
"preview": "import numpy as np\nimport pytest\n\nimport gym\nfrom gym import spaces\nfrom gym.envs.registration import EnvSpec\nfrom tests"
},
{
"path": "tests/envs/test_compatibility.py",
"chars": 3885,
"preview": "import sys\nfrom typing import Any, Dict, Optional, Tuple\n\nimport numpy as np\n\nimport gym\nfrom gym.spaces import Discrete"
},
{
"path": "tests/envs/test_env_implementation.py",
"chars": 7592,
"preview": "from typing import Optional\n\nimport numpy as np\nimport pytest\n\nimport gym\nfrom gym.envs.box2d import BipedalWalker\nfrom "
},
{
"path": "tests/envs/test_envs.py",
"chars": 7474,
"preview": "import pickle\nimport warnings\n\nimport numpy as np\nimport pytest\n\nimport gym\nfrom gym.envs.registration import EnvSpec\nfr"
},
{
"path": "tests/envs/test_make.py",
"chars": 10418,
"preview": "\"\"\"Tests that gym.make works as expected.\"\"\"\n\nimport re\nimport warnings\nfrom copy import deepcopy\n\nimport numpy as np\nim"
},
{
"path": "tests/envs/test_mujoco.py",
"chars": 4833,
"preview": "import numpy as np\nimport pytest\n\nimport gym\nfrom gym import envs\nfrom gym.envs.registration import EnvSpec\nfrom tests.e"
},
{
"path": "tests/envs/test_register.py",
"chars": 6627,
"preview": "\"\"\"Tests that `gym.register` works as expected.\"\"\"\nimport re\nfrom typing import Optional\n\nimport pytest\n\nimport gym\n\n\n@p"
},
{
"path": "tests/envs/test_spec.py",
"chars": 2686,
"preview": "\"\"\"Tests that gym.spec works as expected.\"\"\"\n\nimport re\n\nimport pytest\n\nimport gym\n\n\ndef test_spec():\n spec = gym.spe"
},
{
"path": "tests/envs/utils.py",
"chars": 2770,
"preview": "\"\"\"Finds all the specs that we can test with\"\"\"\nfrom typing import List, Optional\n\nimport numpy as np\n\nimport gym\nfrom g"
},
{
"path": "tests/envs/utils_envs.py",
"chars": 1382,
"preview": "import gym\n\n\nclass RegisterDuringMakeEnv(gym.Env):\n \"\"\"Used in `test_registration.py` to check if `env.make` can impo"
},
{
"path": "tests/spaces/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "tests/spaces/test_box.py",
"chars": 10945,
"preview": "import re\nimport warnings\n\nimport numpy as np\nimport pytest\n\nimport gym.error\nfrom gym.spaces import Box\nfrom gym.spaces"
},
{
"path": "tests/spaces/test_dict.py",
"chars": 4135,
"preview": "from collections import OrderedDict\n\nimport numpy as np\nimport pytest\n\nfrom gym.spaces import Box, Dict, Discrete\n\n\ndef "
},
{
"path": "tests/spaces/test_discrete.py",
"chars": 1167,
"preview": "import numpy as np\n\nfrom gym.spaces import Discrete\n\n\ndef test_space_legacy_pickling():\n \"\"\"Test the legacy pickle of"
},
{
"path": "tests/spaces/test_graph.py",
"chars": 4111,
"preview": "import re\n\nimport numpy as np\nimport pytest\n\nfrom gym.spaces import Discrete, Graph, GraphInstance\n\n\ndef test_node_space"
},
{
"path": "tests/spaces/test_multibinary.py",
"chars": 603,
"preview": "import numpy as np\n\nfrom gym.spaces import MultiBinary\n\n\ndef test_sample():\n space = MultiBinary(4)\n\n sample = spa"
},
{
"path": "tests/spaces/test_multidiscrete.py",
"chars": 2507,
"preview": "import pytest\n\nfrom gym.spaces import Discrete, MultiDiscrete\nfrom gym.utils.env_checker import data_equivalence\n\n\ndef t"
},
{
"path": "tests/spaces/test_sequence.py",
"chars": 1673,
"preview": "import re\n\nimport numpy as np\nimport pytest\n\nimport gym.spaces\n\n\ndef test_sample():\n \"\"\"Tests the sequence sampling w"
},
{
"path": "tests/spaces/test_space.py",
"chars": 561,
"preview": "from functools import partial\n\nimport pytest\n\nfrom gym import Space\nfrom gym.spaces import utils\n\nTESTING_SPACE = Space("
},
{
"path": "tests/spaces/test_spaces.py",
"chars": 18219,
"preview": "import copy\nimport itertools\nimport json # note: ujson fails this test due to float equality\nimport pickle\nimport tempf"
},
{
"path": "tests/spaces/test_text.py",
"chars": 1091,
"preview": "import re\n\nimport numpy as np\nimport pytest\n\nfrom gym.spaces import Text\n\n\ndef test_sample_mask():\n space = Text(min_"
},
{
"path": "tests/spaces/test_tuple.py",
"chars": 3035,
"preview": "import numpy as np\nimport pytest\n\nimport gym.spaces\nfrom gym.spaces import Box, Dict, Discrete, MultiBinary, Tuple\nfrom "
},
{
"path": "tests/spaces/test_utils.py",
"chars": 3718,
"preview": "from itertools import zip_longest\nfrom typing import Optional\n\nimport numpy as np\nimport pytest\n\nimport gym\nfrom gym.spa"
},
{
"path": "tests/spaces/utils.py",
"chars": 3047,
"preview": "from typing import List\n\nimport numpy as np\n\nfrom gym.spaces import (\n Box,\n Dict,\n Discrete,\n Graph,\n Mu"
},
{
"path": "tests/test_core.py",
"chars": 4240,
"preview": "from typing import Optional\n\nimport numpy as np\nimport pytest\n\nfrom gym import core, spaces\nfrom gym.wrappers import Ord"
},
{
"path": "tests/testing_env.py",
"chars": 3161,
"preview": "\"\"\"Provides a generic testing environment for use in tests with custom reset, step and render functions.\"\"\"\nimport types"
},
{
"path": "tests/utils/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "tests/utils/test_env_checker.py",
"chars": 9325,
"preview": "\"\"\"Tests that the `env_checker` runs as expects and all errors are possible.\"\"\"\nimport re\nimport warnings\nfrom typing im"
},
{
"path": "tests/utils/test_passive_env_checker.py",
"chars": 17819,
"preview": "import re\nimport warnings\nfrom typing import Dict, Union\n\nimport numpy as np\nimport pytest\n\nimport gym\nfrom gym import s"
},
{
"path": "tests/utils/test_play.py",
"chars": 6763,
"preview": "from functools import partial\nfrom itertools import product\nfrom typing import Callable\n\nimport numpy as np\nimport pygam"
},
{
"path": "tests/utils/test_save_video.py",
"chars": 3573,
"preview": "import os\nimport shutil\n\nimport numpy as np\n\nimport gym\nfrom gym.utils.save_video import capped_cubic_video_schedule, sa"
},
{
"path": "tests/utils/test_seeding.py",
"chars": 721,
"preview": "import pickle\n\nfrom gym import error\nfrom gym.utils import seeding\n\n\ndef test_invalid_seeds():\n for seed in [-1, \"tes"
},
{
"path": "tests/utils/test_step_api_compatibility.py",
"chars": 6019,
"preview": "import numpy as np\nimport pytest\n\nfrom gym.utils.env_checker import data_equivalence\nfrom gym.utils.step_api_compatibili"
},
{
"path": "tests/vector/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "tests/vector/test_async_vector_env.py",
"chars": 10139,
"preview": "import re\nfrom multiprocessing import TimeoutError\n\nimport numpy as np\nimport pytest\n\nfrom gym.error import AlreadyPendi"
},
{
"path": "tests/vector/test_numpy_utils.py",
"chars": 5025,
"preview": "from collections import OrderedDict\n\nimport numpy as np\nimport pytest\n\nfrom gym.spaces import Dict, Tuple\nfrom gym.vecto"
},
{
"path": "tests/vector/test_shared_memory.py",
"chars": 5713,
"preview": "import multiprocessing as mp\nfrom collections import OrderedDict\nfrom multiprocessing import Array, Process\nfrom multipr"
},
{
"path": "tests/vector/test_spaces.py",
"chars": 6908,
"preview": "import copy\n\nimport numpy as np\nimport pytest\nfrom numpy.testing import assert_array_equal\n\nfrom gym.spaces import Box, "
},
{
"path": "tests/vector/test_sync_vector_env.py",
"chars": 5756,
"preview": "import numpy as np\nimport pytest\n\nfrom gym.envs.registration import EnvSpec\nfrom gym.spaces import Box, Discrete, MultiD"
},
{
"path": "tests/vector/test_vector_env.py",
"chars": 4274,
"preview": "from functools import partial\n\nimport numpy as np\nimport pytest\n\nfrom gym.spaces import Discrete, Tuple\nfrom gym.vector."
},
{
"path": "tests/vector/test_vector_env_info.py",
"chars": 2180,
"preview": "import numpy as np\nimport pytest\n\nimport gym\nfrom gym.vector.sync_vector_env import SyncVectorEnv\nfrom tests.vector.util"
},
{
"path": "tests/vector/test_vector_env_wrapper.py",
"chars": 972,
"preview": "import numpy as np\n\nfrom gym.vector import VectorEnvWrapper, make\n\n\nclass DummyWrapper(VectorEnvWrapper):\n def __init"
},
{
"path": "tests/vector/test_vector_make.py",
"chars": 2726,
"preview": "import pytest\n\nimport gym\nfrom gym.vector import AsyncVectorEnv, SyncVectorEnv\nfrom gym.wrappers import OrderEnforcing, "
},
{
"path": "tests/vector/utils.py",
"chars": 3834,
"preview": "import time\nfrom typing import Optional\n\nimport numpy as np\n\nimport gym\nfrom gym.spaces import Box, Dict, Discrete, Mult"
},
{
"path": "tests/wrappers/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "tests/wrappers/test_atari_preprocessing.py",
"chars": 4102,
"preview": "import numpy as np\nimport pytest\n\nfrom gym.spaces import Box, Discrete\nfrom gym.wrappers import AtariPreprocessing, Step"
},
{
"path": "tests/wrappers/test_autoreset.py",
"chars": 4731,
"preview": "\"\"\"Tests the gym.wrapper.AutoResetWrapper operates as expected.\"\"\"\nfrom typing import Generator, Optional\nfrom unittest."
},
{
"path": "tests/wrappers/test_clip_action.py",
"chars": 790,
"preview": "import numpy as np\n\nimport gym\nfrom gym.wrappers import ClipAction\n\n\ndef test_clip_action():\n # mountaincar: action-b"
},
{
"path": "tests/wrappers/test_filter_observation.py",
"chars": 2972,
"preview": "from typing import Optional, Tuple\n\nimport numpy as np\nimport pytest\n\nimport gym\nfrom gym import spaces\nfrom gym.wrapper"
},
{
"path": "tests/wrappers/test_flatten.py",
"chars": 3314,
"preview": "\"\"\"Tests for the flatten observation wrapper.\"\"\"\n\nfrom collections import OrderedDict\nfrom typing import Optional\n\nimpor"
},
{
"path": "tests/wrappers/test_flatten_observation.py",
"chars": 702,
"preview": "import numpy as np\nimport pytest\n\nimport gym\nfrom gym import spaces\nfrom gym.wrappers import FlattenObservation\n\n\n@pytes"
},
{
"path": "tests/wrappers/test_frame_stack.py",
"chars": 1457,
"preview": "import numpy as np\nimport pytest\n\nimport gym\nfrom gym.wrappers import FrameStack\n\ntry:\n import lz4\nexcept ImportError"
}
]
// ... and 19 more files (download for full content)
About this extraction
This page contains the full source code of the openai/gym GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 219 files (1.1 MB), approximately 292.5k tokens, and a symbol index with 1374 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — a free GitHub repo-to-text converter for AI. Built by Nikandr Surkov.