gitextract_z9q4ijrb/

├── .devcontainer/
│   └── devcontainer.json
├── .dockerignore
├── .github/
│   ├── ISSUE_TEMPLATE.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   └── workflows/
│       ├── extra_sys.yml
│       ├── gputest.yml
│       ├── lint_and_docs.yml
│       ├── publish.yaml
│       └── pytest.yml
├── .gitignore
├── .pre-commit-config.yaml
├── .readthedocs.yaml
├── CHANGELOG.md
├── CONTRIBUTING.md
├── Dockerfile
├── LICENSE
├── MANIFEST.in
├── README.md
├── benchmark/
│   └── run_benchmark.py
├── docs/
│   ├── .gitignore
│   ├── 01_user_guide/
│   │   ├── 00_training_process.md
│   │   ├── 01_apis.md
│   │   ├── 02_core_abstractions.md
│   │   └── index.rst
│   ├── 02_deep_dives/
│   │   ├── 0_intro.md
│   │   ├── L1_Batch.ipynb
│   │   ├── L2_Buffer.ipynb
│   │   ├── L3_Environments.ipynb
│   │   ├── L4_GAE.ipynb
│   │   ├── L5_Collector.ipynb
│   │   └── L6_MARL.ipynb
│   ├── 04_benchmarks/
│   │   └── benchmarks.rst
│   ├── 05_developer_guide/
│   │   └── developer_guide.md
│   ├── 06_contributors/
│   │   └── contributors.rst
│   ├── _config.yml
│   ├── _static/
│   │   ├── css/
│   │   │   └── style.css
│   │   └── js/
│   │       ├── benchmark.js
│   │       ├── copybutton.js
│   │       ├── mujoco/
│   │       │   └── benchmark/
│   │       │       └── Ant-v4/
│   │       │           └── results.json
│   │       ├── v5.json
│   │       ├── vega-embed@5.js
│   │       ├── vega-lite@5.js
│   │       └── vega@5.js
│   ├── autogen_rst.py
│   ├── bibtex.json
│   ├── create_toc.py
│   ├── index.rst
│   ├── nbstripout.py
│   └── refs.bib
├── examples/
│   ├── __init__.py
│   ├── atari/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── atari_c51.py
│   │   ├── atari_dqn.py
│   │   ├── atari_dqn_hl.py
│   │   ├── atari_fqf.py
│   │   ├── atari_iqn.py
│   │   ├── atari_iqn_hl.py
│   │   ├── atari_ppo.py
│   │   ├── atari_ppo_hl.py
│   │   ├── atari_qrdqn.py
│   │   ├── atari_rainbow.py
│   │   ├── atari_sac.py
│   │   └── atari_sac_hl.py
│   ├── box2d/
│   │   ├── README.md
│   │   ├── acrobot_dualdqn.py
│   │   ├── bipedal_bdq.py
│   │   ├── bipedal_hardcore_sac.py
│   │   ├── lunarlander_dqn.py
│   │   └── mcc_sac.py
│   ├── discrete/
│   │   ├── discrete_dqn.py
│   │   └── discrete_dqn_hl.py
│   ├── inverse/
│   │   ├── README.md
│   │   └── irl_gail.py
│   ├── modelbased/
│   │   └── README.md
│   ├── mujoco/
│   │   ├── README.md
│   │   ├── analysis.py
│   │   ├── fetch_her_ddpg.py
│   │   ├── mujoco_a2c.py
│   │   ├── mujoco_a2c_hl.py
│   │   ├── mujoco_ddpg.py
│   │   ├── mujoco_ddpg_hl.py
│   │   ├── mujoco_env.py
│   │   ├── mujoco_npg.py
│   │   ├── mujoco_npg_hl.py
│   │   ├── mujoco_ppo.py
│   │   ├── mujoco_ppo_hl.py
│   │   ├── mujoco_redq.py
│   │   ├── mujoco_redq_hl.py
│   │   ├── mujoco_reinforce.py
│   │   ├── mujoco_reinforce_hl.py
│   │   ├── mujoco_sac.py
│   │   ├── mujoco_sac_hl.py
│   │   ├── mujoco_td3.py
│   │   ├── mujoco_td3_hl.py
│   │   ├── mujoco_trpo.py
│   │   ├── mujoco_trpo_hl.py
│   │   ├── plotter.py
│   │   └── tools.py
│   ├── offline/
│   │   ├── README.md
│   │   ├── atari_bcq.py
│   │   ├── atari_cql.py
│   │   ├── atari_crr.py
│   │   ├── atari_il.py
│   │   ├── convert_rl_unplugged_atari.py
│   │   ├── d4rl_bcq.py
│   │   ├── d4rl_cql.py
│   │   ├── d4rl_il.py
│   │   ├── d4rl_td3_bc.py
│   │   └── utils.py
│   └── vizdoom/
│       ├── .gitignore
│       ├── README.md
│       ├── env.py
│       ├── maps/
│       │   ├── D1_basic.cfg
│       │   ├── D1_basic.wad
│       │   ├── D2_navigation.cfg
│       │   ├── D2_navigation.wad
│       │   ├── D3_battle.cfg
│       │   ├── D3_battle.wad
│       │   ├── D4_battle2.cfg
│       │   ├── D4_battle2.wad
│       │   ├── README.md
│       │   └── spectator.py
│       ├── replay.py
│       ├── vizdoom_c51.py
│       └── vizdoom_ppo.py
├── pyproject.toml
├── test/
│   ├── __init__.py
│   ├── base/
│   │   ├── __init__.py
│   │   ├── env.py
│   │   ├── test_action_space_sampling.py
│   │   ├── test_batch.py
│   │   ├── test_buffer.py
│   │   ├── test_collector.py
│   │   ├── test_env.py
│   │   ├── test_env_finite.py
│   │   ├── test_logger.py
│   │   ├── test_policy.py
│   │   ├── test_returns.py
│   │   ├── test_stats.py
│   │   └── test_utils.py
│   ├── continuous/
│   │   ├── __init__.py
│   │   ├── test_ddpg.py
│   │   ├── test_npg.py
│   │   ├── test_ppo.py
│   │   ├── test_redq.py
│   │   ├── test_sac_with_il.py
│   │   ├── test_td3.py
│   │   └── test_trpo.py
│   ├── determinism_test.py
│   ├── discrete/
│   │   ├── __init__.py
│   │   ├── test_a2c_with_il.py
│   │   ├── test_bdqn.py
│   │   ├── test_c51.py
│   │   ├── test_discrete_sac.py
│   │   ├── test_dqn.py
│   │   ├── test_drqn.py
│   │   ├── test_fqf.py
│   │   ├── test_iqn.py
│   │   ├── test_ppo_discrete.py
│   │   ├── test_qrdqn.py
│   │   ├── test_rainbow.py
│   │   └── test_reinforce.py
│   ├── highlevel/
│   │   ├── __init__.py
│   │   ├── env_factory.py
│   │   └── test_experiment_builder.py
│   ├── modelbased/
│   │   ├── __init__.py
│   │   ├── test_dqn_icm.py
│   │   ├── test_ppo_icm.py
│   │   └── test_psrl.py
│   ├── offline/
│   │   ├── __init__.py
│   │   ├── gather_cartpole_data.py
│   │   ├── gather_pendulum_data.py
│   │   ├── test_bcq.py
│   │   ├── test_cql.py
│   │   ├── test_discrete_bcq.py
│   │   ├── test_discrete_cql.py
│   │   ├── test_discrete_crr.py
│   │   ├── test_gail.py
│   │   └── test_td3_bc.py
│   └── pettingzoo/
│       ├── pistonball.py
│       ├── pistonball_continuous.py
│       ├── test_pistonball.py
│       ├── test_pistonball_continuous.py
│       ├── test_tic_tac_toe.py
│       └── tic_tac_toe.py
└── tianshou/
    ├── __init__.py
    ├── algorithm/
    │   ├── __init__.py
    │   ├── algorithm_base.py
    │   ├── imitation/
    │   │   ├── __init__.py
    │   │   ├── bcq.py
    │   │   ├── cql.py
    │   │   ├── discrete_bcq.py
    │   │   ├── discrete_cql.py
    │   │   ├── discrete_crr.py
    │   │   ├── gail.py
    │   │   ├── imitation_base.py
    │   │   └── td3_bc.py
    │   ├── modelbased/
    │   │   ├── __init__.py
    │   │   ├── icm.py
    │   │   └── psrl.py
    │   ├── modelfree/
    │   │   ├── __init__.py
    │   │   ├── a2c.py
    │   │   ├── bdqn.py
    │   │   ├── c51.py
    │   │   ├── ddpg.py
    │   │   ├── discrete_sac.py
    │   │   ├── dqn.py
    │   │   ├── fqf.py
    │   │   ├── iqn.py
    │   │   ├── npg.py
    │   │   ├── ppo.py
    │   │   ├── qrdqn.py
    │   │   ├── rainbow.py
    │   │   ├── redq.py
    │   │   ├── reinforce.py
    │   │   ├── sac.py
    │   │   ├── td3.py
    │   │   └── trpo.py
    │   ├── multiagent/
    │   │   ├── __init__.py
    │   │   └── marl.py
    │   ├── optim.py
    │   └── random.py
    ├── config.py
    ├── data/
    │   ├── __init__.py
    │   ├── batch.py
    │   ├── buffer/
    │   │   ├── __init__.py
    │   │   ├── buffer_base.py
    │   │   ├── cached.py
    │   │   ├── her.py
    │   │   ├── manager.py
    │   │   ├── prio.py
    │   │   └── vecbuf.py
    │   ├── collector.py
    │   ├── stats.py
    │   ├── types.py
    │   └── utils/
    │       ├── __init__.py
    │       ├── converter.py
    │       └── segtree.py
    ├── env/
    │   ├── __init__.py
    │   ├── atari/
    │   │   ├── atari_network.py
    │   │   └── atari_wrapper.py
    │   ├── gym_wrappers.py
    │   ├── pettingzoo_env.py
    │   ├── utils.py
    │   ├── venv_wrappers.py
    │   ├── venvs.py
    │   └── worker/
    │       ├── __init__.py
    │       ├── dummy.py
    │       ├── ray.py
    │       ├── subproc.py
    │       └── worker_base.py
    ├── evaluation/
    │   ├── __init__.py
    │   ├── launcher.py
    │   └── rliable_evaluation.py
    ├── exploration/
    │   ├── __init__.py
    │   └── random.py
    ├── highlevel/
    │   ├── __init__.py
    │   ├── algorithm.py
    │   ├── config.py
    │   ├── env.py
    │   ├── experiment.py
    │   ├── logger.py
    │   ├── module/
    │   │   ├── __init__.py
    │   │   ├── actor.py
    │   │   ├── core.py
    │   │   ├── critic.py
    │   │   ├── intermediate.py
    │   │   └── special.py
    │   ├── params/
    │   │   ├── __init__.py
    │   │   ├── algorithm_params.py
    │   │   ├── algorithm_wrapper.py
    │   │   ├── alpha.py
    │   │   ├── collector.py
    │   │   ├── dist_fn.py
    │   │   ├── env_param.py
    │   │   ├── lr_scheduler.py
    │   │   ├── noise.py
    │   │   └── optim.py
    │   ├── persistence.py
    │   ├── trainer.py
    │   └── world.py
    ├── py.typed
    ├── trainer.py
    └── utils/
        ├── __init__.py
        ├── conversion.py
        ├── determinism.py
        ├── lagged_network.py
        ├── logger/
        │   ├── __init__.py
        │   ├── logger_base.py
        │   ├── tensorboard.py
        │   └── wandb.py
        ├── logging.py
        ├── net/
        │   ├── __init__.py
        │   ├── common.py
        │   ├── continuous.py
        │   └── discrete.py
        ├── print.py
        ├── progress_bar.py
        ├── space_info.py
        ├── statistics.py
        ├── torch_utils.py
        └── warning.py