gitextract_kv5056s4/

├── .gitignore
├── LICENSE
├── README.md
├── bc/
│   ├── README.md
│   ├── bash_scripts/
│   │   ├── demo.bash
│   │   ├── gen_exp_data.sh
│   │   └── runbc_allmujoco.sh
│   ├── bc.py
│   ├── experts/
│   │   ├── Ant-v1.pkl
│   │   ├── HalfCheetah-v1.pkl
│   │   ├── Hopper-v1.pkl
│   │   ├── Humanoid-v1.pkl
│   │   ├── Reacher-v1.pkl
│   │   └── Walker2d-v1.pkl
│   ├── load_policy.py
│   ├── plot_bc.py
│   ├── random_logs/
│   │   └── gen_exp_data.text
│   ├── run_expert.py
│   └── tf_util.py
├── ddpg/
│   ├── README.md
│   ├── ddpg.py
│   ├── main.py
│   └── replay_buffer.py
├── dqn/
│   ├── README.md
│   ├── atari_wrappers.py
│   ├── dqn.py
│   ├── dqn_utils.py
│   ├── logs_pkls/
│   │   ├── BeamRider_s001.pkl
│   │   ├── BeamRider_s002.pkl
│   │   ├── Breakout_s001.pkl
│   │   ├── Breakout_s002.pkl
│   │   ├── Enduro_s001.pkl
│   │   ├── Enduro_s002.pkl
│   │   ├── Pong_s001.pkl
│   │   └── Pong_s002.pkl
│   ├── logs_text/
│   │   ├── BeamRider_s001.text
│   │   ├── BeamRider_s002.text
│   │   ├── Breakout_s001.text
│   │   ├── Breakout_s002.text
│   │   ├── Enduro_s001.text
│   │   ├── Enduro_s002.text
│   │   ├── Pong_s001.text
│   │   └── Pong_s002.text
│   ├── plot_dqn.py
│   ├── run_dqn_atari.py
│   └── run_dqn_ram.py
├── es/
│   ├── README.md
│   ├── bash_scripts/
│   │   └── InvertedPendulum-v1.sh
│   ├── es.py
│   ├── logz.py
│   ├── main.py
│   ├── optimizers.py
│   ├── plot.py
│   ├── test.py
│   ├── toy_es.py
│   └── utils.py
├── g_learning/
│   ├── G-Learning.py
│   ├── README.md
│   └── __init__.py
├── lib/
│   ├── __init__.py
│   ├── envs/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── blackjack.py
│   │   ├── cliff_walking.py
│   │   ├── gridworld.py
│   │   ├── two_room_domain.py
│   │   └── windy_gridworld.py
│   └── plotting.py
├── q_learning/
│   ├── Q-Learning.py
│   ├── README.md
│   └── __init__.py
├── trpo/
│   ├── README.md
│   ├── fxn_approx.py
│   ├── main.py
│   ├── trpo.py
│   └── utils_trpo.py
├── utils/
│   ├── __init__.py
│   ├── logz.py
│   ├── policies.py
│   ├── utils_pg.py
│   └── value_functions.py
└── vpg/
    ├── README.md
    ├── bash_scripts/
    │   ├── CartPole-v0.sh
    │   ├── Pendulum-v0.sh
    │   ├── halfcheetah.sh
    │   ├── hopper.sh
    │   └── walker.sh
    ├── main.py
    └── plot_learning_curves.py