SYMBOL INDEX (232 symbols across 17 files) FILE: Chapter03/frozenlake8x8_policyiteration.py function eval_state_action (line 4) | def eval_state_action(V, s, a, gamma=0.99): function policy_evaluation (line 7) | def policy_evaluation(V, policy, eps=0.0001): function policy_improvement (line 23) | def policy_improvement(V, policy): function run_episodes (line 38) | def run_episodes(env, policy, num_games=100): FILE: Chapter03/frozenlake8x8_valueiteration.py function eval_state_action (line 4) | def eval_state_action(V, s, a, gamma=0.99): function value_iteration (line 7) | def value_iteration(eps=0.0001): function run_episodes (line 30) | def run_episodes(env, V, num_games=100): FILE: Chapter04/SARSA Q_learning Taxi-v2.py function eps_greedy (line 5) | def eps_greedy(Q, s, eps=0.1): function greedy (line 17) | def greedy(Q, s): function run_episodes (line 26) | def run_episodes(env, Q, num_episodes=100, to_print=False): function Q_learning (line 52) | def Q_learning(env, lr=0.01, num_episodes=10000, eps=0.3, gamma=0.95, ep... function SARSA (line 95) | def SARSA(env, lr=0.01, num_episodes=10000, eps=0.3, gamma=0.95, eps_dec... FILE: Chapter05/DQN_Atari.py function cnn (line 16) | def cnn(x): function fnn (line 25) | def fnn(x, hidden_layers, output_layer, activation=tf.nn.relu, last_acti... function qnet (line 33) | def qnet(x, hidden_layers, output_size, fnn_activation=tf.nn.relu, last_... class ExperienceBuffer (line 43) | class ExperienceBuffer(): method __init__ (line 47) | def __init__(self, buffer_size): method add (line 55) | def add(self, obs, rew, act, obs2, done): method sample_minibatch (line 64) | def sample_minibatch(self, batch_size): method __len__ (line 76) | def __len__(self): function q_target_values (line 80) | def q_target_values(mini_batch_rw, mini_batch_done, av, discounted_value): function greedy (line 100) | def greedy(action_values): function eps_greedy (line 106) | def eps_greedy(action_values, eps=0.1): function test_agent (line 117) | def test_agent(env_test, agent_op, num_games=20): function scale_frames (line 141) | def scale_frames(frames): function DQN (line 147) | def DQN(env_name, hidden_sizes=[32], lr=1e-2, num_epochs=2000, buffer_si... FILE: Chapter05/DQN_variations_Atari.py function cnn (line 17) | def cnn(x): function fnn (line 26) | def fnn(x, hidden_layers, output_layer, activation=tf.nn.relu, last_acti... function qnet (line 34) | def qnet(x, hidden_layers, output_size, fnn_activation=tf.nn.relu, last_... function greedy (line 43) | def greedy(action_values): function eps_greedy (line 49) | def eps_greedy(action_values, eps=0.1): function q_target_values (line 60) | def q_target_values(mini_batch_rw, mini_batch_done, av, discounted_value): function test_agent (line 80) | def test_agent(env_test, agent_op, num_games=20): function scale_frames (line 104) | def scale_frames(frames): function dueling_qnet (line 111) | def dueling_qnet(x, hidden_layers, output_size, fnn_activation=tf.nn.rel... function double_q_target_values (line 123) | def double_q_target_values(mini_batch_rw, mini_batch_done, target_qv, on... class MultiStepExperienceBuffer (line 145) | class MultiStepExperienceBuffer(): method __init__ (line 149) | def __init__(self, buffer_size, n_step, gamma): method add (line 162) | def add(self, obs, rew, act, obs2, done): method sample_minibatch (line 198) | def sample_minibatch(self, batch_size): method __len__ (line 211) | def __len__(self): function DQN_with_variations (line 214) | def DQN_with_variations(env_name, extensions_hyp, hidden_sizes=[32], lr=... FILE: Chapter05/atari_wrappers.py class NoopResetEnv (line 12) | class NoopResetEnv(gym.Wrapper): method __init__ (line 13) | def __init__(self, env, noop_max=30): method reset (line 23) | def reset(self, **kwargs): method step (line 38) | def step(self, ac): class LazyFrames (line 41) | class LazyFrames(object): method __init__ (line 42) | def __init__(self, frames): method _force (line 51) | def _force(self): method __array__ (line 57) | def __array__(self, dtype=None): method __len__ (line 63) | def __len__(self): method __getitem__ (line 66) | def __getitem__(self, i): class FireResetEnv (line 69) | class FireResetEnv(gym.Wrapper): method __init__ (line 70) | def __init__(self, env): method reset (line 76) | def reset(self, **kwargs): method step (line 86) | def step(self, ac): class MaxAndSkipEnv (line 90) | class MaxAndSkipEnv(gym.Wrapper): method __init__ (line 91) | def __init__(self, env, skip=4): method step (line 98) | def step(self, action): method reset (line 115) | def reset(self, **kwargs): class WarpFrame (line 120) | class WarpFrame(gym.ObservationWrapper): method __init__ (line 121) | def __init__(self, env): method observation (line 129) | def observation(self, frame): class FrameStack (line 136) | class FrameStack(gym.Wrapper): method __init__ (line 137) | def __init__(self, env, k): method reset (line 149) | def reset(self): method step (line 155) | def step(self, action): method _get_ob (line 160) | def _get_ob(self): class ScaledFloatFrame (line 165) | class ScaledFloatFrame(gym.ObservationWrapper): method __init__ (line 166) | def __init__(self, env): method observation (line 170) | def observation(self, observation): function make_env (line 176) | def make_env(env_name, fire=True, frames_num=2, noop_num=30, skip_frames... FILE: Chapter06/AC.py function mlp (line 8) | def mlp(x, hidden_layers, output_size, activation=tf.nn.relu, last_activ... function softmax_entropy (line 16) | def softmax_entropy(logits): function discounted_rewards (line 22) | def discounted_rewards(rews, last_sv, gamma): class Buffer (line 38) | class Buffer(): method __init__ (line 42) | def __init__(self, gamma=0.99): method store (line 49) | def store(self, temp_traj, last_sv): method get_batch (line 66) | def get_batch(self): method __len__ (line 69) | def __len__(self): function AC (line 73) | def AC(env_name, hidden_sizes=[32], ac_lr=5e-3, cr_lr=8e-3, num_epochs=5... FILE: Chapter06/REINFORCE.py function mlp (line 8) | def mlp(x, hidden_layers, output_size, activation=tf.nn.relu, last_activ... function softmax_entropy (line 16) | def softmax_entropy(logits): function discounted_rewards (line 23) | def discounted_rewards(rews, gamma): class Buffer (line 38) | class Buffer(): method __init__ (line 42) | def __init__(self, gamma=0.99): method store (line 48) | def store(self, temp_traj): method get_batch (line 63) | def get_batch(self): method __len__ (line 67) | def __len__(self): function REINFORCE (line 72) | def REINFORCE(env_name, hidden_sizes=[32], lr=5e-3, num_epochs=50, gamma... FILE: Chapter06/REINFORCE_baseline.py function mlp (line 8) | def mlp(x, hidden_layers, output_size, activation=tf.nn.relu, last_activ... function softmax_entropy (line 16) | def softmax_entropy(logits): function discounted_rewards (line 23) | def discounted_rewards(rews, gamma): class Buffer (line 38) | class Buffer(): method __init__ (line 42) | def __init__(self, gamma=0.99): method store (line 49) | def store(self, temp_traj): method get_batch (line 66) | def get_batch(self): method __len__ (line 70) | def __len__(self): function REINFORCE_baseline (line 75) | def REINFORCE_baseline(env_name, hidden_sizes=[32], p_lr=5e-3, vf_lr=8e-... FILE: Chapter07/PPO.py function mlp (line 8) | def mlp(x, hidden_layers, output_layer, activation=tf.tanh, last_activat... function softmax_entropy (line 16) | def softmax_entropy(logits): function clipped_surrogate_obj (line 22) | def clipped_surrogate_obj(new_p, old_p, adv, eps): function GAE (line 29) | def GAE(rews, v, v_last, gamma=0.99, lam=0.95): function discounted_rewards (line 39) | def discounted_rewards(rews, last_sv, gamma): class StructEnv (line 56) | class StructEnv(gym.Wrapper): method __init__ (line 60) | def __init__(self, env): method reset (line 66) | def reset(self, **kwargs): method step (line 72) | def step(self, action): method get_episode_reward (line 78) | def get_episode_reward(self): method get_episode_length (line 81) | def get_episode_length(self): class Buffer (line 84) | class Buffer(): method __init__ (line 88) | def __init__(self, gamma=0.99, lam=0.95): method store (line 96) | def store(self, temp_traj, last_sv): method get_batch (line 113) | def get_batch(self): method __len__ (line 118) | def __len__(self): function gaussian_log_likelihood (line 122) | def gaussian_log_likelihood(x, mean, log_std): function PPO (line 129) | def PPO(env_name, hidden_sizes=[32], cr_lr=5e-3, ac_lr=5e-3, num_epochs=... FILE: Chapter07/TRPO.py function mlp (line 7) | def mlp(x, hidden_layers, output_layer, activation=tf.tanh, last_activat... function softmax_entropy (line 15) | def softmax_entropy(logits): function gaussian_log_likelihood (line 22) | def gaussian_log_likelihood(ac, mean, log_std): function conjugate_gradient (line 30) | def conjugate_gradient(A, b, x=None, iters=10): function gaussian_DKL (line 52) | def gaussian_DKL(mu_q, log_std_q, mu_p, log_std_p): function backtracking_line_search (line 59) | def backtracking_line_search(Dkl, delta, old_loss, p=0.8): function GAE (line 81) | def GAE(rews, v, v_last, gamma=0.99, lam=0.95): function discounted_rewards (line 91) | def discounted_rewards(rews, last_sv, gamma): class Buffer (line 107) | class Buffer(): method __init__ (line 111) | def __init__(self, gamma=0.99, lam=0.95): method store (line 119) | def store(self, temp_traj, last_sv): method get_batch (line 136) | def get_batch(self): method __len__ (line 141) | def __len__(self): function flatten_list (line 145) | def flatten_list(tensor_list): function flatten (line 151) | def flatten(tensor): class StructEnv (line 158) | class StructEnv(gym.Wrapper): method __init__ (line 162) | def __init__(self, env): method reset (line 168) | def reset(self, **kwargs): method step (line 174) | def step(self, action): method get_episode_reward (line 180) | def get_episode_reward(self): method get_episode_length (line 183) | def get_episode_length(self): function TRPO (line 187) | def TRPO(env_name, hidden_sizes=[32], cr_lr=5e-3, num_epochs=50, gamma=0... FILE: Chapter08/DDPG.py function mlp (line 10) | def mlp(x, hidden_layers, output_layer, activation=tf.nn.relu, last_acti... function deterministic_actor_critic (line 18) | def deterministic_actor_critic(x, a, hidden_sizes, act_dim, max_act): class ExperiencedBuffer (line 36) | class ExperiencedBuffer(): method __init__ (line 40) | def __init__(self, buffer_size): method add (line 49) | def add(self, obs, rew, act, obs2, done): method sample_minibatch (line 60) | def sample_minibatch(self, batch_size): method __len__ (line 74) | def __len__(self): function test_agent (line 77) | def test_agent(env_test, agent_op, num_games=10): function DDPG (line 98) | def DDPG(env_name, hidden_sizes=[32], ac_lr=1e-2, cr_lr=1e-2, num_epochs... FILE: Chapter08/TD3.py function mlp (line 10) | def mlp(x, hidden_layers, output_layer, activation=tf.nn.relu, last_acti... function deterministic_actor_double_critic (line 19) | def deterministic_actor_double_critic(x, a, hidden_sizes, act_dim, max_a... class ExperiencedBuffer (line 42) | class ExperiencedBuffer(): method __init__ (line 46) | def __init__(self, buffer_size): method add (line 55) | def add(self, obs, rew, act, obs2, done): method sample_minibatch (line 66) | def sample_minibatch(self, batch_size): method __len__ (line 80) | def __len__(self): function test_agent (line 83) | def test_agent(env_test, agent_op, num_games=10): function TD3 (line 107) | def TD3(env_name, hidden_sizes=[32], ac_lr=1e-2, cr_lr=1e-2, num_epochs=... FILE: Chapter09/ME-TRPO.py function mlp (line 8) | def mlp(x, hidden_layers, output_layer, activation=tf.tanh, last_activat... function softmax_entropy (line 16) | def softmax_entropy(logits): function gaussian_log_likelihood (line 22) | def gaussian_log_likelihood(ac, mean, log_std): function conjugate_gradient (line 29) | def conjugate_gradient(A, b, x=None, iters=10): function gaussian_DKL (line 51) | def gaussian_DKL(mu_q, log_std_q, mu_p, log_std_p): function backtracking_line_search (line 57) | def backtracking_line_search(Dkl, delta, old_loss, p=0.8): function GAE (line 77) | def GAE(rews, v, v_last, gamma=0.99, lam=0.95): function discounted_rewards (line 87) | def discounted_rewards(rews, last_sv, gamma): function flatten_list (line 103) | def flatten_list(tensor_list): function flatten (line 109) | def flatten(tensor): function test_agent (line 116) | def test_agent(env_test, agent_op, num_games=10): class Buffer (line 135) | class Buffer(): method __init__ (line 139) | def __init__(self, gamma=0.99, lam=0.95): method store (line 147) | def store(self, temp_traj, last_sv): method get_batch (line 164) | def get_batch(self): method __len__ (line 169) | def __len__(self): class FullBuffer (line 174) | class FullBuffer(): method __init__ (line 175) | def __init__(self): method store (line 187) | def store(self, obs, act, rew, nxt_obs, done): method generate_random_dataset (line 196) | def generate_random_dataset(self): method get_training_batch (line 203) | def get_training_batch(self): method get_valid_batch (line 207) | def get_valid_batch(self): method __len__ (line 210) | def __len__(self): function simulate_environment (line 216) | def simulate_environment(env, policy, simulated_steps): class NetworkEnv (line 253) | class NetworkEnv(gym.Wrapper): method __init__ (line 254) | def __init__(self, env, model_func, reward_func, done_func, number_mod... method reset (line 262) | def reset(self, **kwargs): method step (line 268) | def step(self, action): class StructEnv (line 283) | class StructEnv(gym.Wrapper): method __init__ (line 287) | def __init__(self, env): method reset (line 293) | def reset(self, **kwargs): method step (line 299) | def step(self, action): method get_episode_reward (line 305) | def get_episode_reward(self): method get_episode_length (line 308) | def get_episode_length(self): function pendulum_done (line 311) | def pendulum_done(ob): function pendulum_reward (line 314) | def pendulum_reward(ob, ac): function restore_model (line 318) | def restore_model(old_model_variables, m_variables): function METRPO (line 331) | def METRPO(env_name, hidden_sizes=[32], cr_lr=5e-3, num_epochs=50, gamma... FILE: Chapter10/DAgger.py function mlp (line 9) | def mlp(x, hidden_layers, output_layer, activation=tf.tanh, last_activat... function flappy_to_list (line 17) | def flappy_to_list(fd): function flappy_game_state (line 25) | def flappy_game_state(bol): function no_op (line 33) | def no_op(env, n_act=5): function expert (line 38) | def expert(): function test_agent (line 58) | def test_agent(policy, file_writer=None, test_games=10, step=0): function DAgger (line 90) | def DAgger(hidden_sizes=[32,32], dagger_iterations=20, p_lr=1e-3, step_i... FILE: Chapter11/ES.py function temp_seed (line 13) | def temp_seed(seed): function mlp (line 21) | def mlp(x, hidden_layers, output_layer, activation=tf.tanh, last_activat... function test_agent (line 31) | def test_agent(env_test, agent_op, num_games=1): function worker (line 53) | def worker(env_name, initial_seed, hidden_sizes, lr, std_noise, indiv_pe... function normalized_rank (line 173) | def normalized_rank(rewards): function flatten (line 183) | def flatten(tensor): function flatten_list (line 189) | def flatten_list(tensor_list): function ES (line 197) | def ES(env_name, hidden_sizes=[8,8], number_iter=1000, num_workers=4, lr... FILE: Chapter12/ESBAS.py function mlp (line 15) | def mlp(x, hidden_layers, output_layer, activation=tf.tanh, last_activat... class ExperienceBuffer (line 24) | class ExperienceBuffer(): method __init__ (line 28) | def __init__(self, buffer_size): method add (line 36) | def add(self, obs, rew, act, obs2, done): method sample_minibatch (line 45) | def sample_minibatch(self, batch_size): method __len__ (line 57) | def __len__(self): function q_target_values (line 61) | def q_target_values(mini_batch_rw, mini_batch_done, av, discounted_value): function greedy (line 80) | def greedy(action_values): function eps_greedy (line 86) | def eps_greedy(action_values, eps=0.1): function test_agent (line 97) | def test_agent(env_test, agent_op, num_games=20, summary=None): class DQN_optimization (line 122) | class DQN_optimization: method __init__ (line 123) | def __init__(self, obs_dim, act_dim, hidden_layers, lr, discount): method __build_graph (line 133) | def __build_graph(self): method __create_session (line 172) | def __create_session(self): method act (line 179) | def act(self, o): method optimize (line 185) | def optimize(self, mb_obs, mb_rew, mb_act, mb_obs2, mb_done): method update_target_network (line 193) | def update_target_network(self): class UCB1 (line 198) | class UCB1: method __init__ (line 199) | def __init__(self, algos, epsilon): method choose_algorithm (line 207) | def choose_algorithm(self): method update (line 216) | def update(self, idx_algo, traj_return): function ESBAS (line 224) | def ESBAS(env_name, hidden_sizes=[32], lr=1e-2, num_epochs=2000, buffer_...