SYMBOL INDEX (135 symbols across 29 files) FILE: assignment1/model_based_learning.py function initialize_P (line 15) | def initialize_P(nS, nA): function initialize_counts (line 34) | def initialize_counts(nS, nA): function initialize_rewards (line 53) | def initialize_rewards(nS, nA): function counts_and_rewards_to_P (line 72) | def counts_and_rewards_to_P(counts, rewards, terminal_state): function update_mdp_model_with_history (line 113) | def update_mdp_model_with_history(counts, rewards, history): function learn_with_mdp_model (line 147) | def learn_with_mdp_model(env, method=None, num_episodes=5000, gamma = 0.... function render_single (line 216) | def render_single(env, policy): function main (line 242) | def main(): FILE: assignment1/model_free_learning.py function learn_Q_QLearning (line 12) | def learn_Q_QLearning(env, num_episodes=2000, gamma=0.95, lr=0.1, e=0.8,... function learn_Q_SARSA (line 66) | def learn_Q_SARSA(env, num_episodes=2000, gamma=0.95, lr=0.1, e=0.8, dec... function render_single_Q (line 116) | def render_single_Q(env, Q): function main (line 142) | def main(): FILE: assignment1/vi_and_pi.py function value_iteration (line 12) | def value_iteration(P, nS, nA, gamma=0.9, max_iteration=20, tol=1e-3): function policy_evaluation (line 68) | def policy_evaluation(P, nS, nA, policy, gamma=0.9, max_iteration=100, t... function policy_improvement (line 111) | def policy_improvement(P, nS, nA, value_from_policy, policy, gamma=0.9): function policy_iteration (line 152) | def policy_iteration(P, nS, nA, gamma=0.9, max_iteration=200, tol=1e-3): function example (line 194) | def example(env): function render_single (line 215) | def render_single(env, policy): FILE: assignment2/configs/frozen_lake.py class config (line 1) | class config(): FILE: assignment2/configs/q2_linear.py class config (line 1) | class config(): FILE: assignment2/configs/q3_nature.py class config (line 1) | class config(): FILE: assignment2/configs/q4_train_atari_linear.py class config (line 1) | class config(): FILE: assignment2/configs/q5_train_atari_nature.py class config (line 1) | class config(): FILE: assignment2/configs/q6_bonus_question.py class config (line 1) | class config(): FILE: assignment2/configs/test.py class config (line 1) | class config(): FILE: assignment2/core/deep_q_learning.py class DQN (line 9) | class DQN(QN): method add_placeholders_op (line 13) | def add_placeholders_op(self): method get_q_values_op (line 17) | def get_q_values_op(self, scope, reuse=False): method add_update_target_op (line 24) | def add_update_target_op(self, q_scope, target_q_scope): method add_loss_op (line 37) | def add_loss_op(self, q, target_q): method add_optimizer_op (line 44) | def add_optimizer_op(self, scope): method process_state (line 51) | def process_state(self, state): method build (line 69) | def build(self): method initialize (line 94) | def initialize(self): method add_summary (line 116) | def add_summary(self): method save (line 153) | def save(self): method get_best_action (line 163) | def get_best_action(self, state): method update_step (line 177) | def update_step(self, t, replay_buffer, lr): method update_target_params (line 221) | def update_target_params(self): FILE: assignment2/core/q_learning.py class QN (line 16) | class QN(object): method __init__ (line 20) | def __init__(self, env, config, logger=None): method build (line 43) | def build(self): method policy (line 51) | def policy(self): method save (line 58) | def save(self): method initialize (line 68) | def initialize(self): method get_best_action (line 75) | def get_best_action(self, state): method get_action (line 87) | def get_action(self, state): method update_target_params (line 100) | def update_target_params(self): method init_averages (line 107) | def init_averages(self): method update_averages (line 122) | def update_averages(self, rewards, max_q_values, q_values, scores_eval): method train (line 144) | def train(self, exp_schedule, lr_schedule): method train_step (line 241) | def train_step(self, t, replay_buffer, lr): method evaluate (line 267) | def evaluate(self, env=None, num_episodes=None): method record (line 323) | def record(self): method run (line 335) | def run(self, exp_schedule, lr_schedule): FILE: assignment2/q1_schedule.py class LinearSchedule (line 5) | class LinearSchedule(object): method __init__ (line 6) | def __init__(self, eps_begin, eps_end, nsteps): method update (line 19) | def update(self, t): class LinearExploration (line 48) | class LinearExploration(LinearSchedule): method __init__ (line 49) | def __init__(self, env, eps_begin, eps_end, nsteps): method get_action (line 61) | def get_action(self, best_action): function test1 (line 91) | def test1(): function test2 (line 104) | def test2(): function test3 (line 112) | def test3(): function your_test (line 120) | def your_test(): FILE: assignment2/q2_linear.py class Linear (line 12) | class Linear(DQN): method add_placeholders_op (line 16) | def add_placeholders_op(self): method get_q_values_op (line 72) | def get_q_values_op(self, state, scope, reuse=False): method add_update_target_op (line 117) | def add_update_target_op(self, q_scope, target_q_scope): method add_loss_op (line 165) | def add_loss_op(self, q, target_q): method add_optimizer_op (line 211) | def add_optimizer_op(self, scope): FILE: assignment2/q3_nature.py class NatureQN (line 13) | class NatureQN(Linear): method get_q_values_op (line 19) | def get_q_values_op(self, state, scope, reuse=False): FILE: assignment2/q6_double_q_learning.py class MyDQN (line 17) | class MyDQN(NatureQN): method add_loss_op (line 38) | def add_loss_op(self, q, target_q): FILE: assignment2/q6_dueling.py class MyDQN (line 16) | class MyDQN(Linear): method get_q_values_op (line 37) | def get_q_values_op(self, state, scope, reuse=False): FILE: assignment2/utils/general.py function export_plot (line 11) | def export_plot(ys, ylabel, filename): function get_logger (line 27) | def get_logger(filename): class Progbar (line 41) | class Progbar(object): method __init__ (line 51) | def __init__(self, target, width=30, verbose=1, discount=0.9): method update (line 63) | def update(self, current, values=[], exact=[], strict=[], exp_avg=[]): method add (line 156) | def add(self, n, values=[]): FILE: assignment2/utils/preprocess.py function greyscale (line 3) | def greyscale(state): function blackandwhite (line 22) | def blackandwhite(state): FILE: assignment2/utils/replay_buffer.py function sample_n_unique (line 4) | def sample_n_unique(sampling_f, n): class ReplayBuffer (line 15) | class ReplayBuffer(object): method __init__ (line 19) | def __init__(self, size, frame_history_len): method can_sample (line 56) | def can_sample(self, batch_size): method _encode_sample (line 60) | def _encode_sample(self, idxes): method sample (line 70) | def sample(self, batch_size): method encode_recent_observation (line 107) | def encode_recent_observation(self): method _encode_observation (line 120) | def _encode_observation(self, idx): method store_frame (line 146) | def store_frame(self, frame): method store_effect (line 174) | def store_effect(self, idx, action, reward, done): FILE: assignment2/utils/test_env.py class ActionSpace (line 3) | class ActionSpace(object): method __init__ (line 4) | def __init__(self, n): method sample (line 7) | def sample(self): class ObservationSpace (line 11) | class ObservationSpace(object): method __init__ (line 12) | def __init__(self, shape): class EnvTest (line 20) | class EnvTest(object): method __init__ (line 24) | def __init__(self, shape=(84, 84, 3)): method reset (line 34) | def reset(self): method step (line 41) | def step(self, action): method render (line 56) | def render(self): FILE: assignment2/utils/viewer.py class SimpleImageViewer (line 4) | class SimpleImageViewer(object): method __init__ (line 9) | def __init__(self, display=None): method imshow (line 15) | def imshow(self, arr): method close (line 48) | def close(self): method __del__ (line 54) | def __del__(self): FILE: assignment2/utils/wrappers.py class MaxAndSkipEnv (line 8) | class MaxAndSkipEnv(gym.Wrapper): method __init__ (line 13) | def __init__(self, env=None, skip=4): method _step (line 20) | def _step(self, action): method _reset (line 34) | def _reset(self): class PreproWrapper (line 42) | class PreproWrapper(gym.Wrapper): method __init__ (line 47) | def __init__(self, env, prepro, shape, overwrite_render=True, high=255): method _step (line 65) | def _step(self, action): method _reset (line 74) | def _reset(self): method _render (line 79) | def _render(self, mode='human', close=False): FILE: assignment3/discrete_env.py function categorical_sample (line 6) | def categorical_sample(prob_n, np_random): class DiscreteEnv (line 16) | class DiscreteEnv(Env): method __init__ (line 31) | def __init__(self, nS, nA, P, isd): method _seed (line 44) | def _seed(self, seed=None): method _reset (line 48) | def _reset(self): method _step (line 53) | def _step(self, a): FILE: assignment3/frozen_lake.py class FrozenLakeEnv (line 23) | class FrozenLakeEnv(discrete_env.DiscreteEnv): method __init__ (line 50) | def __init__(self, desc=None, map_name="4x4",is_slippery=False): method _render (line 128) | def _render(self, mode='human', close=False): FILE: assignment3/q1.py function rmax (line 11) | def rmax(env, gamma, m, R_max, epsilon, num_episodes, max_step = 6): function main (line 79) | def main(): FILE: assignment3/q2.py function learn_Q_QLearning (line 10) | def learn_Q_QLearning(env, num_episodes=10000, gamma = 0.99, lr = 0.1, e... function main (line 63) | def main(): FILE: assignment3/q3.py function rmax (line 11) | def rmax(env, gamma, m, R_max, epsilon, num_episodes, max_step = 6, e = ... function main (line 82) | def main(): FILE: assignment3/utils.py function render_single_Q (line 8) | def render_single_Q(env, Q, max_step = 6):