SYMBOL INDEX (404 symbols across 26 files) FILE: chapter01/tic_tac_toe.py class State (line 19) | class State: method __init__ (line 20) | def __init__(self): method hash (line 31) | def hash(self): method is_end (line 39) | def is_end(self): method next_state (line 82) | def next_state(self, i, j, symbol): method print_state (line 89) | def print_state(self): function get_all_states_impl (line 105) | def get_all_states_impl(current_state, current_symbol, all_states): function get_all_states (line 118) | def get_all_states(): class Judger (line 131) | class Judger: method __init__ (line 134) | def __init__(self, player1, player2): method reset (line 144) | def reset(self): method alternate (line 148) | def alternate(self): method play (line 154) | def play(self, print_state=False): class Player (line 176) | class Player: method __init__ (line 179) | def __init__(self, step_size=0.1, epsilon=0.1): method reset (line 187) | def reset(self): method set_state (line 191) | def set_state(self, state): method set_symbol (line 195) | def set_symbol(self, symbol): method backup (line 211) | def backup(self): method act (line 222) | def act(self): method save_policy (line 249) | def save_policy(self): method load_policy (line 253) | def load_policy(self): class HumanPlayer (line 263) | class HumanPlayer: method __init__ (line 264) | def __init__(self, **kwargs): method reset (line 269) | def reset(self): method set_state (line 272) | def set_state(self, state): method set_symbol (line 275) | def set_symbol(self, symbol): method act (line 278) | def act(self): function train (line 287) | def train(epochs, print_every_n=500): function compete (line 308) | def compete(turns): function play (line 328) | def play(): FILE: chapter02/ten_armed_testbed.py class Bandit (line 19) | class Bandit: method __init__ (line 28) | def __init__(self, k_arm=10, epsilon=0., initial=0., step_size=0.1, sa... method reset (line 43) | def reset(self): method act (line 58) | def act(self): method step (line 77) | def step(self, action): function simulate (line 101) | def simulate(runs, time, bandits): function figure_2_1 (line 118) | def figure_2_1(): function figure_2_2 (line 126) | def figure_2_2(runs=2000, time=1000): function figure_2_3 (line 151) | def figure_2_3(runs=2000, time=1000): function figure_2_4 (line 167) | def figure_2_4(runs=2000, time=1000): function figure_2_5 (line 183) | def figure_2_5(runs=2000, time=1000): function figure_2_6 (line 205) | def figure_2_6(runs=2000, time=1000): FILE: chapter03/grid_world.py function step (line 34) | def step(state, action): function draw_image (line 50) | def draw_image(image): function draw_policy (line 84) | def draw_policy(optimal_values): function figure_3_2 (line 127) | def figure_3_2(): function figure_3_2_linear_system (line 145) | def figure_3_2_linear_system(): function figure_3_5 (line 168) | def figure_3_5(): FILE: chapter04/car_rental.py function poisson_probability (line 56) | def poisson_probability(n, lam): function expected_return (line 64) | def expected_return(state, action, state_value, constant_returned_cars): function figure_4_2 (line 124) | def figure_4_2(constant_returned_cars=True): FILE: chapter04/car_rental_synchronous.py function poisson (line 43) | def poisson(n, lam): class PolicyIteration (line 51) | class PolicyIteration: method __init__ (line 52) | def __init__(self, truncate, parallel_processes, delta=1e-2, gamma=0.9... method solve (line 63) | def solve(self): method policy_evaluation (line 83) | def policy_evaluation(self, values, policy): method policy_improvement (line 107) | def policy_improvement(self, actions, values, policy): method bellman (line 129) | def bellman(self, values, action, state): method expected_return_pe (line 179) | def expected_return_pe(self, policy, values, state): method expected_return_pi (line 186) | def expected_return_pi(self, values, action, state): method plot (line 193) | def plot(self): FILE: chapter04/gamblers_problem.py function figure_4_3 (line 25) | def figure_4_3(): FILE: chapter04/grid_world.py function is_terminal (line 25) | def is_terminal(state): function step (line 30) | def step(state, action): function draw_image (line 44) | def draw_image(image): function compute_state_value (line 66) | def compute_state_value(in_place=True, discount=1.0): function figure_4_1 (line 93) | def figure_4_1(): FILE: chapter05/blackjack.py function target_policy_player (line 30) | def target_policy_player(usable_ace_player, player_sum, dealer_card): function behavior_policy_player (line 34) | def behavior_policy_player(usable_ace_player, player_sum, dealer_card): function get_card (line 47) | def get_card(): function card_value (line 53) | def card_value(card_id): function play (line 60) | def play(policy_player, initial_state=None, initial_action=None): function monte_carlo_on_policy (line 181) | def monte_carlo_on_policy(episodes): function monte_carlo_es (line 202) | def monte_carlo_es(episodes): function monte_carlo_off_policy (line 243) | def monte_carlo_off_policy(episodes): function figure_5_1 (line 279) | def figure_5_1(): function figure_5_2 (line 307) | def figure_5_2(): function figure_5_3 (line 341) | def figure_5_3(): FILE: chapter05/infinite_variance.py function behavior_policy (line 18) | def behavior_policy(): function target_policy (line 22) | def target_policy(): function play (line 26) | def play(): function figure_5_4 (line 37) | def figure_5_4(): FILE: chapter06/cliff_walking.py function step (line 41) | def step(state, action): function choose_action (line 85) | def choose_action(state, q_value): function sarsa (line 97) | def sarsa(q_value, expected=False, step_size=ALPHA): function q_learning (line 128) | def q_learning(q_value, step_size=ALPHA): function print_optimal_policy (line 143) | def print_optimal_policy(q_value): function figure_6_4 (line 167) | def figure_6_4(): function figure_6_6 (line 210) | def figure_6_6(): FILE: chapter06/maximization_bias.py function choose_action (line 54) | def choose_action(state, q_value): function take_action (line 62) | def take_action(state, action): function q_learning (line 69) | def q_learning(q1, q2=None): function figure_6_7 (line 103) | def figure_6_7(): FILE: chapter06/random_walk.py function temporal_difference (line 36) | def temporal_difference(values, alpha=0.1, batch=False): function monte_carlo (line 60) | def monte_carlo(values, alpha=0.1, batch=False): function compute_state_value (line 86) | def compute_state_value(): function rms_error (line 100) | def rms_error(): function batch_updating (line 132) | def batch_updating(method, episodes, alpha=0.001): function example_6_2 (line 170) | def example_6_2(): function figure_6_2 (line 182) | def figure_6_2(): FILE: chapter06/windy_grid_world.py function step (line 42) | def step(state, action): function episode (line 56) | def episode(q_value): function figure_6_3 (line 88) | def figure_6_3(): FILE: chapter07/random_walk.py function temporal_difference (line 40) | def temporal_difference(value, n, alpha): function figure7_2 (line 98) | def figure7_2(): FILE: chapter08/expectation_vs_sample.py function b_steps (line 15) | def b_steps(b): function figure_8_7 (line 34) | def figure_8_7(): FILE: chapter08/maze.py class PriorityQueue (line 17) | class PriorityQueue: method __init__ (line 18) | def __init__(self): method add_item (line 24) | def add_item(self, item, priority=0): method remove_item (line 32) | def remove_item(self, item): method pop_item (line 36) | def pop_item(self): method empty (line 44) | def empty(self): class Maze (line 50) | class Maze: method __init__ (line 51) | def __init__(self): method extend_state (line 94) | def extend_state(self, state, factor): method extend_maze (line 104) | def extend_maze(self, factor): method step (line 120) | def step(self, state, action): class DynaParams (line 139) | class DynaParams: method __init__ (line 140) | def __init__(self): function choose_action (line 167) | def choose_action(state, q_value, maze, dyna_params): class TrivialModel (line 175) | class TrivialModel: method __init__ (line 177) | def __init__(self, rand=np.random): method feed (line 182) | def feed(self, state, action, next_state, reward): method sample (line 190) | def sample(self): class TimeModel (line 201) | class TimeModel: method __init__ (line 205) | def __init__(self, maze, time_weight=1e-4, rand=np.random): method feed (line 216) | def feed(self, state, action, next_state, reward): method sample (line 233) | def sample(self): class PriorityModel (line 249) | class PriorityModel(TrivialModel): method __init__ (line 250) | def __init__(self, rand=np.random): method insert (line 258) | def insert(self, priority, state, action): method empty (line 263) | def empty(self): method sample (line 267) | def sample(self): method feed (line 275) | def feed(self, state, action, next_state, reward): method predecessor (line 284) | def predecessor(self, state): function dyna_q (line 298) | def dyna_q(q_value, model, maze, dyna_params): function prioritized_sweeping (line 340) | def prioritized_sweeping(q_value, model, maze, dyna_params): function figure_8_2 (line 398) | def figure_8_2(): function changing_maze (line 434) | def changing_maze(maze, dyna_params): function figure_8_4 (line 476) | def figure_8_4(): function figure_8_5 (line 517) | def figure_8_5(): function check_path (line 558) | def check_path(q_values, maze): function example_8_4 (line 574) | def example_8_4(): FILE: chapter08/trajectory_sampling.py function argmax (line 29) | def argmax(value): class Task (line 34) | class Task: method __init__ (line 38) | def __init__(self, n_states, b): method step (line 49) | def step(self, state, action): function evaluate_pi (line 58) | def evaluate_pi(q, task): function uniform (line 75) | def uniform(task, eval_interval): function on_policy (line 95) | def on_policy(task, eval_interval): function figure_8_8 (line 122) | def figure_8_8(): FILE: chapter09/random_walk.py function compute_true_value (line 35) | def compute_true_value(): function step (line 61) | def step(state, action): function get_action (line 75) | def get_action(): class ValueFunction (line 81) | class ValueFunction: method __init__ (line 83) | def __init__(self, num_of_groups): method value (line 91) | def value(self, state): method update (line 100) | def update(self, delta, state): class TilingsValueFunction (line 105) | class TilingsValueFunction: method __init__ (line 109) | def __init__(self, numOfTilings, tileWidth, tilingOffset): method value (line 126) | def value(self, state): method update (line 138) | def update(self, delta, state): class BasesValueFunction (line 153) | class BasesValueFunction: method __init__ (line 156) | def __init__(self, order, type): method value (line 170) | def value(self, state): method update (line 177) | def update(self, delta, state): function gradient_monte_carlo (line 188) | def gradient_monte_carlo(value_function, alpha, distribution=None): function semi_gradient_temporal_difference (line 211) | def semi_gradient_temporal_difference(value_function, n, alpha): function figure_9_1 (line 261) | def figure_9_1(true_value): function figure_9_2_left (line 293) | def figure_9_2_left(true_value): function figure_9_2_right (line 308) | def figure_9_2_right(true_value): function figure_9_2 (line 343) | def figure_9_2(true_value): function figure_9_5 (line 354) | def figure_9_5(true_value): function figure_9_10 (line 398) | def figure_9_10(true_value): FILE: chapter09/square_wave.py class Interval (line 17) | class Interval: method __init__ (line 19) | def __init__(self, left, right): method contain (line 24) | def contain(self, x): method size (line 28) | def size(self): function square_wave (line 35) | def square_wave(x): function sample (line 41) | def sample(n): class ValueFunction (line 50) | class ValueFunction: method __init__ (line 53) | def __init__(self, feature_width, domain=DOMAIN, alpha=0.2, num_of_fea... method get_active_features (line 73) | def get_active_features(self, x): method value (line 81) | def value(self, x): method update (line 87) | def update(self, delta, x): function approximate (line 94) | def approximate(samples, value_function): function figure_9_8 (line 100) | def figure_9_8(): FILE: chapter10/access_control.py class IHT (line 25) | class IHT: method __init__ (line 27) | def __init__(self, size_val): method count (line 32) | def count(self): method full (line 35) | def full(self): method get_index (line 38) | def get_index(self, obj, read_only=False): function hash_coords (line 54) | def hash_coords(coordinates, m, read_only=False): function tiles (line 59) | def tiles(iht_or_size, num_tilings, floats, ints=None, read_only=False): class ValueFunction (line 104) | class ValueFunction: method __init__ (line 112) | def __init__(self, num_of_tilings, alpha=ALPHA, beta=BETA): method get_active_tiles (line 130) | def get_active_tiles(self, free_servers, priority, action): method value (line 137) | def value(self, free_servers, priority, action): method state_value (line 142) | def state_value(self, free_servers, priority): method learn (line 150) | def learn(self, free_servers, priority, action, new_free_servers, new_... function get_action (line 161) | def get_action(free_servers, priority, value_function): function take_action (line 171) | def take_action(free_servers, priority, action): function differential_semi_gradient_sarsa (line 183) | def differential_semi_gradient_sarsa(value_function, max_steps): function figure_10_5 (line 203) | def figure_10_5(): FILE: chapter10/mountain_car.py class IHT (line 24) | class IHT: method __init__ (line 26) | def __init__(self, size_val): method count (line 31) | def count(self): method full (line 34) | def full(self): method get_index (line 37) | def get_index(self, obj, read_only=False): function hash_coords (line 53) | def hash_coords(coordinates, m, read_only=False): function tiles (line 58) | def tiles(iht_or_size, num_tilings, floats, ints=None, read_only=False): function step (line 95) | def step(position, velocity, action): class ValueFunction (line 106) | class ValueFunction: method __init__ (line 113) | def __init__(self, step_size, num_of_tilings=8, max_size=2048): method get_active_tiles (line 130) | def get_active_tiles(self, position, velocity, action): method value (line 139) | def value(self, position, velocity, action): method learn (line 146) | def learn(self, position, velocity, action, target): method cost_to_go (line 154) | def cost_to_go(self, position, velocity): function get_action (line 161) | def get_action(position, velocity, value_function): function semi_gradient_n_step_sarsa (line 172) | def semi_gradient_n_step_sarsa(value_function, n=1): function print_cost (line 234) | def print_cost(value_function, episode, ax): function figure_10_1 (line 258) | def figure_10_1(): function figure_10_2 (line 275) | def figure_10_2(): function figure_10_3 (line 302) | def figure_10_3(): function figure_10_4 (line 330) | def figure_10_4(): FILE: chapter11/counterexample.py function step (line 41) | def step(state, action): function target_policy (line 47) | def target_policy(state): function behavior_policy (line 61) | def behavior_policy(state): function semi_gradient_off_policy_TD (line 71) | def semi_gradient_off_policy_TD(state, theta, alpha): function semi_gradient_DP (line 89) | def semi_gradient_DP(theta, alpha): function TDC (line 110) | def TDC(state, theta, weight, alpha, beta): function expected_TDC (line 129) | def expected_TDC(theta, weight, alpha, beta): function expected_emphatic_TD (line 155) | def expected_emphatic_TD(theta, emphasis, alpha): function compute_RMSVE (line 179) | def compute_RMSVE(theta): function compute_RMSPBE (line 184) | def compute_RMSPBE(theta): function figure_11_2_left (line 196) | def figure_11_2_left(): function figure_11_2_right (line 218) | def figure_11_2_right(): function figure_11_2 (line 238) | def figure_11_2(): function figure_11_6_left (line 249) | def figure_11_6_left(): function figure_11_6_right (line 278) | def figure_11_6_right(): function figure_11_6 (line 305) | def figure_11_6(): function figure_11_7 (line 316) | def figure_11_7(): FILE: chapter12/lambda_effect.py class IHT (line 60) | class IHT: method __init__ (line 63) | def __init__(self, sizeval): method __str__ (line 68) | def __str__(self): method count (line 75) | def count(self): method fullp (line 78) | def fullp(self): method getindex (line 81) | def getindex(self, obj, readonly=False): function hashcoords (line 98) | def hashcoords(coordinates, m, readonly=False): function tiles (line 106) | def tiles(ihtORsize, numtilings, floats, ints=[], readonly=False): function tileswrap (line 122) | def tileswrap(ihtORsize, numtilings, floats, wrapwidths, ints=[], readon... class IndexHashTable (line 139) | class IndexHashTable: method __init__ (line 141) | def __init__(self, iht_size, num_tilings, tiling_size, obs_bounds): method get_tiles (line 153) | def get_tiles(self, state, action): function update_trace_vector (line 167) | def update_trace_vector(agent, method, state, action=None): class RandomWalkEnvironment (line 202) | class RandomWalkEnvironment: method __init__ (line 204) | def __init__(self): method step (line 214) | def step(self, state, action): class RandomWalkAgent (line 220) | class RandomWalkAgent: method __init__ (line 221) | def __init__(self, lmbda, alpha): method error_hist (line 246) | def error_hist(self): method get_all_v_hat (line 249) | def get_all_v_hat(self): method policy (line 253) | def policy(self, state): method v_hat (line 257) | def v_hat(self, state): method grad_v_hat (line 264) | def grad_v_hat(self, state): method get_active_features (line 270) | def get_active_features(self, state): method run_td_lambda (line 274) | def run_td_lambda(self, env, n_episodes, method): class RandomWalk (line 316) | class RandomWalk: method __init__ (line 317) | def __init__(self, lmbda, alpha): method error_hist (line 322) | def error_hist(self): method train (line 325) | def train(self, n_episodes, method): class MountainCarEnvironment (line 334) | class MountainCarEnvironment: method __init__ (line 336) | def __init__(self): method step (line 350) | def step(self, state, action): class MountainCarAgent (line 368) | class MountainCarAgent: method __init__ (line 369) | def __init__(self, alpha, lmbda, iht_args): method n_step_hist (line 401) | def n_step_hist(self): method policy (line 404) | def policy(self, state): method get_init_state (line 415) | def get_init_state(self): method is_terminal_state (line 421) | def is_terminal_state(self, state): method q_hat (line 424) | def q_hat(self, state, action): method get_active_features (line 433) | def get_active_features(self, state, action): method run_sarsa_lambda (line 437) | def run_sarsa_lambda(self, env, n_episodes, method): class MountainCar (line 507) | class MountainCar: method __init__ (line 508) | def __init__(self, lmbda, alpha): method n_step_hist (line 529) | def n_step_hist(self): method train (line 532) | def train(self, n_episodes, method): class CartPoleEnvironment (line 541) | class CartPoleEnvironment: method __init__ (line 544) | def __init__(self): method is_state_valid (line 563) | def is_state_valid(self, state): method step (line 574) | def step(self, state, action): class CartPoleAgent (line 603) | class CartPoleAgent: method __init__ (line 604) | def __init__(self, iht_args, alpha, lmbda): method n_failures (line 633) | def n_failures(self): method policy (line 636) | def policy(self, state): method is_state_valid (line 649) | def is_state_valid(self, state): method get_init_state (line 657) | def get_init_state(self): method is_state_over_bounds (line 662) | def is_state_over_bounds(self, state): method q_hat (line 674) | def q_hat(self, state, action): method get_active_features (line 682) | def get_active_features(self, state, action): method run_sarsa_lambda (line 686) | def run_sarsa_lambda(self, env, n_step_max, method): class CartPole (line 754) | class CartPole: method __init__ (line 755) | def __init__(self, lmbda, alpha): method n_failures (line 779) | def n_failures(self): method train (line 782) | def train(self, n_step_max, method): class PuddleWorldGrid (line 792) | class PuddleWorldGrid: method __init__ (line 793) | def __init__(self): method height (line 808) | def height(self): method width (line 812) | def width(self): method is_state_goal (line 815) | def is_state_goal(self, state): method get_dist2puddle (line 821) | def get_dist2puddle(self, state): method cvt_ij2xy (line 868) | def cvt_ij2xy(self, pos_ij): method draw (line 871) | def draw(self): class PuddleWorldEnvironment (line 907) | class PuddleWorldEnvironment: method __init__ (line 908) | def __init__(self, grid): method step (line 920) | def step(self, state, action): class PuddleWorldAgent (line 937) | class PuddleWorldAgent: method __init__ (line 938) | def __init__(self, grid, alpha, lmbda, iht_args): method cost_per_ep_hist (line 961) | def cost_per_ep_hist(self): method policy (line 964) | def policy(self, state): method get_start_pos (line 978) | def get_start_pos(self): method is_terminal_state (line 993) | def is_terminal_state(self, state): method q_hat (line 996) | def q_hat(self, state, action): method get_active_features (line 1005) | def get_active_features(self, state, action): method run_sarsa_lambda (line 1009) | def run_sarsa_lambda(self, env, n_episodes, method): class PuddleWorld (line 1058) | class PuddleWorld: method __init__ (line 1059) | def __init__(self, lmbda, alpha): method cost_per_ep_hist (line 1083) | def cost_per_ep_hist(self): method draw (line 1086) | def draw(self): method train (line 1089) | def train(self, n_episodes, method): function get_puddle_world_map (line 1094) | def get_puddle_world_map(): function get_random_walk_plot_data (line 1109) | def get_random_walk_plot_data(): function get_mountain_car_plot_data (line 1145) | def get_mountain_car_plot_data(): function get_cart_pole_plot_data (line 1176) | def get_cart_pole_plot_data(): function get_puddle_world_plot_data (line 1206) | def get_puddle_world_plot_data(): function figure_12_14 (line 1243) | def figure_12_14(): FILE: chapter12/mountain_car.py class IHT (line 22) | class IHT: method __init__ (line 24) | def __init__(self, size_val): method count (line 29) | def count(self): method full (line 32) | def full(self): method get_index (line 35) | def get_index(self, obj, read_only=False): function hash_coords (line 51) | def hash_coords(coordinates, m, read_only=False): function tiles (line 56) | def tiles(iht_or_size, num_tilings, floats, ints=None, read_only=False): function step (line 99) | def step(position, velocity, action): function accumulating_trace (line 114) | def accumulating_trace(trace, active_tiles, lam): function replacing_trace (line 124) | def replacing_trace(trace, activeTiles, lam): function replacing_trace_with_clearing (line 136) | def replacing_trace_with_clearing(trace, active_tiles, lam, clearing_til... function dutch_trace (line 149) | def dutch_trace(trace, active_tiles, lam, alpha): class Sarsa (line 156) | class Sarsa: method __init__ (line 163) | def __init__(self, step_size, lam, trace_update=accumulating_trace, nu... method get_active_tiles (line 185) | def get_active_tiles(self, position, velocity, action): method value (line 194) | def value(self, position, velocity, action): method learn (line 201) | def learn(self, position, velocity, action, target): method cost_to_go (line 220) | def cost_to_go(self, position, velocity): function get_action (line 227) | def get_action(position, velocity, valueFunction): function play (line 237) | def play(evaluator): function figure_12_10 (line 259) | def figure_12_10(): function figure_12_11 (line 292) | def figure_12_11(): FILE: chapter12/random_walk.py class ValueFunction (line 36) | class ValueFunction: method __init__ (line 39) | def __init__(self, rate, step_size): method value (line 45) | def value(self, state): method learn (line 50) | def learn(self, state, reward): method new_episode (line 56) | def new_episode(self): class OffLineLambdaReturn (line 60) | class OffLineLambdaReturn(ValueFunction): method __init__ (line 61) | def __init__(self, rate, step_size): method new_episode (line 66) | def new_episode(self): method learn (line 72) | def learn(self, state, reward): method n_step_return_from_time (line 82) | def n_step_return_from_time(self, n, time): method lambda_return_from_time (line 92) | def lambda_return_from_time(self, time): method off_line_learn (line 107) | def off_line_learn(self): class TemporalDifferenceLambda (line 116) | class TemporalDifferenceLambda(ValueFunction): method __init__ (line 117) | def __init__(self, rate, step_size): method new_episode (line 121) | def new_episode(self): method learn (line 127) | def learn(self, state, reward): class TrueOnlineTemporalDifferenceLambda (line 137) | class TrueOnlineTemporalDifferenceLambda(ValueFunction): method __init__ (line 138) | def __init__(self, rate, step_size): method new_episode (line 141) | def new_episode(self): method learn (line 149) | def learn(self, state, reward): function random_walk (line 163) | def random_walk(value_function): function parameter_sweep (line 182) | def parameter_sweep(value_function_generator, runs, lambdas, alphas): function figure_12_3 (line 207) | def figure_12_3(): function figure_12_6 (line 223) | def figure_12_6(): function figure_12_8 (line 239) | def figure_12_8(): FILE: chapter13/short_corridor.py function true_value (line 15) | def true_value(p): class ShortCorridor (line 26) | class ShortCorridor: method __init__ (line 30) | def __init__(self): method reset (line 33) | def reset(self): method step (line 36) | def step(self, go_right): function softmax (line 60) | def softmax(x): class ReinforceAgent (line 64) | class ReinforceAgent: method __init__ (line 69) | def __init__(self, alpha, gamma): method get_pi (line 80) | def get_pi(self): method get_p_right (line 95) | def get_p_right(self): method choose_action (line 98) | def choose_action(self, reward): method episode_end (line 108) | def episode_end(self, last_reward): class ReinforceBaselineAgent (line 132) | class ReinforceBaselineAgent(ReinforceAgent): method __init__ (line 133) | def __init__(self, alpha, gamma, alpha_w): method episode_end (line 138) | def episode_end(self, last_reward): function trial (line 164) | def trial(num_episodes, agent_generator): function example_13_1 (line 187) | def example_13_1(): function figure_13_1 (line 215) | def figure_13_1(): function figure_13_2 (line 243) | def figure_13_2():