SYMBOL INDEX (96 symbols across 16 files) FILE: DQN/dqn.py class StateProcessor (line 21) | class StateProcessor(): method __init__ (line 25) | def __init__(self): method process (line 35) | def process(self, sess, state): class Estimator (line 46) | class Estimator(): method __init__ (line 52) | def __init__(self, scope="estimator", summaries_dir=None): method _build_model (line 65) | def _build_model(self): method predict (line 115) | def predict(self, sess, s): method update (line 129) | def update(self, sess, s, a, y): function copy_model_parameters (line 150) | def copy_model_parameters(sess, estimator1, estimator2): function make_epsilon_greedy_policy (line 172) | def make_epsilon_greedy_policy(estimator, nA): function deep_q_learning (line 194) | def deep_q_learning(sess, FILE: PolicyGradient/a3c/estimator_test.py function make_env (line 21) | def make_env(): class PolicyEstimatorTest (line 26) | class PolicyEstimatorTest(tf.test.TestCase): method testPredict (line 27) | def testPredict(self): method testGradient (line 54) | def testGradient(self): class ValueEstimatorTest (line 81) | class ValueEstimatorTest(tf.test.TestCase): method testPredict (line 82) | def testPredict(self): method testGradient (line 107) | def testGradient(self): FILE: PolicyGradient/a3c/estimators.py function build_shared_network (line 4) | def build_shared_network(X, add_summaries=False): class PolicyEstimator (line 36) | class PolicyEstimator(): method __init__ (line 49) | def __init__(self, num_outputs, reuse=False, trainable=True): class ValueEstimator (line 109) | class ValueEstimator(): method __init__ (line 120) | def __init__(self, reuse=False, trainable=True): FILE: PolicyGradient/a3c/policy_monitor.py class PolicyMonitor (line 25) | class PolicyMonitor(object): method __init__ (line 35) | def __init__(self, env, policy_net, summary_writer, saver=None): method _policy_net_predict (line 62) | def _policy_net_predict(self, state, sess): method eval_once (line 67) | def eval_once(self, sess): method continuous_eval (line 100) | def continuous_eval(self, eval_every, sess, coord): FILE: PolicyGradient/a3c/policy_monitor_test.py function make_env (line 24) | def make_env(): class PolicyMonitorTest (line 29) | class PolicyMonitorTest(tf.test.TestCase): method setUp (line 30) | def setUp(self): method testEvalOnce (line 41) | def testEvalOnce(self): FILE: PolicyGradient/a3c/train.py function make_env (line 37) | def make_env(wrap=True): FILE: PolicyGradient/a3c/worker.py function make_copy_params_op (line 24) | def make_copy_params_op(v1_list, v2_list): function make_train_op (line 39) | def make_train_op(local_estimator, global_estimator): class Worker (line 53) | class Worker(object): method __init__ (line 67) | def __init__(self, name, env, policy_net, value_net, global_counter, d... method run (line 95) | def run(self, sess, coord, t_max): method _policy_net_predict (line 118) | def _policy_net_predict(self, state, sess): method _value_net_predict (line 123) | def _value_net_predict(self, state, sess): method run_n_steps (line 128) | def run_n_steps(self, n, sess): method update (line 155) | def update(self, transitions, sess): FILE: PolicyGradient/a3c/worker_test.py function make_env (line 23) | def make_env(): class WorkerTest (line 28) | class WorkerTest(tf.test.TestCase): method setUp (line 29) | def setUp(self): method testPolicyNetPredict (line 42) | def testPolicyNetPredict(self): method testValueNetPredict (line 59) | def testValueNetPredict(self): method testRunNStepsAndUpdate (line 75) | def testRunNStepsAndUpdate(self): FILE: lib/atari/helpers.py class AtariEnvWrapper (line 3) | class AtariEnvWrapper(object): method __init__ (line 7) | def __init__(self, env): method __getattr__ (line 10) | def __getattr__(self, name): method step (line 13) | def step(self, *args, **kwargs): function atari_make_initial_state (line 27) | def atari_make_initial_state(state): function atari_make_next_state (line 30) | def atari_make_next_state(state, next_state): FILE: lib/atari/state_processor.py class StateProcessor (line 4) | class StateProcessor(): method __init__ (line 8) | def __init__(self): method process (line 18) | def process(self, state, sess=None): FILE: lib/envs/blackjack.py function cmp (line 5) | def cmp(a, b): function draw_card (line 12) | def draw_card(np_random): function draw_hand (line 16) | def draw_hand(np_random): function usable_ace (line 20) | def usable_ace(hand): # Does this hand have a usable ace? function sum_hand (line 24) | def sum_hand(hand): # Return current hand total function is_bust (line 30) | def is_bust(hand): # Is this hand a bust? function score (line 34) | def score(hand): # What is the score of this hand (0 if bust) function is_natural (line 38) | def is_natural(hand): # Is this hand a natural blackjack? class BlackjackEnv (line 42) | class BlackjackEnv(gym.Env): method __init__ (line 67) | def __init__(self, natural=False): method reset (line 82) | def reset(self): method step (line 85) | def step(self, action): method _seed (line 88) | def _seed(self, seed=None): method _step (line 92) | def _step(self, action): method _get_obs (line 111) | def _get_obs(self): method _reset (line 114) | def _reset(self): FILE: lib/envs/cliff_walking.py class CliffWalkingEnv (line 12) | class CliffWalkingEnv(discrete.DiscreteEnv): method _limit_coordinates (line 16) | def _limit_coordinates(self, coord): method _calculate_transition_prob (line 23) | def _calculate_transition_prob(self, current, delta): method __init__ (line 31) | def __init__(self): method render (line 57) | def render(self, mode='human', close=False): method _render (line 60) | def _render(self, mode='human', close=False): FILE: lib/envs/discrete.py class DiscreteEnv (line 7) | class DiscreteEnv(Env): method __init__ (line 23) | def __init__(self, nS, nA, P, isd): method seed (line 36) | def seed(self, seed=None): method reset (line 40) | def reset(self): method step (line 45) | def step(self, a): FILE: lib/envs/gridworld.py class GridworldEnv (line 12) | class GridworldEnv(discrete.DiscreteEnv): method __init__ (line 34) | def __init__(self, shape=[4,4]): method _render (line 88) | def _render(self, mode='human', close=False): FILE: lib/envs/windy_gridworld.py class WindyGridworldEnv (line 13) | class WindyGridworldEnv(discrete.DiscreteEnv): method _limit_coordinates (line 17) | def _limit_coordinates(self, coord): method _calculate_transition_prob (line 24) | def _calculate_transition_prob(self, current, delta, winds): method __init__ (line 31) | def __init__(self): method render (line 58) | def render(self, mode='human', close=False): method _render (line 61) | def _render(self, mode='human', close=False): FILE: lib/plotting.py function plot_cost_to_go_mountain_car (line 10) | def plot_cost_to_go_mountain_car(env, estimator, num_tiles=20): function plot_value_function (line 28) | def plot_value_function(V, title="Value Function"): function plot_episode_stats (line 63) | def plot_episode_stats(stats, smoothing_window=10, noshow=False):