SYMBOL INDEX (305 symbols across 36 files)

FILE: bc/bc.py
  function get_tf_session (line 33) | def get_tf_session():
  function load_dataset (line 50) | def load_dataset(args):
  function policy_model (line 119) | def policy_model(data_in, action_dim):
  function get_batch (line 152) | def get_batch(expert_obs, expert_act, batch_size):
  function run_bc (line 165) | def run_bc(session, args, log_dir):
  function run_bc_test (line 237) | def run_bc_test(args, session, policy_fn, x, env):

FILE: bc/load_policy.py
  function load_policy (line 3) | def load_policy(filename):

FILE: bc/plot_bc.py
  function plot_bc_modern (line 36) | def plot_bc_modern(edir):
  function plot_bc_humanoid (line 93) | def plot_bc_humanoid(edir):
  function boring_stuff (line 151) | def boring_stuff(axarr, edir):
  function plot_bc (line 172) | def plot_bc(e):

FILE: bc/run_expert.py
  function main (line 30) | def main():

FILE: bc/tf_util.py
  function sum (line 18) | def sum(x, axis=None, keepdims=False):
  function mean (line 20) | def mean(x, axis=None, keepdims=False):
  function var (line 22) | def var(x, axis=None, keepdims=False):
  function std (line 25) | def std(x, axis=None, keepdims=False):
  function max (line 27) | def max(x, axis=None, keepdims=False):
  function min (line 29) | def min(x, axis=None, keepdims=False):
  function concatenate (line 31) | def concatenate(arrs, axis=0):
  function argmax (line 33) | def argmax(x, axis=None):
  function switch (line 36) | def switch(condition, then_expression, else_expression):
  function l2loss (line 55) | def l2loss(params):
  function lrelu (line 60) | def lrelu(x, leak=0.2):
  function categorical_sample_logits (line 64) | def categorical_sample_logits(X):
  function get_session (line 73) | def get_session():
  function single_threaded_session (line 76) | def single_threaded_session():
  function make_session (line 82) | def make_session(num_cpu):
  function initialize (line 90) | def initialize():
  function eval (line 96) | def eval(expr, feed_dict=None):
  function set_value (line 100) | def set_value(v, val):
  function load_state (line 103) | def load_state(fname):
  function save_state (line 107) | def save_state(fname):
  function normc_initializer (line 117) | def normc_initializer(std=1.0):
  function conv2d (line 125) | def conv2d(x, num_filters, name, filter_size=(3, 3), stride=(1, 1), pad=...
  function dense (line 155) | def dense(x, size, name, weight_init=None, bias=True):
  function wndense (line 164) | def wndense(x, size, name, init_scale=1.0):
  function densenobias (line 175) | def densenobias(x, size, name, weight_init=None):
  function dropout (line 178) | def dropout(x, pkeep, phase=None, mask=None):
  function batchnorm (line 185) | def batchnorm(x, name, phase, updates, gamma=0.96):
  function function (line 213) | def function(inputs, outputs, updates=None, givens=None):
  class _Function (line 223) | class _Function(object):
    method __init__ (line 224) | def __init__(self, inputs, outputs, updates, givens, check_nan=False):
    method __call__ (line 232) | def __call__(self, *inputvals):
  function mem_friendly_function (line 242) | def mem_friendly_function(nondata_inputs, data_inputs, outputs, batch_si...
  class _MemFriendlyFunction (line 249) | class _MemFriendlyFunction(object):
    method __init__ (line 250) | def __init__(self, nondata_inputs, data_inputs, outputs, batch_size):
    method __call__ (line 255) | def __call__(self, *inputvals):
  class Module (line 281) | class Module(object):
    method __init__ (line 282) | def __init__(self, name):
    method __call__ (line 287) | def __call__(self, *args):
    method _call (line 303) | def _call(self, *args):
    method trainable_variables (line 307) | def trainable_variables(self):
    method variables (line 312) | def variables(self):
  function module (line 317) | def module(name):
  function get_parents (line 333) | def get_parents(node):
  function topsorted (line 336) | def topsorted(outputs):
  function var_shape (line 377) | def var_shape(x):
  function numel (line 383) | def numel(x):
  function intprod (line 386) | def intprod(x):
  function flatgrad (line 389) | def flatgrad(loss, var_list):
  class SetFromFlat (line 394) | class SetFromFlat(object):
    method __init__ (line 395) | def __init__(self, var_list, dtype=tf.float32):
    method __call__ (line 408) | def __call__(self, theta):
  class GetFlat (line 411) | class GetFlat(object):
    method __init__ (line 412) | def __init__(self, var_list):
    method __call__ (line 414) | def __call__(self):
  function fancy_slice_2d (line 422) | def fancy_slice_2d(X, inds0, inds1):
  function scope_vars (line 435) | def scope_vars(scope, trainable_only):
  function lengths_to_mask (line 445) | def lengths_to_mask(lengths_b, max_length):
  function in_session (line 463) | def in_session(f):
  function get_placeholder (line 472) | def get_placeholder(name, dtype, shape):
  function get_placeholder_cached (line 482) | def get_placeholder_cached(name):
  function flattenallbut0 (line 485) | def flattenallbut0(x):
  function reset (line 488) | def reset():

FILE: ddpg/ddpg.py
  class DDPGAgent (line 20) | class DDPGAgent(object):
    method __init__ (line 22) | def __init__(self, sess, env, test_env, args):
    method train (line 42) | def train(self):
    method _do_rollouts (line 103) | def _do_rollouts(self):
    method _debug_print (line 133) | def _debug_print(self):
  class Network (line 150) | class Network(object):
    method __init__ (line 157) | def __init__(self, sess, env, args):
  class Actor (line 178) | class Actor(Network):
    method __init__ (line 187) | def __init__(self, sess, env, args):
    method _build_net (line 224) | def _build_net(self, input_BO, scope):
    method sample_action (line 250) | def sample_action(self, obs, train=True):
    method update_target_net (line 273) | def update_target_net(self, smooth=True):
    method update_weights (line 285) | def update_weights(self, f, a_grads_BA):
  class Critic (line 294) | class Critic(Network):
    method __init__ (line 300) | def __init__(self, sess, env, args):
    method _build_net (line 345) | def _build_net(self, input_BO, acts_BO, scope):
    method update_target_net (line 374) | def update_target_net(self, smooth=True):
    method update_weights (line 386) | def update_weights(self, f):

FILE: ddpg/replay_buffer.py
  class ReplayBuffer (line 5) | class ReplayBuffer(object):
    method __init__ (line 7) | def __init__(self, size, ob_dim, ac_dim):
    method add_sample (line 53) | def add_sample(self, s, a, r, done):
    method sample (line 77) | def sample(self, num):

FILE: dqn/atari_wrappers.py
  class NoopResetEnv (line 8) | class NoopResetEnv(gym.Wrapper):
    method __init__ (line 9) | def __init__(self, env=None, noop_max=30):
    method _reset (line 17) | def _reset(self):
  class FireResetEnv (line 25) | class FireResetEnv(gym.Wrapper):
    method __init__ (line 26) | def __init__(self, env=None):
    method _reset (line 32) | def _reset(self):
  class EpisodicLifeEnv (line 38) | class EpisodicLifeEnv(gym.Wrapper):
    method __init__ (line 39) | def __init__(self, env=None):
    method _step (line 48) | def _step(self, action):
    method _reset (line 62) | def _reset(self):
  class MaxAndSkipEnv (line 77) | class MaxAndSkipEnv(gym.Wrapper):
    method __init__ (line 78) | def __init__(self, env=None, skip=4):
    method _step (line 85) | def _step(self, action):
    method _reset (line 99) | def _reset(self):
  function _process_frame84 (line 106) | def _process_frame84(frame):
  class ProcessFrame84 (line 114) | class ProcessFrame84(gym.Wrapper):
    method __init__ (line 115) | def __init__(self, env=None):
    method _step (line 119) | def _step(self, action):
    method _reset (line 123) | def _reset(self):
  class ClippedRewardsWrapper (line 126) | class ClippedRewardsWrapper(gym.Wrapper):
    method _step (line 127) | def _step(self, action):
  function wrap_deepmind_ram (line 131) | def wrap_deepmind_ram(env):
  function wrap_deepmind (line 140) | def wrap_deepmind(env):

FILE: dqn/dqn.py
  function learn (line 15) | def learn(env,

FILE: dqn/dqn_utils.py
  function huber_loss (line 8) | def huber_loss(x, delta=1.0):
  function sample_n_unique (line 16) | def sample_n_unique(sampling_f, n):
  class Schedule (line 27) | class Schedule(object):
    method value (line 28) | def value(self, t):
  class ConstantSchedule (line 32) | class ConstantSchedule(object):
    method __init__ (line 33) | def __init__(self, value):
    method value (line 42) | def value(self, t):
  function linear_interpolation (line 46) | def linear_interpolation(l, r, alpha):
  class PiecewiseSchedule (line 49) | class PiecewiseSchedule(object):
    method __init__ (line 50) | def __init__(self, endpoints, interpolation=linear_interpolation, outs...
    method value (line 74) | def value(self, t):
  class LinearSchedule (line 85) | class LinearSchedule(object):
    method __init__ (line 86) | def __init__(self, schedule_timesteps, final_p, initial_p=1.0):
    method value (line 104) | def value(self, t):
  function compute_exponential_averages (line 109) | def compute_exponential_averages(variables, decay):
  function minimize_and_clip (line 130) | def minimize_and_clip(optimizer, objective, var_list, clip_val=10):
  function initialize_interdependent_variables (line 141) | def initialize_interdependent_variables(session, vars_list, feed_dict):
  function get_wrapper_by_name (line 164) | def get_wrapper_by_name(env, classname):
  class ReplayBuffer (line 174) | class ReplayBuffer(object):
    method __init__ (line 175) | def __init__(self, size, frame_history_len):
    method can_sample (line 212) | def can_sample(self, batch_size):
    method _encode_sample (line 216) | def _encode_sample(self, idxes):
    method sample (line 226) | def sample(self, batch_size):
    method encode_recent_observation (line 263) | def encode_recent_observation(self):
    method _encode_observation (line 276) | def _encode_observation(self, idx):
    method store_frame (line 302) | def store_frame(self, frame):
    method store_effect (line 330) | def store_effect(self, idx, action, reward, done):

FILE: dqn/plot_dqn.py
  function smoothed_block (line 19) | def smoothed_block(x, n):

FILE: dqn/run_dqn_atari.py
  function atari_model (line 17) | def atari_model(img_in, num_actions, scope, reuse=False):
  function atari_learn (line 33) | def atari_learn(env,
  function get_available_gpus (line 86) | def get_available_gpus():
  function set_global_seeds (line 92) | def set_global_seeds(i):
  function get_session (line 103) | def get_session():
  function get_env (line 123) | def get_env(task, seed):
  function main (line 134) | def main():

FILE: dqn/run_dqn_ram.py
  function atari_model (line 15) | def atari_model(ram_in, num_actions, scope, reuse=False):
  function atari_learn (line 27) | def atari_learn(env,
  function get_available_gpus (line 77) | def get_available_gpus():
  function set_global_seeds (line 82) | def set_global_seeds(i):
  function get_session (line 92) | def get_session():
  function get_env (line 101) | def get_env(seed):
  function main (line 113) | def main():

FILE: es/es.py
  class ESAgent (line 23) | class ESAgent:
    method __init__ (line 25) | def __init__(self, session, args, log_dir=None, continuous=True):
    method _make_network (line 77) | def _make_network(self, data_in, out_dim):
    method _compute_return (line 101) | def _compute_return(self, test=False, store_info=False):
    method _print_summary (line 147) | def _print_summary(self):
    method run_es (line 160) | def run_es(self):
    method test (line 247) | def test(self, just_one=True):
    method generate_rollout_data (line 286) | def generate_rollout_data(self, weights, num_rollouts):

FILE: es/logz.py
  function colorize (line 30) | def colorize(string, color, bold=False, highlight=False):
  class G (line 38) | class G:
  function configure_output_dir (line 45) | def configure_output_dir(d=None):
  function log_tabular (line 61) | def log_tabular(key, val):
  function dump_tabular (line 73) | def dump_tabular():

FILE: es/optimizers.py
  class Optimizer (line 10) | class Optimizer(object):
    method __init__ (line 11) | def __init__(self, pi):
    method update (line 16) | def update(self, globalg):
    method _compute_step (line 24) | def _compute_step(self, globalg):
  class SGD (line 28) | class SGD(Optimizer):
    method __init__ (line 29) | def __init__(self, pi, stepsize, momentum=0.9):
    method _compute_step (line 34) | def _compute_step(self, globalg):
  class Adam (line 40) | class Adam(Optimizer):
    method __init__ (line 41) | def __init__(self, pi, stepsize, beta1=0.9, beta2=0.999, epsilon=1e-08):
    method _compute_step (line 50) | def _compute_step(self, globalg):

FILE: es/plot.py
  function plot_one_dir (line 55) | def plot_one_dir(args, directory):

FILE: es/toy_es.py
  function f (line 26) | def f(w, sol):
  function run_es (line 31) | def run_es(args):

FILE: es/utils.py
  function compute_ranks (line 12) | def compute_ranks(x):
  function compute_centered_ranks (line 26) | def compute_centered_ranks(x):
  function get_tf_session (line 60) | def get_tf_session():
  function normc_initializer (line 77) | def normc_initializer(std=1.0):

FILE: g_learning/G-Learning.py
  class GLearningAgent (line 30) | class GLearningAgent():
    method __init__ (line 32) | def __init__(self, env, k):
    method policy_exploration (line 54) | def policy_exploration(self, state, epsilon=0.0):
    method alpha_schedule (line 72) | def alpha_schedule(self, t, state, action):
    method beta_schedule (line 88) | def beta_schedule(self, t):
    method g_learning (line 104) | def g_learning(self, num_episodes, max_ep_steps=10000, discount=1.0, e...

FILE: lib/envs/blackjack.py
  function cmp (line 5) | def cmp(a, b):
  function draw_card (line 12) | def draw_card(np_random):
  function draw_hand (line 16) | def draw_hand(np_random):
  function usable_ace (line 20) | def usable_ace(hand):  # Does this hand have a usable ace?
  function sum_hand (line 24) | def sum_hand(hand):  # Return current hand total
  function is_bust (line 30) | def is_bust(hand):  # Is this hand a bust?
  function score (line 34) | def score(hand):  # What is the score of this hand (0 if bust)
  function is_natural (line 38) | def is_natural(hand):  # Is this hand a natural blackjack?
  class BlackjackEnv (line 42) | class BlackjackEnv(gym.Env):
    method __init__ (line 67) | def __init__(self, natural=False):
    method _seed (line 82) | def _seed(self, seed=None):
    method _step (line 86) | def _step(self, action):
    method _get_obs (line 105) | def _get_obs(self):
    method _reset (line 108) | def _reset(self):

FILE: lib/envs/cliff_walking.py
  class CliffWalkingEnv (line 15) | class CliffWalkingEnv(discrete.DiscreteEnv):
    method _limit_coordinates (line 19) | def _limit_coordinates(self, coord):
    method _calculate_transition_prob (line 26) | def _calculate_transition_prob(self, current, delta):
    method __init__ (line 42) | def __init__(self):
    method _render (line 68) | def _render(self, mode='human', close=False):

FILE: lib/envs/gridworld.py
  class GridworldEnv (line 10) | class GridworldEnv(discrete.DiscreteEnv):
    method __init__ (line 32) | def __init__(self, shape=[4,4]):
    method _render (line 85) | def _render(self, mode='human', close=False):

FILE: lib/envs/two_room_domain.py
  class TwoRooms (line 53) | class TwoRooms:
    method __init__ (line 55) | def __init__(self, length=9):
    method _init_grid (line 64) | def _init_grid(self):
    method _check_coords_and_move (line 90) | def _check_coords_and_move(self, coord):
    method step (line 103) | def step(self, action):
    method reset (line 136) | def reset(self):
    method render (line 141) | def render(self):
    method action_space_sample (line 147) | def action_space_sample(self):
    method _pretty_print (line 153) | def _pretty_print(self):
  function test_nine_rooms (line 157) | def test_nine_rooms():

FILE: lib/envs/windy_gridworld.py
  class WindyGridworldEnv (line 11) | class WindyGridworldEnv(discrete.DiscreteEnv):
    method _limit_coordinates (line 15) | def _limit_coordinates(self, coord):
    method _calculate_transition_prob (line 22) | def _calculate_transition_prob(self, current, delta, winds):
    method __init__ (line 29) | def __init__(self):
    method _render (line 56) | def _render(self, mode='human', close=False):

FILE: lib/plotting.py
  function plot_cost_to_go_mountain_car (line 10) | def plot_cost_to_go_mountain_car(env, estimator, num_tiles=20):
  function plot_value_function (line 28) | def plot_value_function(V, title="Value Function"):
  function plot_episode_stats (line 63) | def plot_episode_stats(stats, smoothing_window=10, noshow=False, dosave=...

FILE: q_learning/Q-Learning.py
  class QLearningAgent (line 51) | class QLearningAgent():
    method __init__ (line 53) | def __init__(self, env):
    method policy_exploration (line 70) | def policy_exploration(self, state, epsilon=0.0):
    method alpha_schedule (line 88) | def alpha_schedule(self, t, state, action):
    method q_learning (line 105) | def q_learning(self, num_episodes, max_ep_steps=10000, discount=1.0, e...

FILE: trpo/fxn_approx.py
  class LinearValueFunction (line 19) | class LinearValueFunction(object):
    method fit (line 23) | def fit(self, X, y):
    method predict (line 37) | def predict(self, X):
    method preproc (line 44) | def preproc(self, X):
  class NnValueFunction (line 49) | class NnValueFunction(object):
    method __init__ (line 52) | def __init__(self, session, ob_dim=None, n_epochs=10, stepsize=1e-3):
    method fit (line 71) | def fit(self, X, y):
    method predict (line 98) | def predict(self, X):
    method preproc (line 107) | def preproc(self, X):

FILE: trpo/main.py
  function run_trpo_algorithm (line 30) | def run_trpo_algorithm(args, vf_params, logdir):

FILE: trpo/trpo.py
  class TRPO (line 26) | class TRPO:
    method __init__ (line 29) | def __init__(self, args, sess, env, vf_params):
    method update_policy (line 155) | def update_policy(self, paths, infodict):
    method _flatgrad (line 239) | def _flatgrad(self, loss, var_list):
    method _act (line 260) | def _act(self, ob):
    method get_paths (line 277) | def get_paths(self, seed_iter, env):
    method compute_advantages (line 325) | def compute_advantages(self, paths):
    method fit_value_function (line 350) | def fit_value_function(self, paths, vfdict):
    method log_diagnostics (line 363) | def log_diagnostics(self, paths, infodict, vfdict):

FILE: trpo/utils_trpo.py
  function cg (line 14) | def cg(f_Ax, b, cg_iters=10, verbose=False, residual_tol=1e-10):
  function backtracking_line_search (line 68) | def backtracking_line_search(f, x, fullstep, expected_improve_rate,

FILE: utils/logz.py
  function colorize (line 29) | def colorize(string, color, bold=False, highlight=False):
  class G (line 38) | class G:
  function configure_output_dir (line 46) | def configure_output_dir(d=None):
  function log_tabular (line 63) | def log_tabular(key, val):
  function dump_tabular (line 76) | def dump_tabular():

FILE: utils/policies.py
  class StochasticPolicy (line 23) | class StochasticPolicy(object):
    method __init__ (line 25) | def __init__(self, sess, ob_dim, ac_dim):
    method sample_action (line 32) | def sample_action(self, x):
  class GibbsPolicy (line 37) | class GibbsPolicy(StochasticPolicy):
    method __init__ (line 41) | def __init__(self, sess, ob_dim, ac_dim):
    method sample_action (line 87) | def sample_action(self, ob):
    method update_policy (line 91) | def update_policy(self, ob_no, ac_n, std_adv_n, stepsize):
    method kldiv_and_entropy (line 106) | def kldiv_and_entropy(self, ob_no, oldlogits_na):
  class GaussianPolicy (line 117) | class GaussianPolicy(StochasticPolicy):
    method __init__ (line 121) | def __init__(self, sess, ob_dim, ac_dim):
    method sample_action (line 167) | def sample_action(self, ob):
    method update_policy (line 171) | def update_policy(self, ob_no, ac_n, std_adv_n, stepsize):
    method kldiv_and_entropy (line 190) | def kldiv_and_entropy(self, ob_no, oldmean_na, oldlogstd_a):

FILE: utils/utils_pg.py
  function gauss_log_prob_1 (line 12) | def gauss_log_prob_1(mu, logstd, x):
  function gauss_log_prob (line 23) | def gauss_log_prob(mu, logstd, x):
  function gauss_KL_1 (line 43) | def gauss_KL_1(mu1, logstd1, mu2, logstd2):
  function gauss_KL (line 56) | def gauss_KL(mu1, logstd1, mu2, logstd2):
  function normc_initializer (line 80) | def normc_initializer(std=1.0):
  function dense (line 89) | def dense(x, size, name, weight_init=None):
  function fancy_slice_2d (line 96) | def fancy_slice_2d(X, inds0, inds1):
  function discount (line 106) | def discount(x, gamma):
  function lrelu (line 114) | def lrelu(x, leak=0.2):
  function explained_variance_1d (line 121) | def explained_variance_1d(ypred,y):
  function categorical_sample_logits (line 131) | def categorical_sample_logits(logits):
  function pathlength (line 146) | def pathlength(path):

FILE: utils/value_functions.py
  class LinearValueFunction (line 13) | class LinearValueFunction(object):
    method __init__ (line 16) | def __init__(self):
    method fit (line 19) | def fit(self, X, y):
    method predict (line 33) | def predict(self, X):
    method preproc (line 40) | def preproc(self, X):
  class NnValueFunction (line 45) | class NnValueFunction(object):
    method __init__ (line 48) | def __init__(self, session, ob_dim=None, n_epochs=20, stepsize=1e-3):
    method fit (line 86) | def fit(self, X, y, session=None):
    method predict (line 103) | def predict(self, X):
    method preproc (line 113) | def preproc(self, X):

FILE: vpg/main.py
  function run_vpg (line 29) | def run_vpg(args, vf_params, logdir, env, sess, continuous_control):