SYMBOL INDEX (305 symbols across 36 files) FILE: bc/bc.py function get_tf_session (line 33) | def get_tf_session(): function load_dataset (line 50) | def load_dataset(args): function policy_model (line 119) | def policy_model(data_in, action_dim): function get_batch (line 152) | def get_batch(expert_obs, expert_act, batch_size): function run_bc (line 165) | def run_bc(session, args, log_dir): function run_bc_test (line 237) | def run_bc_test(args, session, policy_fn, x, env): FILE: bc/load_policy.py function load_policy (line 3) | def load_policy(filename): FILE: bc/plot_bc.py function plot_bc_modern (line 36) | def plot_bc_modern(edir): function plot_bc_humanoid (line 93) | def plot_bc_humanoid(edir): function boring_stuff (line 151) | def boring_stuff(axarr, edir): function plot_bc (line 172) | def plot_bc(e): FILE: bc/run_expert.py function main (line 30) | def main(): FILE: bc/tf_util.py function sum (line 18) | def sum(x, axis=None, keepdims=False): function mean (line 20) | def mean(x, axis=None, keepdims=False): function var (line 22) | def var(x, axis=None, keepdims=False): function std (line 25) | def std(x, axis=None, keepdims=False): function max (line 27) | def max(x, axis=None, keepdims=False): function min (line 29) | def min(x, axis=None, keepdims=False): function concatenate (line 31) | def concatenate(arrs, axis=0): function argmax (line 33) | def argmax(x, axis=None): function switch (line 36) | def switch(condition, then_expression, else_expression): function l2loss (line 55) | def l2loss(params): function lrelu (line 60) | def lrelu(x, leak=0.2): function categorical_sample_logits (line 64) | def categorical_sample_logits(X): function get_session (line 73) | def get_session(): function single_threaded_session (line 76) | def single_threaded_session(): function make_session (line 82) | def make_session(num_cpu): function initialize (line 90) | def initialize(): function eval (line 96) | def eval(expr, feed_dict=None): function set_value (line 100) | def set_value(v, val): function load_state (line 103) | def load_state(fname): function save_state (line 107) | def save_state(fname): function normc_initializer (line 117) | def normc_initializer(std=1.0): function conv2d (line 125) | def conv2d(x, num_filters, name, filter_size=(3, 3), stride=(1, 1), pad=... function dense (line 155) | def dense(x, size, name, weight_init=None, bias=True): function wndense (line 164) | def wndense(x, size, name, init_scale=1.0): function densenobias (line 175) | def densenobias(x, size, name, weight_init=None): function dropout (line 178) | def dropout(x, pkeep, phase=None, mask=None): function batchnorm (line 185) | def batchnorm(x, name, phase, updates, gamma=0.96): function function (line 213) | def function(inputs, outputs, updates=None, givens=None): class _Function (line 223) | class _Function(object): method __init__ (line 224) | def __init__(self, inputs, outputs, updates, givens, check_nan=False): method __call__ (line 232) | def __call__(self, *inputvals): function mem_friendly_function (line 242) | def mem_friendly_function(nondata_inputs, data_inputs, outputs, batch_si... class _MemFriendlyFunction (line 249) | class _MemFriendlyFunction(object): method __init__ (line 250) | def __init__(self, nondata_inputs, data_inputs, outputs, batch_size): method __call__ (line 255) | def __call__(self, *inputvals): class Module (line 281) | class Module(object): method __init__ (line 282) | def __init__(self, name): method __call__ (line 287) | def __call__(self, *args): method _call (line 303) | def _call(self, *args): method trainable_variables (line 307) | def trainable_variables(self): method variables (line 312) | def variables(self): function module (line 317) | def module(name): function get_parents (line 333) | def get_parents(node): function topsorted (line 336) | def topsorted(outputs): function var_shape (line 377) | def var_shape(x): function numel (line 383) | def numel(x): function intprod (line 386) | def intprod(x): function flatgrad (line 389) | def flatgrad(loss, var_list): class SetFromFlat (line 394) | class SetFromFlat(object): method __init__ (line 395) | def __init__(self, var_list, dtype=tf.float32): method __call__ (line 408) | def __call__(self, theta): class GetFlat (line 411) | class GetFlat(object): method __init__ (line 412) | def __init__(self, var_list): method __call__ (line 414) | def __call__(self): function fancy_slice_2d (line 422) | def fancy_slice_2d(X, inds0, inds1): function scope_vars (line 435) | def scope_vars(scope, trainable_only): function lengths_to_mask (line 445) | def lengths_to_mask(lengths_b, max_length): function in_session (line 463) | def in_session(f): function get_placeholder (line 472) | def get_placeholder(name, dtype, shape): function get_placeholder_cached (line 482) | def get_placeholder_cached(name): function flattenallbut0 (line 485) | def flattenallbut0(x): function reset (line 488) | def reset(): FILE: ddpg/ddpg.py class DDPGAgent (line 20) | class DDPGAgent(object): method __init__ (line 22) | def __init__(self, sess, env, test_env, args): method train (line 42) | def train(self): method _do_rollouts (line 103) | def _do_rollouts(self): method _debug_print (line 133) | def _debug_print(self): class Network (line 150) | class Network(object): method __init__ (line 157) | def __init__(self, sess, env, args): class Actor (line 178) | class Actor(Network): method __init__ (line 187) | def __init__(self, sess, env, args): method _build_net (line 224) | def _build_net(self, input_BO, scope): method sample_action (line 250) | def sample_action(self, obs, train=True): method update_target_net (line 273) | def update_target_net(self, smooth=True): method update_weights (line 285) | def update_weights(self, f, a_grads_BA): class Critic (line 294) | class Critic(Network): method __init__ (line 300) | def __init__(self, sess, env, args): method _build_net (line 345) | def _build_net(self, input_BO, acts_BO, scope): method update_target_net (line 374) | def update_target_net(self, smooth=True): method update_weights (line 386) | def update_weights(self, f): FILE: ddpg/replay_buffer.py class ReplayBuffer (line 5) | class ReplayBuffer(object): method __init__ (line 7) | def __init__(self, size, ob_dim, ac_dim): method add_sample (line 53) | def add_sample(self, s, a, r, done): method sample (line 77) | def sample(self, num): FILE: dqn/atari_wrappers.py class NoopResetEnv (line 8) | class NoopResetEnv(gym.Wrapper): method __init__ (line 9) | def __init__(self, env=None, noop_max=30): method _reset (line 17) | def _reset(self): class FireResetEnv (line 25) | class FireResetEnv(gym.Wrapper): method __init__ (line 26) | def __init__(self, env=None): method _reset (line 32) | def _reset(self): class EpisodicLifeEnv (line 38) | class EpisodicLifeEnv(gym.Wrapper): method __init__ (line 39) | def __init__(self, env=None): method _step (line 48) | def _step(self, action): method _reset (line 62) | def _reset(self): class MaxAndSkipEnv (line 77) | class MaxAndSkipEnv(gym.Wrapper): method __init__ (line 78) | def __init__(self, env=None, skip=4): method _step (line 85) | def _step(self, action): method _reset (line 99) | def _reset(self): function _process_frame84 (line 106) | def _process_frame84(frame): class ProcessFrame84 (line 114) | class ProcessFrame84(gym.Wrapper): method __init__ (line 115) | def __init__(self, env=None): method _step (line 119) | def _step(self, action): method _reset (line 123) | def _reset(self): class ClippedRewardsWrapper (line 126) | class ClippedRewardsWrapper(gym.Wrapper): method _step (line 127) | def _step(self, action): function wrap_deepmind_ram (line 131) | def wrap_deepmind_ram(env): function wrap_deepmind (line 140) | def wrap_deepmind(env): FILE: dqn/dqn.py function learn (line 15) | def learn(env, FILE: dqn/dqn_utils.py function huber_loss (line 8) | def huber_loss(x, delta=1.0): function sample_n_unique (line 16) | def sample_n_unique(sampling_f, n): class Schedule (line 27) | class Schedule(object): method value (line 28) | def value(self, t): class ConstantSchedule (line 32) | class ConstantSchedule(object): method __init__ (line 33) | def __init__(self, value): method value (line 42) | def value(self, t): function linear_interpolation (line 46) | def linear_interpolation(l, r, alpha): class PiecewiseSchedule (line 49) | class PiecewiseSchedule(object): method __init__ (line 50) | def __init__(self, endpoints, interpolation=linear_interpolation, outs... method value (line 74) | def value(self, t): class LinearSchedule (line 85) | class LinearSchedule(object): method __init__ (line 86) | def __init__(self, schedule_timesteps, final_p, initial_p=1.0): method value (line 104) | def value(self, t): function compute_exponential_averages (line 109) | def compute_exponential_averages(variables, decay): function minimize_and_clip (line 130) | def minimize_and_clip(optimizer, objective, var_list, clip_val=10): function initialize_interdependent_variables (line 141) | def initialize_interdependent_variables(session, vars_list, feed_dict): function get_wrapper_by_name (line 164) | def get_wrapper_by_name(env, classname): class ReplayBuffer (line 174) | class ReplayBuffer(object): method __init__ (line 175) | def __init__(self, size, frame_history_len): method can_sample (line 212) | def can_sample(self, batch_size): method _encode_sample (line 216) | def _encode_sample(self, idxes): method sample (line 226) | def sample(self, batch_size): method encode_recent_observation (line 263) | def encode_recent_observation(self): method _encode_observation (line 276) | def _encode_observation(self, idx): method store_frame (line 302) | def store_frame(self, frame): method store_effect (line 330) | def store_effect(self, idx, action, reward, done): FILE: dqn/plot_dqn.py function smoothed_block (line 19) | def smoothed_block(x, n): FILE: dqn/run_dqn_atari.py function atari_model (line 17) | def atari_model(img_in, num_actions, scope, reuse=False): function atari_learn (line 33) | def atari_learn(env, function get_available_gpus (line 86) | def get_available_gpus(): function set_global_seeds (line 92) | def set_global_seeds(i): function get_session (line 103) | def get_session(): function get_env (line 123) | def get_env(task, seed): function main (line 134) | def main(): FILE: dqn/run_dqn_ram.py function atari_model (line 15) | def atari_model(ram_in, num_actions, scope, reuse=False): function atari_learn (line 27) | def atari_learn(env, function get_available_gpus (line 77) | def get_available_gpus(): function set_global_seeds (line 82) | def set_global_seeds(i): function get_session (line 92) | def get_session(): function get_env (line 101) | def get_env(seed): function main (line 113) | def main(): FILE: es/es.py class ESAgent (line 23) | class ESAgent: method __init__ (line 25) | def __init__(self, session, args, log_dir=None, continuous=True): method _make_network (line 77) | def _make_network(self, data_in, out_dim): method _compute_return (line 101) | def _compute_return(self, test=False, store_info=False): method _print_summary (line 147) | def _print_summary(self): method run_es (line 160) | def run_es(self): method test (line 247) | def test(self, just_one=True): method generate_rollout_data (line 286) | def generate_rollout_data(self, weights, num_rollouts): FILE: es/logz.py function colorize (line 30) | def colorize(string, color, bold=False, highlight=False): class G (line 38) | class G: function configure_output_dir (line 45) | def configure_output_dir(d=None): function log_tabular (line 61) | def log_tabular(key, val): function dump_tabular (line 73) | def dump_tabular(): FILE: es/optimizers.py class Optimizer (line 10) | class Optimizer(object): method __init__ (line 11) | def __init__(self, pi): method update (line 16) | def update(self, globalg): method _compute_step (line 24) | def _compute_step(self, globalg): class SGD (line 28) | class SGD(Optimizer): method __init__ (line 29) | def __init__(self, pi, stepsize, momentum=0.9): method _compute_step (line 34) | def _compute_step(self, globalg): class Adam (line 40) | class Adam(Optimizer): method __init__ (line 41) | def __init__(self, pi, stepsize, beta1=0.9, beta2=0.999, epsilon=1e-08): method _compute_step (line 50) | def _compute_step(self, globalg): FILE: es/plot.py function plot_one_dir (line 55) | def plot_one_dir(args, directory): FILE: es/toy_es.py function f (line 26) | def f(w, sol): function run_es (line 31) | def run_es(args): FILE: es/utils.py function compute_ranks (line 12) | def compute_ranks(x): function compute_centered_ranks (line 26) | def compute_centered_ranks(x): function get_tf_session (line 60) | def get_tf_session(): function normc_initializer (line 77) | def normc_initializer(std=1.0): FILE: g_learning/G-Learning.py class GLearningAgent (line 30) | class GLearningAgent(): method __init__ (line 32) | def __init__(self, env, k): method policy_exploration (line 54) | def policy_exploration(self, state, epsilon=0.0): method alpha_schedule (line 72) | def alpha_schedule(self, t, state, action): method beta_schedule (line 88) | def beta_schedule(self, t): method g_learning (line 104) | def g_learning(self, num_episodes, max_ep_steps=10000, discount=1.0, e... FILE: lib/envs/blackjack.py function cmp (line 5) | def cmp(a, b): function draw_card (line 12) | def draw_card(np_random): function draw_hand (line 16) | def draw_hand(np_random): function usable_ace (line 20) | def usable_ace(hand): # Does this hand have a usable ace? function sum_hand (line 24) | def sum_hand(hand): # Return current hand total function is_bust (line 30) | def is_bust(hand): # Is this hand a bust? function score (line 34) | def score(hand): # What is the score of this hand (0 if bust) function is_natural (line 38) | def is_natural(hand): # Is this hand a natural blackjack? class BlackjackEnv (line 42) | class BlackjackEnv(gym.Env): method __init__ (line 67) | def __init__(self, natural=False): method _seed (line 82) | def _seed(self, seed=None): method _step (line 86) | def _step(self, action): method _get_obs (line 105) | def _get_obs(self): method _reset (line 108) | def _reset(self): FILE: lib/envs/cliff_walking.py class CliffWalkingEnv (line 15) | class CliffWalkingEnv(discrete.DiscreteEnv): method _limit_coordinates (line 19) | def _limit_coordinates(self, coord): method _calculate_transition_prob (line 26) | def _calculate_transition_prob(self, current, delta): method __init__ (line 42) | def __init__(self): method _render (line 68) | def _render(self, mode='human', close=False): FILE: lib/envs/gridworld.py class GridworldEnv (line 10) | class GridworldEnv(discrete.DiscreteEnv): method __init__ (line 32) | def __init__(self, shape=[4,4]): method _render (line 85) | def _render(self, mode='human', close=False): FILE: lib/envs/two_room_domain.py class TwoRooms (line 53) | class TwoRooms: method __init__ (line 55) | def __init__(self, length=9): method _init_grid (line 64) | def _init_grid(self): method _check_coords_and_move (line 90) | def _check_coords_and_move(self, coord): method step (line 103) | def step(self, action): method reset (line 136) | def reset(self): method render (line 141) | def render(self): method action_space_sample (line 147) | def action_space_sample(self): method _pretty_print (line 153) | def _pretty_print(self): function test_nine_rooms (line 157) | def test_nine_rooms(): FILE: lib/envs/windy_gridworld.py class WindyGridworldEnv (line 11) | class WindyGridworldEnv(discrete.DiscreteEnv): method _limit_coordinates (line 15) | def _limit_coordinates(self, coord): method _calculate_transition_prob (line 22) | def _calculate_transition_prob(self, current, delta, winds): method __init__ (line 29) | def __init__(self): method _render (line 56) | def _render(self, mode='human', close=False): FILE: lib/plotting.py function plot_cost_to_go_mountain_car (line 10) | def plot_cost_to_go_mountain_car(env, estimator, num_tiles=20): function plot_value_function (line 28) | def plot_value_function(V, title="Value Function"): function plot_episode_stats (line 63) | def plot_episode_stats(stats, smoothing_window=10, noshow=False, dosave=... FILE: q_learning/Q-Learning.py class QLearningAgent (line 51) | class QLearningAgent(): method __init__ (line 53) | def __init__(self, env): method policy_exploration (line 70) | def policy_exploration(self, state, epsilon=0.0): method alpha_schedule (line 88) | def alpha_schedule(self, t, state, action): method q_learning (line 105) | def q_learning(self, num_episodes, max_ep_steps=10000, discount=1.0, e... FILE: trpo/fxn_approx.py class LinearValueFunction (line 19) | class LinearValueFunction(object): method fit (line 23) | def fit(self, X, y): method predict (line 37) | def predict(self, X): method preproc (line 44) | def preproc(self, X): class NnValueFunction (line 49) | class NnValueFunction(object): method __init__ (line 52) | def __init__(self, session, ob_dim=None, n_epochs=10, stepsize=1e-3): method fit (line 71) | def fit(self, X, y): method predict (line 98) | def predict(self, X): method preproc (line 107) | def preproc(self, X): FILE: trpo/main.py function run_trpo_algorithm (line 30) | def run_trpo_algorithm(args, vf_params, logdir): FILE: trpo/trpo.py class TRPO (line 26) | class TRPO: method __init__ (line 29) | def __init__(self, args, sess, env, vf_params): method update_policy (line 155) | def update_policy(self, paths, infodict): method _flatgrad (line 239) | def _flatgrad(self, loss, var_list): method _act (line 260) | def _act(self, ob): method get_paths (line 277) | def get_paths(self, seed_iter, env): method compute_advantages (line 325) | def compute_advantages(self, paths): method fit_value_function (line 350) | def fit_value_function(self, paths, vfdict): method log_diagnostics (line 363) | def log_diagnostics(self, paths, infodict, vfdict): FILE: trpo/utils_trpo.py function cg (line 14) | def cg(f_Ax, b, cg_iters=10, verbose=False, residual_tol=1e-10): function backtracking_line_search (line 68) | def backtracking_line_search(f, x, fullstep, expected_improve_rate, FILE: utils/logz.py function colorize (line 29) | def colorize(string, color, bold=False, highlight=False): class G (line 38) | class G: function configure_output_dir (line 46) | def configure_output_dir(d=None): function log_tabular (line 63) | def log_tabular(key, val): function dump_tabular (line 76) | def dump_tabular(): FILE: utils/policies.py class StochasticPolicy (line 23) | class StochasticPolicy(object): method __init__ (line 25) | def __init__(self, sess, ob_dim, ac_dim): method sample_action (line 32) | def sample_action(self, x): class GibbsPolicy (line 37) | class GibbsPolicy(StochasticPolicy): method __init__ (line 41) | def __init__(self, sess, ob_dim, ac_dim): method sample_action (line 87) | def sample_action(self, ob): method update_policy (line 91) | def update_policy(self, ob_no, ac_n, std_adv_n, stepsize): method kldiv_and_entropy (line 106) | def kldiv_and_entropy(self, ob_no, oldlogits_na): class GaussianPolicy (line 117) | class GaussianPolicy(StochasticPolicy): method __init__ (line 121) | def __init__(self, sess, ob_dim, ac_dim): method sample_action (line 167) | def sample_action(self, ob): method update_policy (line 171) | def update_policy(self, ob_no, ac_n, std_adv_n, stepsize): method kldiv_and_entropy (line 190) | def kldiv_and_entropy(self, ob_no, oldmean_na, oldlogstd_a): FILE: utils/utils_pg.py function gauss_log_prob_1 (line 12) | def gauss_log_prob_1(mu, logstd, x): function gauss_log_prob (line 23) | def gauss_log_prob(mu, logstd, x): function gauss_KL_1 (line 43) | def gauss_KL_1(mu1, logstd1, mu2, logstd2): function gauss_KL (line 56) | def gauss_KL(mu1, logstd1, mu2, logstd2): function normc_initializer (line 80) | def normc_initializer(std=1.0): function dense (line 89) | def dense(x, size, name, weight_init=None): function fancy_slice_2d (line 96) | def fancy_slice_2d(X, inds0, inds1): function discount (line 106) | def discount(x, gamma): function lrelu (line 114) | def lrelu(x, leak=0.2): function explained_variance_1d (line 121) | def explained_variance_1d(ypred,y): function categorical_sample_logits (line 131) | def categorical_sample_logits(logits): function pathlength (line 146) | def pathlength(path): FILE: utils/value_functions.py class LinearValueFunction (line 13) | class LinearValueFunction(object): method __init__ (line 16) | def __init__(self): method fit (line 19) | def fit(self, X, y): method predict (line 33) | def predict(self, X): method preproc (line 40) | def preproc(self, X): class NnValueFunction (line 45) | class NnValueFunction(object): method __init__ (line 48) | def __init__(self, session, ob_dim=None, n_epochs=20, stepsize=1e-3): method fit (line 86) | def fit(self, X, y, session=None): method predict (line 103) | def predict(self, X): method preproc (line 113) | def preproc(self, X): FILE: vpg/main.py function run_vpg (line 29) | def run_vpg(args, vf_params, logdir, env, sess, continuous_control):