SYMBOL INDEX (1041 symbols across 131 files) FILE: baselines/a2c/a2c.py class Model (line 19) | class Model(object): method __init__ (line 33) | def __init__(self, policy, env, nsteps, function learn (line 119) | def learn( FILE: baselines/a2c/runner.py class Runner (line 5) | class Runner(AbstractEnvRunner): method __init__ (line 15) | def __init__(self, env, model, nsteps=5, gamma=0.99): method run (line 21) | def run(self): FILE: baselines/a2c/utils.py function sample (line 6) | def sample(logits): function cat_entropy (line 10) | def cat_entropy(logits): function cat_entropy_softmax (line 17) | def cat_entropy_softmax(p0): function ortho_init (line 20) | def ortho_init(scale=1.0): function conv (line 37) | def conv(x, scope, *, nf, rf, stride, pad='VALID', init_scale=1.0, data_... function fc (line 58) | def fc(x, scope, nh, *, init_scale=1.0, init_bias=0.0): function batch_to_seq (line 65) | def batch_to_seq(h, nbatch, nsteps, flat=False): function seq_to_batch (line 72) | def seq_to_batch(h, flat = False): function lstm (line 81) | def lstm(xs, ms, s, scope, nh, init_scale=1.0): function _ln (line 104) | def _ln(x, g, b, e=1e-5, axes=[1]): function lnlstm (line 110) | def lnlstm(xs, ms, s, scope, nh, init_scale=1.0): function conv_to_fc (line 142) | def conv_to_fc(x): function discount_with_dones (line 147) | def discount_with_dones(rewards, dones, gamma): function find_trainable_variables (line 155) | def find_trainable_variables(key): function make_path (line 158) | def make_path(f): function constant (line 161) | def constant(p): function linear (line 164) | def linear(p): function middle_drop (line 167) | def middle_drop(p): function double_linear_con (line 173) | def double_linear_con(p): function double_middle_drop (line 180) | def double_middle_drop(p): class Scheduler (line 197) | class Scheduler(object): method __init__ (line 199) | def __init__(self, v, nvalues, schedule): method value (line 205) | def value(self): method value_steps (line 210) | def value_steps(self, steps): class EpisodeStats (line 214) | class EpisodeStats: method __init__ (line 215) | def __init__(self, nsteps, nenvs): method feed (line 224) | def feed(self, rewards, masks): method mean_length (line 237) | def mean_length(self): method mean_reward (line 243) | def mean_reward(self): function get_by_index (line 251) | def get_by_index(x, idx): function check_shape (line 259) | def check_shape(ts,shapes): function avg_norm (line 265) | def avg_norm(t): function gradient_add (line 268) | def gradient_add(g1, g2, param): function q_explained_variance (line 278) | def q_explained_variance(qpred, q): FILE: baselines/acer/acer.py function strip (line 21) | def strip(var, nenvs, nsteps, flat = False): function q_retrace (line 25) | def q_retrace(R, D, q_i, v, rho_i, nenvs, nsteps, gamma): class Model (line 58) | class Model(object): method __init__ (line 59) | def __init__(self, policy, ob_space, ac_space, nenvs, nsteps, ent_coef... class Acer (line 230) | class Acer(): method __init__ (line 231) | def __init__(self, runner, model, buffer, log_interval): method call (line 240) | def call(self, on_policy): function learn (line 275) | def learn(network, env, seed=None, nsteps=20, total_timesteps=int(80e6),... FILE: baselines/acer/buffer.py class Buffer (line 3) | class Buffer(object): method __init__ (line 5) | def __init__(self, env, nsteps, size=50000): method has_atleast (line 30) | def has_atleast(self, frames): method can_sample (line 35) | def can_sample(self): method decode (line 39) | def decode(self, enc_obs, dones): method put (line 47) | def put(self, enc_obs, actions, rewards, mus, dones, masks): method take (line 70) | def take(self, x, idx, envx): method get (line 77) | def get(self): function _stack_obs_ref (line 101) | def _stack_obs_ref(enc_obs, dones, nsteps): function _stack_obs (line 124) | def _stack_obs(enc_obs, dones, nsteps): function test_stack_obs (line 142) | def test_stack_obs(): FILE: baselines/acer/defaults.py function atari (line 1) | def atari(): FILE: baselines/acer/policies.py class AcerCnnPolicy (line 7) | class AcerCnnPolicy(object): method __init__ (line 9) | def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reu... class AcerLstmPolicy (line 45) | class AcerLstmPolicy(object): method __init__ (line 47) | def __init__(self, sess, ob_space, ac_space, nenv, nsteps, nstack, reu... FILE: baselines/acer/runner.py class Runner (line 7) | class Runner(AbstractEnvRunner): method __init__ (line 9) | def __init__(self, env, model, nsteps): method run (line 26) | def run(self): FILE: baselines/acktr/acktr.py class Model (line 18) | class Model(object): method __init__ (line 20) | def __init__(self, policy, ob_space, ac_space, nenvs,total_timesteps, ... function learn (line 95) | def learn(network, env, seed, total_timesteps=int(40e6), gamma=0.99, log... FILE: baselines/acktr/defaults.py function mujoco (line 1) | def mujoco(): FILE: baselines/acktr/kfac.py class KfacOptimizer (line 13) | class KfacOptimizer(): method __init__ (line 15) | def __init__(self, learning_rate=0.01, momentum=0.9, clip_kl=0.01, kfa... method getFactors (line 58) | def getFactors(self, g, varlist): method getStats (line 183) | def getStats(self, factors, varlist): method compute_and_apply_stats (line 285) | def compute_and_apply_stats(self, loss_sampled, var_list=None): method compute_stats (line 293) | def compute_stats(self, loss_sampled, var_list=None): method apply_stats (line 440) | def apply_stats(self, statsUpdates): method _apply_stats (line 476) | def _apply_stats(self, statsUpdates, accumulate=False, accumulateCoeff... method getStatsEigen (line 512) | def getStatsEigen(self, stats=None): method computeStatsEigen (line 538) | def computeStatsEigen(self): method applyStatsEigen (line 602) | def applyStatsEigen(self, eigen_list): method getKfacPrecondUpdates (line 618) | def getKfacPrecondUpdates(self, gradlist, varlist): method compute_gradients (line 803) | def compute_gradients(self, loss, var_list=None): method apply_gradients_kfac (line 811) | def apply_gradients_kfac(self, grads): method apply_gradients (line 897) | def apply_gradients(self, grads): method minimize (line 924) | def minimize(self, loss, loss_sampled, var_list=None): FILE: baselines/acktr/kfac_utils.py function gmatmul (line 3) | def gmatmul(a, b, transpose_a=False, transpose_b=False, reduce_dim=None): function clipoutNeg (line 55) | def clipoutNeg(vec, threshold=1e-6): function detectMinVal (line 60) | def detectMinVal(input_mat, var, threshold=1e-6, name='', debug=False): function factorReshape (line 73) | def factorReshape(Q, e, grad, facIndx=0, ftype='act'): FILE: baselines/acktr/utils.py function dense (line 3) | def dense(x, size, name, weight_init=None, bias_init=0, weight_loss_dict... function kl_div (line 21) | def kl_div(action_dist1, action_dist2, action_size): FILE: baselines/bench/benchmarks.py function register_benchmark (line 13) | def register_benchmark(benchmark): function list_benchmarks (line 26) | def list_benchmarks(): function get_benchmark (line 30) | def get_benchmark(benchmark_name): function get_task (line 37) | def get_task(benchmark, env_id): function find_task_for_env_id_in_any_benchmark (line 42) | def find_task_for_env_id_in_any_benchmark(env_id): FILE: baselines/bench/monitor.py class Monitor (line 10) | class Monitor(Wrapper): method __init__ (line 14) | def __init__(self, env, filename, allow_early_resets=False, reset_keyw... method reset (line 35) | def reset(self, **kwargs): method reset_state (line 44) | def reset_state(self): method step (line 51) | def step(self, action): method update (line 58) | def update(self, ob, rew, done, info): method close (line 79) | def close(self): method get_total_steps (line 84) | def get_total_steps(self): method get_episode_rewards (line 87) | def get_episode_rewards(self): method get_episode_lengths (line 90) | def get_episode_lengths(self): method get_episode_times (line 93) | def get_episode_times(self): class LoadMonitorResultsError (line 96) | class LoadMonitorResultsError(Exception): class ResultsWriter (line 100) | class ResultsWriter(object): method __init__ (line 101) | def __init__(self, filename, header='', extra_keys=()): method write_row (line 117) | def write_row(self, epinfo): function get_monitor_files (line 123) | def get_monitor_files(dir): function load_results (line 126) | def load_results(dir): FILE: baselines/bench/test_monitor.py function test_monitor (line 5) | def test_monitor(): FILE: baselines/common/atari_wrappers.py class NoopResetEnv (line 12) | class NoopResetEnv(gym.Wrapper): method __init__ (line 13) | def __init__(self, env, noop_max=30): method reset (line 23) | def reset(self, **kwargs): method step (line 38) | def step(self, ac): class FireResetEnv (line 41) | class FireResetEnv(gym.Wrapper): method __init__ (line 42) | def __init__(self, env): method reset (line 48) | def reset(self, **kwargs): method step (line 58) | def step(self, ac): class EpisodicLifeEnv (line 61) | class EpisodicLifeEnv(gym.Wrapper): method __init__ (line 62) | def __init__(self, env): method step (line 70) | def step(self, action): method reset (line 84) | def reset(self, **kwargs): class MaxAndSkipEnv (line 97) | class MaxAndSkipEnv(gym.Wrapper): method __init__ (line 98) | def __init__(self, env, skip=4): method step (line 105) | def step(self, action): method reset (line 122) | def reset(self, **kwargs): class ClipRewardEnv (line 125) | class ClipRewardEnv(gym.RewardWrapper): method __init__ (line 126) | def __init__(self, env): method reward (line 129) | def reward(self, reward): class WarpFrame (line 134) | class WarpFrame(gym.ObservationWrapper): method __init__ (line 135) | def __init__(self, env, width=84, height=84, grayscale=True, dict_spac... method observation (line 166) | def observation(self, obs): class FrameStack (line 188) | class FrameStack(gym.Wrapper): method __init__ (line 189) | def __init__(self, env, k): method reset (line 204) | def reset(self): method step (line 210) | def step(self, action): method _get_ob (line 215) | def _get_ob(self): class ScaledFloatFrame (line 219) | class ScaledFloatFrame(gym.ObservationWrapper): method __init__ (line 220) | def __init__(self, env): method observation (line 224) | def observation(self, observation): class LazyFrames (line 229) | class LazyFrames(object): method __init__ (line 230) | def __init__(self, frames): method _force (line 241) | def _force(self): method __array__ (line 247) | def __array__(self, dtype=None): method __len__ (line 253) | def __len__(self): method __getitem__ (line 256) | def __getitem__(self, i): method count (line 259) | def count(self): method frame (line 263) | def frame(self, i): function make_atari (line 266) | def make_atari(env_id, max_episode_steps=None): function wrap_deepmind (line 275) | def wrap_deepmind(env, episode_life=True, clip_rewards=True, frame_stack... FILE: baselines/common/cg.py function cg (line 2) | def cg(f_Ax, b, cg_iters=10, callback=None, verbose=False, residual_tol=... FILE: baselines/common/cmd_util.py function make_vec_env (line 22) | def make_vec_env(env_id, env_type, num_env, seed, function make_env (line 62) | def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_... function make_mujoco_env (line 108) | def make_mujoco_env(env_id, seed, reward_scale=1.0): function make_robotics_env (line 124) | def make_robotics_env(env_id, seed, rank=0): function arg_parser (line 137) | def arg_parser(): function atari_arg_parser (line 144) | def atari_arg_parser(): function mujoco_arg_parser (line 151) | def mujoco_arg_parser(): function common_arg_parser (line 155) | def common_arg_parser(): function robotics_arg_parser (line 176) | def robotics_arg_parser(): function parse_unknown_args (line 187) | def parse_unknown_args(args): FILE: baselines/common/console_util.py function fmt_row (line 12) | def fmt_row(width, row, header=False): function fmt_item (line 17) | def fmt_item(x, l): function colorize (line 42) | def colorize(string, color='green', bold=False, highlight=False): function print_cmd (line 50) | def print_cmd(cmd, dry=False): function get_git_commit (line 58) | def get_git_commit(cwd=None): function get_git_commit_message (line 61) | def get_git_commit_message(cwd=None): function ccap (line 64) | def ccap(cmd, dry=False, env=None, **kwargs): function timed (line 73) | def timed(msg): FILE: baselines/common/dataset.py class Dataset (line 3) | class Dataset(object): method __init__ (line 4) | def __init__(self, data_map, deterministic=False, shuffle=True): method shuffle (line 12) | def shuffle(self): method next_batch (line 23) | def next_batch(self, batch_size): method iterate_once (line 36) | def iterate_once(self, batch_size): method subset (line 43) | def subset(self, num_elements, deterministic=True): function iterbatches (line 50) | def iterbatches(arrays, *, num_batches=None, batch_size=None, shuffle=Tr... FILE: baselines/common/distributions.py class Pd (line 7) | class Pd(object): method flatparam (line 11) | def flatparam(self): method mode (line 13) | def mode(self): method neglogp (line 15) | def neglogp(self, x): method kl (line 18) | def kl(self, other): method entropy (line 20) | def entropy(self): method sample (line 22) | def sample(self): method logp (line 24) | def logp(self, x): method get_shape (line 26) | def get_shape(self): method shape (line 29) | def shape(self): method __getitem__ (line 31) | def __getitem__(self, idx): class PdType (line 34) | class PdType(object): method pdclass (line 38) | def pdclass(self): method pdfromflat (line 40) | def pdfromflat(self, flat): method pdfromlatent (line 42) | def pdfromlatent(self, latent_vector, init_scale, init_bias): method param_shape (line 44) | def param_shape(self): method sample_shape (line 46) | def sample_shape(self): method sample_dtype (line 48) | def sample_dtype(self): method param_placeholder (line 51) | def param_placeholder(self, prepend_shape, name=None): method sample_placeholder (line 53) | def sample_placeholder(self, prepend_shape, name=None): method __eq__ (line 56) | def __eq__(self, other): class CategoricalPdType (line 59) | class CategoricalPdType(PdType): method __init__ (line 60) | def __init__(self, ncat): method pdclass (line 62) | def pdclass(self): method pdfromlatent (line 64) | def pdfromlatent(self, latent_vector, init_scale=1.0, init_bias=0.0): method param_shape (line 68) | def param_shape(self): method sample_shape (line 70) | def sample_shape(self): method sample_dtype (line 72) | def sample_dtype(self): class MultiCategoricalPdType (line 76) | class MultiCategoricalPdType(PdType): method __init__ (line 77) | def __init__(self, nvec): method pdclass (line 80) | def pdclass(self): method pdfromflat (line 82) | def pdfromflat(self, flat): method pdfromlatent (line 85) | def pdfromlatent(self, latent, init_scale=1.0, init_bias=0.0): method param_shape (line 89) | def param_shape(self): method sample_shape (line 91) | def sample_shape(self): method sample_dtype (line 93) | def sample_dtype(self): class DiagGaussianPdType (line 96) | class DiagGaussianPdType(PdType): method __init__ (line 97) | def __init__(self, size): method pdclass (line 99) | def pdclass(self): method pdfromlatent (line 102) | def pdfromlatent(self, latent_vector, init_scale=1.0, init_bias=0.0): method param_shape (line 108) | def param_shape(self): method sample_shape (line 110) | def sample_shape(self): method sample_dtype (line 112) | def sample_dtype(self): class BernoulliPdType (line 115) | class BernoulliPdType(PdType): method __init__ (line 116) | def __init__(self, size): method pdclass (line 118) | def pdclass(self): method param_shape (line 120) | def param_shape(self): method sample_shape (line 122) | def sample_shape(self): method sample_dtype (line 124) | def sample_dtype(self): method pdfromlatent (line 126) | def pdfromlatent(self, latent_vector, init_scale=1.0, init_bias=0.0): class CategoricalPd (line 153) | class CategoricalPd(Pd): method __init__ (line 154) | def __init__(self, logits): method flatparam (line 156) | def flatparam(self): method mode (line 158) | def mode(self): method mean (line 162) | def mean(self): method neglogp (line 164) | def neglogp(self, x): method kl (line 184) | def kl(self, other): method entropy (line 193) | def entropy(self): method sample (line 199) | def sample(self): method fromflat (line 203) | def fromflat(cls, flat): class MultiCategoricalPd (line 206) | class MultiCategoricalPd(Pd): method __init__ (line 207) | def __init__(self, nvec, flat): method flatparam (line 211) | def flatparam(self): method mode (line 213) | def mode(self): method neglogp (line 215) | def neglogp(self, x): method kl (line 217) | def kl(self, other): method entropy (line 219) | def entropy(self): method sample (line 221) | def sample(self): method fromflat (line 224) | def fromflat(cls, flat): class DiagGaussianPd (line 227) | class DiagGaussianPd(Pd): method __init__ (line 228) | def __init__(self, flat): method flatparam (line 234) | def flatparam(self): method mode (line 236) | def mode(self): method neglogp (line 238) | def neglogp(self, x): method kl (line 242) | def kl(self, other): method entropy (line 245) | def entropy(self): method sample (line 247) | def sample(self): method fromflat (line 250) | def fromflat(cls, flat): class BernoulliPd (line 254) | class BernoulliPd(Pd): method __init__ (line 255) | def __init__(self, logits): method flatparam (line 258) | def flatparam(self): method mean (line 261) | def mean(self): method mode (line 263) | def mode(self): method neglogp (line 265) | def neglogp(self, x): method kl (line 267) | def kl(self, other): method entropy (line 269) | def entropy(self): method sample (line 271) | def sample(self): method fromflat (line 275) | def fromflat(cls, flat): function make_pdtype (line 278) | def make_pdtype(ac_space): function shape_el (line 292) | def shape_el(v, i): function test_probtypes (line 300) | def test_probtypes(): function validate_probtype (line 321) | def validate_probtype(probtype, pdparam): function _matching_fc (line 351) | def _matching_fc(tensor, name, size, init_scale, init_bias): FILE: baselines/common/input.py function observation_placeholder (line 5) | def observation_placeholder(ob_space, batch_size=None, name='Ob'): function observation_input (line 34) | def observation_input(ob_space, batch_size=None, name='Ob'): function encode_observation (line 43) | def encode_observation(ob_space, placeholder): FILE: baselines/common/math_util.py function discount (line 5) | def discount(x, gamma): function explained_variance (line 25) | def explained_variance(ypred,y): function explained_variance_2d (line 40) | def explained_variance_2d(ypred, y): function ncc (line 47) | def ncc(ypred, y): function flatten_arrays (line 50) | def flatten_arrays(arrs): function unflatten_vector (line 53) | def unflatten_vector(vec, shapes): function discount_with_boundaries (line 63) | def discount_with_boundaries(X, New, gamma): function test_discount_with_boundaries (line 75) | def test_discount_with_boundaries(): FILE: baselines/common/misc_util.py function zipsame (line 10) | def zipsame(*seqs): class EzPickle (line 16) | class EzPickle(object): method __init__ (line 36) | def __init__(self, *args, **kwargs): method __getstate__ (line 40) | def __getstate__(self): method __setstate__ (line 43) | def __setstate__(self, d): function set_global_seeds (line 48) | def set_global_seeds(i): function pretty_eta (line 65) | def pretty_eta(seconds_left): class RunningAvg (line 107) | class RunningAvg(object): method __init__ (line 108) | def __init__(self, gamma, init_value=None): method update (line 123) | def update(self, new_val): method __float__ (line 136) | def __float__(self): function boolean_flag (line 140) | def boolean_flag(parser, name, default=False, help=None): function get_wrapper_by_name (line 159) | def get_wrapper_by_name(env, classname): function relatively_safe_pickle_dump (line 185) | def relatively_safe_pickle_dump(obj, path, compression=False): function pickle_load (line 221) | def pickle_load(path, compression=False): FILE: baselines/common/models.py function register (line 9) | def register(name): function nature_cnn (line 15) | def nature_cnn(unscaled_images, **conv_kwargs): function build_impala_cnn (line 28) | def build_impala_cnn(unscaled_images, depths=[16,32,32], **conv_kwargs): function mlp (line 75) | def mlp(num_layers=2, num_hidden=64, activation=tf.tanh, layer_norm=False): function cnn (line 107) | def cnn(**conv_kwargs): function impala_cnn (line 113) | def impala_cnn(**conv_kwargs): function cnn_small (line 119) | def cnn_small(**conv_kwargs): function lstm (line 132) | def lstm(nlstm=128, layer_norm=False): function cnn_lstm (line 187) | def cnn_lstm(nlstm=128, layer_norm=False, conv_fn=nature_cnn, **conv_kwa... function impala_cnn_lstm (line 213) | def impala_cnn_lstm(): function cnn_lnlstm (line 217) | def cnn_lnlstm(nlstm=128, **conv_kwargs): function conv_only (line 222) | def conv_only(convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)], **conv_kwargs): function _normalize_clip_observation (line 251) | def _normalize_clip_observation(x, clip_range=[-5.0, 5.0]): function get_network_builder (line 257) | def get_network_builder(name): FILE: baselines/common/mpi_adam.py class MpiAdam (line 10) | class MpiAdam(object): method __init__ (line 11) | def __init__(self, var_list, *, beta1=0.9, beta2=0.999, epsilon=1e-08,... method update (line 25) | def update(self, localg, stepsize): method sync (line 44) | def sync(self): method check_synced (line 51) | def check_synced(self): function test_MpiAdam (line 64) | def test_MpiAdam(): FILE: baselines/common/mpi_adam_optimizer.py class MpiAdamOptimizer (line 11) | class MpiAdamOptimizer(tf.train.AdamOptimizer): method __init__ (line 13) | def __init__(self, comm, grad_clip=None, mpi_rank_weight=1, **kwargs): method compute_gradients (line 18) | def compute_gradients(self, loss, var_list, **kwargs): function check_synced (line 53) | def check_synced(localval, comm=None): function test_nonfreeze (line 71) | def test_nonfreeze(): FILE: baselines/common/mpi_fork.py function mpi_fork (line 3) | def mpi_fork(n, bind_to_core=False): FILE: baselines/common/mpi_moments.py function mpi_mean (line 6) | def mpi_mean(x, axis=0, comm=None, keepdims=False): function mpi_moments (line 20) | def mpi_moments(x, axis=0, comm=None, keepdims=False): function test_runningmeanstd (line 35) | def test_runningmeanstd(): function _helper_runningmeanstd (line 41) | def _helper_runningmeanstd(): FILE: baselines/common/mpi_running_mean_std.py class RunningMeanStd (line 8) | class RunningMeanStd(object): method __init__ (line 10) | def __init__(self, epsilon=1e-2, shape=()): method update (line 41) | def update(self, x): function test_runningmeanstd (line 51) | def test_runningmeanstd(): function test_dist (line 70) | def test_dist(): FILE: baselines/common/mpi_util.py function sync_from_root (line 15) | def sync_from_root(sess, variables, comm=None): function gpu_count (line 28) | def gpu_count(): function setup_mpi_gpus (line 37) | def setup_mpi_gpus(): function get_local_rank_size (line 49) | def get_local_rank_size(comm): function share_file (line 69) | def share_file(comm, path): function dict_gather (line 87) | def dict_gather(comm, d, op='mean', assert_all_have_data=True): function mpi_weighted_mean (line 110) | def mpi_weighted_mean(comm, local_name2valcount): FILE: baselines/common/plot_util.py function smooth (line 11) | def smooth(y, radius, mode='two_sided', valid_only=False): function one_sided_ema (line 39) | def one_sided_ema(xolds, yolds, low=None, high=None, n=512, decay_steps=... function symmetric_ema (line 111) | def symmetric_ema(xolds, yolds, low=None, high=None, n=512, decay_steps=... function load_results (line 152) | def load_results(root_dir_or_dirs, enable_progress=True, enable_monitor=... function default_xy_fn (line 227) | def default_xy_fn(r): function default_split_fn (line 232) | def default_split_fn(r): function plot_results (line 240) | def plot_results( function regression_analysis (line 407) | def regression_analysis(df): function test_smooth (line 416) | def test_smooth(): FILE: baselines/common/policies.py class PolicyWithValue (line 13) | class PolicyWithValue(object): method __init__ (line 18) | def __init__(self, env, observations, latent, estimate_q=False, vf_lat... method _evaluate (line 66) | def _evaluate(self, variables, observation, **extra_feed): method step (line 77) | def step(self, observation, **extra_feed): method value (line 98) | def value(self, ob, *args, **kwargs): method save (line 115) | def save(self, save_path): method load (line 118) | def load(self, load_path): function build_policy (line 121) | def build_policy(env, policy_network, value_network=None, normalize_obs... function _normalize_clip_observation (line 182) | def _normalize_clip_observation(x, clip_range=[-5.0, 5.0]): FILE: baselines/common/retro_wrappers.py class StochasticFrameSkip (line 10) | class StochasticFrameSkip(gym.Wrapper): method __init__ (line 11) | def __init__(self, env, n, stickprob): method reset (line 19) | def reset(self, **kwargs): method step (line 23) | def step(self, ac): method seed (line 45) | def seed(self, s): class PartialFrameStack (line 48) | class PartialFrameStack(gym.Wrapper): method __init__ (line 49) | def __init__(self, env, k, channel=1): method reset (line 63) | def reset(self): method step (line 70) | def step(self, ac): method _get_ob (line 75) | def _get_ob(self): class Downsample (line 80) | class Downsample(gym.ObservationWrapper): method __init__ (line 81) | def __init__(self, env, ratio): method observation (line 91) | def observation(self, frame): class Rgb2gray (line 98) | class Rgb2gray(gym.ObservationWrapper): method __init__ (line 99) | def __init__(self, env): method observation (line 108) | def observation(self, frame): class MovieRecord (line 113) | class MovieRecord(gym.Wrapper): method __init__ (line 114) | def __init__(self, env, savedir, k): method reset (line 119) | def reset(self): class AppendTimeout (line 128) | class AppendTimeout(gym.Wrapper): method __init__ (line 129) | def __init__(self, env): method step (line 154) | def step(self, ac): method reset (line 159) | def reset(self): method _process (line 163) | def _process(self, ob): class StartDoingRandomActionsWrapper (line 170) | class StartDoingRandomActionsWrapper(gym.Wrapper): method __init__ (line 174) | def __init__(self, env, max_random_steps, on_startup=True, every_episo... method some_random_steps (line 183) | def some_random_steps(self): method reset (line 191) | def reset(self): method step (line 194) | def step(self, a): function make_retro (line 202) | def make_retro(*, game, state=None, max_episode_steps=4500, **kwargs): function wrap_deepmind_retro (line 212) | def wrap_deepmind_retro(env, scale=True, frame_stack=4): class SonicDiscretizer (line 224) | class SonicDiscretizer(gym.ActionWrapper): method __init__ (line 229) | def __init__(self, env): method action (line 242) | def action(self, a): # pylint: disable=W0221 class RewardScaler (line 245) | class RewardScaler(gym.RewardWrapper): method __init__ (line 251) | def __init__(self, env, scale=0.01): method reward (line 255) | def reward(self, reward): class AllowBacktracking (line 258) | class AllowBacktracking(gym.Wrapper): method __init__ (line 265) | def __init__(self, env): method reset (line 270) | def reset(self, **kwargs): # pylint: disable=E0202 method step (line 275) | def step(self, action): # pylint: disable=E0202 FILE: baselines/common/runners.py class AbstractEnvRunner (line 4) | class AbstractEnvRunner(ABC): method __init__ (line 5) | def __init__(self, *, env, model, nsteps): method run (line 17) | def run(self): FILE: baselines/common/running_mean_std.py class RunningMeanStd (line 5) | class RunningMeanStd(object): method __init__ (line 7) | def __init__(self, epsilon=1e-4, shape=()): method update (line 12) | def update(self, x): method update_from_moments (line 18) | def update_from_moments(self, batch_mean, batch_var, batch_count): function update_mean_var_count_from_moments (line 22) | def update_mean_var_count_from_moments(mean, var, count, batch_mean, bat... class TfRunningMeanStd (line 36) | class TfRunningMeanStd(object): method __init__ (line 42) | def __init__(self, epsilon=1e-4, shape=(), scope=''): method _set_mean_var_count (line 65) | def _set_mean_var_count(self): method update (line 68) | def update(self, x): function test_runningmeanstd (line 85) | def test_runningmeanstd(): function test_tf_runningmeanstd (line 102) | def test_tf_runningmeanstd(): function profile_tf_runningmeanstd (line 120) | def profile_tf_runningmeanstd(): FILE: baselines/common/schedules.py class Schedule (line 12) | class Schedule(object): method value (line 13) | def value(self, t): class ConstantSchedule (line 18) | class ConstantSchedule(object): method __init__ (line 19) | def __init__(self, value): method value (line 29) | def value(self, t): function linear_interpolation (line 34) | def linear_interpolation(l, r, alpha): class PiecewiseSchedule (line 38) | class PiecewiseSchedule(object): method __init__ (line 39) | def __init__(self, endpoints, interpolation=linear_interpolation, outs... method value (line 64) | def value(self, t): class LinearSchedule (line 76) | class LinearSchedule(object): method __init__ (line 77) | def __init__(self, schedule_timesteps, final_p, initial_p=1.0): method value (line 96) | def value(self, t): FILE: baselines/common/segment_tree.py class SegmentTree (line 4) | class SegmentTree(object): method __init__ (line 5) | def __init__(self, capacity, operation, neutral_element): method _reduce_helper (line 36) | def _reduce_helper(self, start, end, node, node_start, node_end): method reduce (line 51) | def reduce(self, start=0, end=None): method __setitem__ (line 76) | def __setitem__(self, idx, val): method __getitem__ (line 88) | def __getitem__(self, idx): class SumSegmentTree (line 93) | class SumSegmentTree(SegmentTree): method __init__ (line 94) | def __init__(self, capacity): method sum (line 101) | def sum(self, start=0, end=None): method find_prefixsum_idx (line 105) | def find_prefixsum_idx(self, prefixsum): class MinSegmentTree (line 134) | class MinSegmentTree(SegmentTree): method __init__ (line 135) | def __init__(self, capacity): method min (line 142) | def min(self, start=0, end=None): FILE: baselines/common/test_mpi_util.py function test_mpi_weighted_mean (line 10) | def test_mpi_weighted_mean(): FILE: baselines/common/tests/envs/fixed_sequence_env.py class FixedSequenceEnv (line 6) | class FixedSequenceEnv(Env): method __init__ (line 7) | def __init__( method reset (line 21) | def reset(self): method step (line 25) | def step(self, actions): method seed (line 34) | def seed(self, seed=None): method _choose_next_state (line 37) | def _choose_next_state(self): method _get_reward (line 40) | def _get_reward(self, actions): FILE: baselines/common/tests/envs/identity_env.py class IdentityEnv (line 7) | class IdentityEnv(Env): method __init__ (line 8) | def __init__( method reset (line 22) | def reset(self): method step (line 30) | def step(self, actions): method seed (line 39) | def seed(self, seed=None): method _get_reward (line 43) | def _get_reward(self, state, actions): class DiscreteIdentityEnv (line 47) | class DiscreteIdentityEnv(IdentityEnv): method __init__ (line 48) | def __init__( method _get_reward (line 59) | def _get_reward(self, state, actions): class MultiDiscreteIdentityEnv (line 62) | class MultiDiscreteIdentityEnv(IdentityEnv): method __init__ (line 63) | def __init__( method _get_reward (line 73) | def _get_reward(self, state, actions): class BoxIdentityEnv (line 77) | class BoxIdentityEnv(IdentityEnv): method __init__ (line 78) | def __init__( method _get_reward (line 87) | def _get_reward(self, state, actions): FILE: baselines/common/tests/envs/identity_env_test.py function test_discrete_nodelay (line 4) | def test_discrete_nodelay(): function test_discrete_delay1 (line 20) | def test_discrete_delay1(): FILE: baselines/common/tests/envs/mnist_env.py class MnistEnv (line 9) | class MnistEnv(Env): method __init__ (line 10) | def __init__( method reset (line 35) | def reset(self): method step (line 41) | def step(self, actions): method seed (line 51) | def seed(self, seed=None): method train_mode (line 54) | def train_mode(self): method test_mode (line 57) | def test_mode(self): method _choose_next_state (line 60) | def _choose_next_state(self): method _get_reward (line 68) | def _get_reward(self, actions): FILE: baselines/common/tests/test_cartpole.py function test_cartpole (line 26) | def test_cartpole(alg): FILE: baselines/common/tests/test_doc_examples.py function test_lstm_example (line 14) | def test_lstm_example(): FILE: baselines/common/tests/test_env_after_learn.py function test_env_after_learn (line 12) | def test_env_after_learn(algo): FILE: baselines/common/tests/test_fetchreach.py function test_fetchreach (line 21) | def test_fetchreach(alg): FILE: baselines/common/tests/test_fixed_sequence.py function test_fixed_sequence (line 29) | def test_fixed_sequence(alg, rnn): FILE: baselines/common/tests/test_identity.py function test_discrete_identity (line 30) | def test_discrete_identity(alg): function test_multidiscrete_identity (line 45) | def test_multidiscrete_identity(alg): function test_continuous_identity (line 60) | def test_continuous_identity(alg): FILE: baselines/common/tests/test_mnist.py function test_mnist (line 33) | def test_mnist(alg): FILE: baselines/common/tests/test_plot_util.py function test_plot_util (line 6) | def test_plot_util(): FILE: baselines/common/tests/test_schedules.py function test_piecewise_schedule (line 6) | def test_piecewise_schedule(): function test_constant_schedule (line 23) | def test_constant_schedule(): FILE: baselines/common/tests/test_segment_tree.py function test_tree_set (line 6) | def test_tree_set(): function test_tree_set_overlap (line 20) | def test_tree_set_overlap(): function test_prefixsum_idx (line 33) | def test_prefixsum_idx(): function test_prefixsum_idx2 (line 47) | def test_prefixsum_idx2(): function test_max_interval_tree (line 63) | def test_max_interval_tree(): FILE: baselines/common/tests/test_serialization.py function test_serialization (line 35) | def test_serialization(learn_fn, network_fn): function test_coexistence (line 87) | def test_coexistence(learn_fn, network_fn): function _serialize_variables (line 121) | def _serialize_variables(): function _get_action_stats (line 128) | def _get_action_stats(model, ob): FILE: baselines/common/tests/test_tf_util.py function test_function (line 10) | def test_function(): function test_multikwargs (line 26) | def test_multikwargs(): FILE: baselines/common/tests/test_with_mpi.py function with_mpi (line 14) | def with_mpi(nproc=2, timeout=30, skip_if_no_mpi=True): FILE: baselines/common/tests/util.py function simple_test (line 14) | def simple_test(env_fn, learn_fn, min_reward_fraction, n_trials=N_TRIALS): function reward_per_episode_test (line 41) | def reward_per_episode_test(env_fn, learn_fn, min_avg_reward, n_trials=N... function rollout (line 53) | def rollout(env, model, n_trials): function smoketest (line 81) | def smoketest(argstr, **kwargs): FILE: baselines/common/tf_util.py function switch (line 9) | def switch(condition, then_expression, else_expression): function lrelu (line 30) | def lrelu(x, leak=0.2): function huber_loss (line 39) | def huber_loss(x, delta=1.0): function get_session (line 51) | def get_session(config=None): function make_session (line 58) | def make_session(config=None, num_cpu=None, make_default=False, graph=No... function single_threaded_session (line 74) | def single_threaded_session(): function in_session (line 78) | def in_session(f): function initialize (line 87) | def initialize(): function normc_initializer (line 97) | def normc_initializer(std=1.0, axis=0): function conv2d (line 104) | def conv2d(x, num_filters, name, filter_size=(3, 3), stride=(1, 1), pad=... function function (line 137) | def function(inputs, outputs, updates=None, givens=None): class _Function (line 182) | class _Function(object): method __init__ (line 183) | def __init__(self, inputs, outputs, updates, givens): method _feed_input (line 194) | def _feed_input(self, feed_dict, inpt, value): method __call__ (line 200) | def __call__(self, *args, **kwargs): function var_shape (line 218) | def var_shape(x): function numel (line 224) | def numel(x): function intprod (line 227) | def intprod(x): function flatgrad (line 230) | def flatgrad(loss, var_list, clip_norm=None): class SetFromFlat (line 239) | class SetFromFlat(object): method __init__ (line 240) | def __init__(self, var_list, dtype=tf.float32): method __call__ (line 254) | def __call__(self, theta): class GetFlat (line 257) | class GetFlat(object): method __init__ (line 258) | def __init__(self, var_list): method __call__ (line 261) | def __call__(self): function flattenallbut0 (line 264) | def flattenallbut0(x): function get_placeholder (line 273) | def get_placeholder(name, dtype, shape): function get_placeholder_cached (line 285) | def get_placeholder_cached(name): function display_var_info (line 294) | def display_var_info(vars): function get_available_gpus (line 308) | def get_available_gpus(session_config=None): function load_state (line 325) | def load_state(fname, sess=None): function save_state (line 332) | def save_state(fname, sess=None): function save_variables (line 345) | def save_variables(save_path, variables=None, sess=None): function load_variables (line 357) | def load_variables(load_path, variables=None, sess=None): function adjust_shape (line 377) | def adjust_shape(placeholder, data): function _check_shape (line 404) | def _check_shape(placeholder_shape, data_shape): function _squeeze_shape (line 419) | def _squeeze_shape(shape): function launch_tensorboard_in_background (line 426) | def launch_tensorboard_in_background(log_dir): FILE: baselines/common/tile_images.py function tile_images (line 3) | def tile_images(img_nhwc): FILE: baselines/common/vec_env/dummy_vec_env.py class DummyVecEnv (line 5) | class DummyVecEnv(VecEnv): method __init__ (line 12) | def __init__(self, env_fns): method step_async (line 31) | def step_async(self, actions): method step_wait (line 45) | def step_wait(self): method reset (line 58) | def reset(self): method _save_obs (line 64) | def _save_obs(self, e, obs): method _obs_from_buf (line 71) | def _obs_from_buf(self): method get_images (line 74) | def get_images(self): method render (line 77) | def render(self, mode='human'): FILE: baselines/common/vec_env/shmem_vec_env.py class ShmemVecEnv (line 20) | class ShmemVecEnv(VecEnv): method __init__ (line 25) | def __init__(self, env_fns, spaces=None, context='spawn'): method reset (line 61) | def reset(self): method step_async (line 69) | def step_async(self, actions): method step_wait (line 75) | def step_wait(self): method close_extras (line 81) | def close_extras(self): method get_images (line 92) | def get_images(self, mode='human'): method _decode_obses (line 97) | def _decode_obses(self, obs): function _subproc_worker (line 107) | def _subproc_worker(pipe, parent_pipe, env_fn_wrapper, obs_bufs, obs_sha... FILE: baselines/common/vec_env/subproc_vec_env.py function worker (line 7) | def worker(remote, parent_remote, env_fn_wrappers): class SubprocVecEnv (line 39) | class SubprocVecEnv(VecEnv): method __init__ (line 44) | def __init__(self, env_fns, spaces=None, context='spawn', in_series=1): method step_async (line 75) | def step_async(self, actions): method step_wait (line 82) | def step_wait(self): method reset (line 90) | def reset(self): method close_extras (line 98) | def close_extras(self): method get_images (line 108) | def get_images(self): method _assert_not_closed (line 116) | def _assert_not_closed(self): method __del__ (line 119) | def __del__(self): function _flatten_obs (line 123) | def _flatten_obs(obs): function _flatten_list (line 133) | def _flatten_list(l): FILE: baselines/common/vec_env/test_vec_env.py function assert_venvs_equal (line 14) | def assert_venvs_equal(venv1, venv2, num_steps): function test_vec_env (line 49) | def test_vec_env(klass, dtype): # pylint: disable=R0914 function test_sync_sampling (line 72) | def test_sync_sampling(dtype, num_envs_in_series): function test_sync_sampling_sanity (line 94) | def test_sync_sampling_sanity(dtype, num_envs_in_series): class SimpleEnv (line 114) | class SimpleEnv(gym.Env): method __init__ (line 120) | def __init__(self, seed, shape, dtype): method step (line 133) | def step(self, action): method reset (line 140) | def reset(self): method render (line 145) | def render(self, mode=None): function test_mpi_with_subprocvecenv (line 151) | def test_mpi_with_subprocvecenv(): FILE: baselines/common/vec_env/test_video_recorder.py function test_video_recorder (line 20) | def test_video_recorder(klass, num_envs, video_length, video_interval): FILE: baselines/common/vec_env/util.py function copy_obs_dict (line 11) | def copy_obs_dict(obs): function dict_to_obs (line 18) | def dict_to_obs(obs_dict): function obs_space_info (line 28) | def obs_space_info(obs_space): function obs_to_dict (line 56) | def obs_to_dict(obs): FILE: baselines/common/vec_env/vec_env.py class AlreadySteppingError (line 7) | class AlreadySteppingError(Exception): method __init__ (line 13) | def __init__(self): class NotSteppingError (line 18) | class NotSteppingError(Exception): method __init__ (line 24) | def __init__(self): class VecEnv (line 29) | class VecEnv(ABC): method __init__ (line 43) | def __init__(self, num_envs, observation_space, action_space): method reset (line 49) | def reset(self): method step_async (line 61) | def step_async(self, actions): method step_wait (line 73) | def step_wait(self): method close_extras (line 86) | def close_extras(self): method close (line 93) | def close(self): method step (line 101) | def step(self, actions): method render (line 110) | def render(self, mode='human'): method get_images (line 121) | def get_images(self): method unwrapped (line 128) | def unwrapped(self): method get_viewer (line 134) | def get_viewer(self): class VecEnvWrapper (line 140) | class VecEnvWrapper(VecEnv): method __init__ (line 146) | def __init__(self, venv, observation_space=None, action_space=None): method step_async (line 152) | def step_async(self, actions): method reset (line 156) | def reset(self): method step_wait (line 160) | def step_wait(self): method close (line 163) | def close(self): method render (line 166) | def render(self, mode='human'): method get_images (line 169) | def get_images(self): method __getattr__ (line 172) | def __getattr__(self, name): class VecEnvObservationWrapper (line 177) | class VecEnvObservationWrapper(VecEnvWrapper): method process (line 179) | def process(self, obs): method reset (line 182) | def reset(self): method step_wait (line 186) | def step_wait(self): class CloudpickleWrapper (line 190) | class CloudpickleWrapper(object): method __init__ (line 195) | def __init__(self, x): method __getstate__ (line 198) | def __getstate__(self): method __setstate__ (line 202) | def __setstate__(self, ob): function clear_mpi_env_vars (line 208) | def clear_mpi_env_vars(): FILE: baselines/common/vec_env/vec_frame_stack.py class VecFrameStack (line 6) | class VecFrameStack(VecEnvWrapper): method __init__ (line 7) | def __init__(self, venv, nstack): method step_wait (line 17) | def step_wait(self): method reset (line 26) | def reset(self): FILE: baselines/common/vec_env/vec_monitor.py class VecMonitor (line 7) | class VecMonitor(VecEnvWrapper): method __init__ (line 8) | def __init__(self, venv, filename=None, keep_buf=0, info_keywords=()): method reset (line 25) | def reset(self): method step_wait (line 31) | def step_wait(self): FILE: baselines/common/vec_env/vec_normalize.py class VecNormalize (line 4) | class VecNormalize(VecEnvWrapper): method __init__ (line 10) | def __init__(self, venv, ob=True, ret=True, clipob=10., cliprew=10., g... method step_wait (line 26) | def step_wait(self): method _obfilt (line 36) | def _obfilt(self, obs): method reset (line 44) | def reset(self): FILE: baselines/common/vec_env/vec_remove_dict_obs.py class VecExtractDictObs (line 3) | class VecExtractDictObs(VecEnvObservationWrapper): method __init__ (line 4) | def __init__(self, venv, key): method process (line 9) | def process(self, obs): FILE: baselines/common/vec_env/vec_video_recorder.py class VecVideoRecorder (line 7) | class VecVideoRecorder(VecEnvWrapper): method __init__ (line 12) | def __init__(self, venv, directory, record_video_trigger, video_length... method reset (line 39) | def reset(self): method start_video_recorder (line 46) | def start_video_recorder(self): method _video_enabled (line 60) | def _video_enabled(self): method step_wait (line 63) | def step_wait(self): method close_video_recorder (line 78) | def close_video_recorder(self): method close (line 84) | def close(self): method __del__ (line 88) | def __del__(self): FILE: baselines/common/wrappers.py class TimeLimit (line 3) | class TimeLimit(gym.Wrapper): method __init__ (line 4) | def __init__(self, env, max_episode_steps=None): method step (line 9) | def step(self, ac): method reset (line 17) | def reset(self, **kwargs): class ClipActionsWrapper (line 21) | class ClipActionsWrapper(gym.Wrapper): method step (line 22) | def step(self, action): method reset (line 28) | def reset(self, **kwargs): FILE: baselines/ddpg/ddpg.py function learn (line 21) | def learn(network, env, FILE: baselines/ddpg/ddpg_learner.py function normalize (line 17) | def normalize(x, stats): function denormalize (line 23) | def denormalize(x, stats): function reduce_std (line 28) | def reduce_std(x, axis=None, keepdims=False): function reduce_var (line 31) | def reduce_var(x, axis=None, keepdims=False): function get_target_updates (line 36) | def get_target_updates(vars, target_vars, tau): function get_perturbed_actor_updates (line 50) | def get_perturbed_actor_updates(actor, perturbed_actor, param_noise_stdd... class DDPG (line 66) | class DDPG(object): method __init__ (line 67) | def __init__(self, actor, critic, memory, observation_shape, action_sh... method setup_target_network_updates (line 149) | def setup_target_network_updates(self): method setup_param_noise (line 155) | def setup_param_noise(self, normalized_obs0): method setup_actor_optimizer (line 172) | def setup_actor_optimizer(self): method setup_critic_optimizer (line 183) | def setup_critic_optimizer(self): method setup_popart (line 205) | def setup_popart(self): method setup_stats (line 223) | def setup_stats(self): method step (line 259) | def step(self, obs, apply_noise=True, compute_Q=True): method store_transition (line 280) | def store_transition(self, obs0, action, reward, obs1, terminal1): method train (line 289) | def train(self): method initialize (line 333) | def initialize(self, sess): method update_target_net (line 340) | def update_target_net(self): method get_stats (line 343) | def get_stats(self): method adapt_param_noise (line 362) | def adapt_param_noise(self): method reset (line 389) | def reset(self): FILE: baselines/ddpg/memory.py class RingBuffer (line 4) | class RingBuffer(object): method __init__ (line 5) | def __init__(self, maxlen, shape, dtype='float32'): method __len__ (line 11) | def __len__(self): method __getitem__ (line 14) | def __getitem__(self, idx): method get_batch (line 19) | def get_batch(self, idxs): method append (line 22) | def append(self, v): function array_min2d (line 35) | def array_min2d(x): class Memory (line 42) | class Memory(object): method __init__ (line 43) | def __init__(self, limit, action_shape, observation_shape): method sample (line 52) | def sample(self, batch_size): method append (line 71) | def append(self, obs0, action, reward, obs1, terminal1, training=True): method nb_entries (line 82) | def nb_entries(self): FILE: baselines/ddpg/models.py class Model (line 5) | class Model(object): method __init__ (line 6) | def __init__(self, name, network='mlp', **network_kwargs): method vars (line 11) | def vars(self): method trainable_vars (line 15) | def trainable_vars(self): method perturbable_vars (line 19) | def perturbable_vars(self): class Actor (line 23) | class Actor(Model): method __init__ (line 24) | def __init__(self, nb_actions, name='actor', network='mlp', **network_... method __call__ (line 28) | def __call__(self, obs, reuse=False): class Critic (line 36) | class Critic(Model): method __init__ (line 37) | def __init__(self, name='critic', network='mlp', **network_kwargs): method __call__ (line 41) | def __call__(self, obs, action, reuse=False): method output_vars (line 49) | def output_vars(self): FILE: baselines/ddpg/noise.py class AdaptiveParamNoiseSpec (line 4) | class AdaptiveParamNoiseSpec(object): method __init__ (line 5) | def __init__(self, initial_stddev=0.1, desired_action_stddev=0.1, adop... method adapt (line 12) | def adapt(self, distance): method get_stats (line 20) | def get_stats(self): method __repr__ (line 26) | def __repr__(self): class ActionNoise (line 31) | class ActionNoise(object): method reset (line 32) | def reset(self): class NormalActionNoise (line 36) | class NormalActionNoise(ActionNoise): method __init__ (line 37) | def __init__(self, mu, sigma): method __call__ (line 41) | def __call__(self): method __repr__ (line 44) | def __repr__(self): class OrnsteinUhlenbeckActionNoise (line 49) | class OrnsteinUhlenbeckActionNoise(ActionNoise): method __init__ (line 50) | def __init__(self, mu, sigma, theta=.15, dt=1e-2, x0=None): method __call__ (line 58) | def __call__(self): method reset (line 63) | def reset(self): method __repr__ (line 66) | def __repr__(self): FILE: baselines/ddpg/test_smoke.py function _run (line 2) | def _run(argstr): function test_popart (line 5) | def test_popart(): function test_noise_normal (line 8) | def test_noise_normal(): function test_noise_ou (line 11) | def test_noise_ou(): function test_noise_adaptive (line 14) | def test_noise_adaptive(): FILE: baselines/deepq/__init__.py function wrap_atari_dqn (line 6) | def wrap_atari_dqn(env): FILE: baselines/deepq/build_graph.py function scope_vars (line 100) | def scope_vars(scope, trainable_only=False): function scope_name (line 121) | def scope_name(): function absolute_scope_name (line 126) | def absolute_scope_name(relative_scope_name): function default_param_noise_filter (line 131) | def default_param_noise_filter(var): function build_act (line 146) | def build_act(make_obs_ph, q_func, num_actions, scope="deepq", reuse=None): function build_act_with_param_noise (line 202) | def build_act_with_param_noise(make_obs_ph, q_func, num_actions, scope="... function build_train (line 317) | def build_train(make_obs_ph, q_func, num_actions, optimizer, grad_norm_c... FILE: baselines/deepq/deepq.py class ActWrapper (line 23) | class ActWrapper(object): method __init__ (line 24) | def __init__(self, act, act_params): method load_act (line 30) | def load_act(path): method __call__ (line 46) | def __call__(self, *args, **kwargs): method step (line 49) | def step(self, observation, **kwargs): method save_act (line 55) | def save_act(self, path=None): method save (line 74) | def save(self, path): function load_act (line 78) | def load_act(path): function learn (line 95) | def learn(env, FILE: baselines/deepq/defaults.py function atari (line 1) | def atari(): function retro (line 19) | def retro(): FILE: baselines/deepq/experiments/custom_cartpole.py function model (line 16) | def model(inpt, num_actions, scope, reuse=False): FILE: baselines/deepq/experiments/enjoy_cartpole.py function main (line 6) | def main(): FILE: baselines/deepq/experiments/enjoy_mountaincar.py function main (line 7) | def main(): FILE: baselines/deepq/experiments/enjoy_pong.py function main (line 5) | def main(): FILE: baselines/deepq/experiments/train_cartpole.py function callback (line 6) | def callback(lcl, _glb): function main (line 12) | def main(): FILE: baselines/deepq/experiments/train_mountaincar.py function main (line 7) | def main(): FILE: baselines/deepq/experiments/train_pong.py function main (line 7) | def main(): FILE: baselines/deepq/models.py function build_q_func (line 5) | def build_q_func(network, hiddens=[256], dueling=True, layer_norm=False,... FILE: baselines/deepq/replay_buffer.py class ReplayBuffer (line 7) | class ReplayBuffer(object): method __init__ (line 8) | def __init__(self, size): method __len__ (line 21) | def __len__(self): method add (line 24) | def add(self, obs_t, action, reward, obs_tp1, done): method _encode_sample (line 33) | def _encode_sample(self, idxes): method sample (line 45) | def sample(self, batch_size): class PrioritizedReplayBuffer (line 71) | class PrioritizedReplayBuffer(ReplayBuffer): method __init__ (line 72) | def __init__(self, size, alpha): method add (line 100) | def add(self, *args, **kwargs): method _sample_proportional (line 107) | def _sample_proportional(self, batch_size): method sample (line 117) | def sample(self, batch_size, beta): method update_priorities (line 169) | def update_priorities(self, idxes, priorities): FILE: baselines/deepq/utils.py class TfInput (line 9) | class TfInput(object): method __init__ (line 10) | def __init__(self, name="(unnamed)"): method get (line 17) | def get(self): method make_feed_dict (line 23) | def make_feed_dict(self, data): class PlaceholderTfInput (line 28) | class PlaceholderTfInput(TfInput): method __init__ (line 29) | def __init__(self, placeholder): method get (line 34) | def get(self): method make_feed_dict (line 37) | def make_feed_dict(self, data): class ObservationInput (line 41) | class ObservationInput(PlaceholderTfInput): method __init__ (line 42) | def __init__(self, observation_space, name=None): method get (line 56) | def get(self): FILE: baselines/gail/adversary.py function logsigmoid (line 11) | def logsigmoid(a): function logit_bernoulli_entropy (line 16) | def logit_bernoulli_entropy(logits): class TransitionClassifier (line 20) | class TransitionClassifier(object): method __init__ (line 21) | def __init__(self, env, hidden_size, entcoeff=0.001, lr_rate=1e-3, sco... method build_ph (line 56) | def build_ph(self): method build_graph (line 62) | def build_graph(self, obs_ph, acs_ph, reuse=False): method get_trainable_variables (line 76) | def get_trainable_variables(self): method get_reward (line 79) | def get_reward(self, obs, acs): FILE: baselines/gail/behavior_clone.py function argsparser (line 24) | def argsparser(): function learn (line 42) | def learn(env, policy_func, dataset, optim_batch_size=128, max_iters=1e4, function get_task_name (line 80) | def get_task_name(args): function main (line 88) | def main(args): FILE: baselines/gail/dataset/mujoco_dset.py class Dset (line 12) | class Dset(object): method __init__ (line 13) | def __init__(self, inputs, labels, randomize): method init_pointer (line 21) | def init_pointer(self): method get_next_batch (line 29) | def get_next_batch(self, batch_size): class Mujoco_Dset (line 42) | class Mujoco_Dset(object): method __init__ (line 43) | def __init__(self, expert_path, train_fraction=0.7, traj_limitation=-1... method log_info (line 79) | def log_info(self): method get_next_batch (line 85) | def get_next_batch(self, batch_size, split=None): method plot (line 95) | def plot(self): function test (line 102) | def test(expert_path, traj_limitation, plot): FILE: baselines/gail/gail-eval.py function load_dataset (line 28) | def load_dataset(expert_path): function argsparser (line 33) | def argsparser(): function evaluate_env (line 43) | def evaluate_env(env_name, seed, policy_hidden_size, stochastic, reuse, ... function plot (line 92) | def plot(env_name, bc_log, gail_log, stochastic): function main (line 130) | def main(args): FILE: baselines/gail/mlp_policy.py class MlpPolicy (line 15) | class MlpPolicy(object): method __init__ (line 18) | def __init__(self, name, reuse=False, *args, **kwargs): method _init (line 25) | def _init(self, ob_space, ac_space, hid_size, num_hid_layers, gaussian... method act (line 64) | def act(self, stochastic, ob): method get_variables (line 68) | def get_variables(self): method get_trainable_variables (line 71) | def get_trainable_variables(self): method get_initial_state (line 74) | def get_initial_state(self): FILE: baselines/gail/run_mujoco.py function argsparser (line 23) | def argsparser(): function get_task_name (line 58) | def get_task_name(args): function main (line 71) | def main(args): function train (line 121) | def train(env, seed, policy_fn, reward_giver, dataset, algo, function runner (line 157) | def runner(env, policy_func, load_model_path, timesteps_per_batch, numbe... function traj_1_generator (line 197) | def traj_1_generator(pi, env, horizon, stochastic): FILE: baselines/gail/statistics.py class stats (line 11) | class stats(): method __init__ (line 13) | def __init__(self, scalar_keys=[], histogram_keys=[]): method add_all_summary (line 34) | def add_all_summary(self, writer, values, iter): FILE: baselines/gail/trpo_mpi.py function traj_segment_generator (line 23) | def traj_segment_generator(pi, env, reward_giver, horizon, stochastic): function add_vtarg_and_adv (line 91) | def add_vtarg_and_adv(seg, gamma, lam): function learn (line 105) | def learn(env, policy_func, reward_giver, expert_dataset, rank, function flatten_lists (line 353) | def flatten_lists(listoflists): FILE: baselines/her/actor_critic.py class ActorCritic (line 5) | class ActorCritic: method __init__ (line 7) | def __init__(self, inputs_tf, dimo, dimg, dimu, max_u, o_stats, g_stat... FILE: baselines/her/ddpg.py function dims_to_shapes (line 16) | def dims_to_shapes(input_dims): class DDPG (line 22) | class DDPG(object): method __init__ (line 24) | def __init__(self, input_dims, buffer_size, hidden, layers, network_cl... method _random_action (line 109) | def _random_action(self, n): method _preprocess_og (line 112) | def _preprocess_og(self, o, ag, g): method step (line 123) | def step(self, obs): method get_actions (line 128) | def get_actions(self, o, ag, g, noise_eps=0., random_eps=0., use_targe... method init_demo_buffer (line 160) | def init_demo_buffer(self, demoDataFile, update_stats=True): #function... method store_episode (line 217) | def store_episode(self, episode_batch, update_stats=True): method get_current_buffer_size (line 242) | def get_current_buffer_size(self): method _sync_optimizers (line 245) | def _sync_optimizers(self): method _grads (line 249) | def _grads(self): method _update (line 259) | def _update(self, Q_grad, pi_grad): method sample_batch (line 263) | def sample_batch(self): method stage_batch (line 284) | def stage_batch(self, batch=None): method train (line 290) | def train(self, stage=True): method _init_target_net (line 297) | def _init_target_net(self): method update_target_net (line 300) | def update_target_net(self): method clear_buffer (line 303) | def clear_buffer(self): method _vars (line 306) | def _vars(self, scope): method _global_vars (line 311) | def _global_vars(self, scope): method _create_network (line 315) | def _create_network(self, reuse=False): method logs (line 406) | def logs(self, prefix=''): method __getstate__ (line 418) | def __getstate__(self): method __setstate__ (line 430) | def __setstate__(self, state): method save (line 446) | def save(self, save_path): FILE: baselines/her/experiment/config.py function cached_make_env (line 61) | def cached_make_env(make_env): function prepare_params (line 73) | def prepare_params(kwargs): function log_params (line 122) | def log_params(params, logger=logger): function configure_her (line 127) | def configure_her(params): function simple_goal_subtract (line 147) | def simple_goal_subtract(a, b): function configure_ddpg (line 152) | def configure_ddpg(dims, params, reuse=False, use_mpi=True, clip_return=... function configure_dims (line 186) | def configure_dims(params): FILE: baselines/her/experiment/data_generation/fetch_data_generation.py function main (line 11) | def main(): function goToGoal (line 30) | def goToGoal(env, lastObs): FILE: baselines/her/experiment/play.py function main (line 17) | def main(policy_file, seed, n_test_rollouts, render): FILE: baselines/her/experiment/plot.py function smooth_reward_curve (line 12) | def smooth_reward_curve(x, y): function load_results (line 21) | def load_results(file): function pad (line 40) | def pad(xs, value=np.nan): FILE: baselines/her/her.py function mpi_average (line 14) | def mpi_average(value): function train (line 22) | def train(*, policy, rollout_worker, evaluator, function learn (line 87) | def learn(*, network, env, total_timesteps, function main (line 188) | def main(**kwargs): FILE: baselines/her/her_sampler.py function make_sample_her_transitions (line 4) | def make_sample_her_transitions(replay_strategy, replay_k, reward_fun): FILE: baselines/her/normalizer.py class Normalizer (line 10) | class Normalizer: method __init__ (line 11) | def __init__(self, size, eps=1e-2, default_clip_range=np.inf, sess=None): method update (line 64) | def update(self, v): method normalize (line 72) | def normalize(self, v, clip_range=None): method denormalize (line 79) | def denormalize(self, v): method _mpi_average (line 84) | def _mpi_average(self, x): method synchronize (line 90) | def synchronize(self, local_sum, local_sumsq, local_count, root=None): method recompute_stats (line 96) | def recompute_stats(self): class IdentityNormalizer (line 121) | class IdentityNormalizer: method __init__ (line 122) | def __init__(self, size, std=1.): method update (line 127) | def update(self, x): method normalize (line 130) | def normalize(self, x, clip_range=None): method denormalize (line 133) | def denormalize(self, x): method synchronize (line 136) | def synchronize(self): method recompute_stats (line 139) | def recompute_stats(self): FILE: baselines/her/replay_buffer.py class ReplayBuffer (line 6) | class ReplayBuffer: method __init__ (line 7) | def __init__(self, buffer_shapes, size_in_transitions, T, sample_trans... method full (line 33) | def full(self): method sample (line 37) | def sample(self, batch_size): method store_episode (line 57) | def store_episode(self, episode_batch): method get_current_episode_size (line 73) | def get_current_episode_size(self): method get_current_size (line 77) | def get_current_size(self): method get_transitions_stored (line 81) | def get_transitions_stored(self): method clear_buffer (line 85) | def clear_buffer(self): method _get_storage_idx (line 89) | def _get_storage_idx(self, inc=None): FILE: baselines/her/rollout.py class RolloutWorker (line 9) | class RolloutWorker: method __init__ (line 12) | def __init__(self, venv, policy, dims, logger, T, rollout_batch_size=1, method reset_all_rollouts (line 44) | def reset_all_rollouts(self): method generate_rollouts (line 50) | def generate_rollouts(self): method clear_history (line 138) | def clear_history(self): method current_success_rate (line 144) | def current_success_rate(self): method current_mean_Q (line 147) | def current_mean_Q(self): method save_policy (line 150) | def save_policy(self, path): method logs (line 156) | def logs(self, prefix='worker'): FILE: baselines/her/util.py function store_args (line 14) | def store_args(method): function import_function (line 41) | def import_function(spec): function flatten_grads (line 50) | def flatten_grads(var_list, grads): function nn (line 57) | def nn(input, layers_sizes, reuse=None, flatten=False, name=""): function install_mpi_excepthook (line 75) | def install_mpi_excepthook(): function mpi_fork (line 88) | def mpi_fork(n, extra_mpi_args=[]): function convert_episode_to_batch_major (line 114) | def convert_episode_to_batch_major(episode): function transitions_in_episode_batch (line 127) | def transitions_in_episode_batch(episode_batch): function reshape_for_broadcasting (line 134) | def reshape_for_broadcasting(source, target): FILE: baselines/logger.py class KVWriter (line 19) | class KVWriter(object): method writekvs (line 20) | def writekvs(self, kvs): class SeqWriter (line 23) | class SeqWriter(object): method writeseq (line 24) | def writeseq(self, seq): class HumanOutputFormat (line 27) | class HumanOutputFormat(KVWriter, SeqWriter): method __init__ (line 28) | def __init__(self, filename_or_file): method writekvs (line 37) | def writekvs(self, kvs): method _truncate (line 71) | def _truncate(self, s): method writeseq (line 75) | def writeseq(self, seq): method close (line 84) | def close(self): class JSONOutputFormat (line 88) | class JSONOutputFormat(KVWriter): method __init__ (line 89) | def __init__(self, filename): method writekvs (line 92) | def writekvs(self, kvs): method close (line 99) | def close(self): class CSVOutputFormat (line 102) | class CSVOutputFormat(KVWriter): method __init__ (line 103) | def __init__(self, filename): method writekvs (line 108) | def writekvs(self, kvs): method close (line 135) | def close(self): class TensorBoardOutputFormat (line 139) | class TensorBoardOutputFormat(KVWriter): method __init__ (line 143) | def __init__(self, dir): method writekvs (line 158) | def writekvs(self, kvs): method close (line 169) | def close(self): function make_output_format (line 174) | def make_output_format(format, ev_dir, log_suffix=''): function logkv (line 193) | def logkv(key, val): function logkv_mean (line 201) | def logkv_mean(key, val): function logkvs (line 207) | def logkvs(d): function dumpkvs (line 214) | def dumpkvs(): function getkvs (line 220) | def getkvs(): function log (line 224) | def log(*args, level=INFO): function debug (line 230) | def debug(*args): function info (line 233) | def info(*args): function warn (line 236) | def warn(*args): function error (line 239) | def error(*args): function set_level (line 243) | def set_level(level): function set_comm (line 249) | def set_comm(comm): function get_dir (line 252) | def get_dir(): function profile_kv (line 263) | def profile_kv(scopename): function profile (line 271) | def profile(n): function get_current (line 289) | def get_current(): class Logger (line 296) | class Logger(object): method __init__ (line 301) | def __init__(self, dir, output_formats, comm=None): method logkv (line 311) | def logkv(self, key, val): method logkv_mean (line 314) | def logkv_mean(self, key, val): method dumpkvs (line 319) | def dumpkvs(self): method log (line 337) | def log(self, *args, level=INFO): method set_level (line 343) | def set_level(self, level): method set_comm (line 346) | def set_comm(self, comm): method get_dir (line 349) | def get_dir(self): method close (line 352) | def close(self): method _do_log (line 358) | def _do_log(self, args): function get_rank_without_mpi_import (line 363) | def get_rank_without_mpi_import(): function configure (line 372) | def configure(dir=None, format_strs=None, comm=None, log_suffix=''): function _configure_default_logger (line 401) | def _configure_default_logger(): function reset (line 405) | def reset(): function scoped_configure (line 412) | def scoped_configure(dir=None, format_strs=None, comm=None): function _demo (line 423) | def _demo(): function read_json (line 456) | def read_json(fname): function read_csv (line 464) | def read_csv(fname): function read_tb (line 468) | def read_tb(path): FILE: baselines/ppo1/cnn_policy.py class CnnPolicy (line 6) | class CnnPolicy(object): method __init__ (line 8) | def __init__(self, name, ob_space, ac_space, kind='large'): method _init (line 13) | def _init(self, ob_space, ac_space, kind): method act (line 47) | def act(self, stochastic, ob): method get_variables (line 50) | def get_variables(self): method get_trainable_variables (line 52) | def get_trainable_variables(self): method get_initial_state (line 54) | def get_initial_state(self): FILE: baselines/ppo1/mlp_policy.py class MlpPolicy (line 7) | class MlpPolicy(object): method __init__ (line 9) | def __init__(self, name, *args, **kwargs): method _init (line 14) | def _init(self, ob_space, ac_space, hid_size, num_hid_layers, gaussian... method act (line 52) | def act(self, stochastic, ob): method get_variables (line 55) | def get_variables(self): method get_trainable_variables (line 57) | def get_trainable_variables(self): method get_initial_state (line 59) | def get_initial_state(self): FILE: baselines/ppo1/pposgd_simple.py function traj_segment_generator (line 11) | def traj_segment_generator(pi, env, horizon, stochastic): function add_vtarg_and_adv (line 64) | def add_vtarg_and_adv(seg, gamma, lam): function learn (line 80) | def learn(env, policy_fn, *, function flatten_lists (line 216) | def flatten_lists(listoflists): FILE: baselines/ppo1/run_atari.py function train (line 11) | def train(env_id, num_timesteps, seed): function main (line 43) | def main(): FILE: baselines/ppo1/run_humanoid.py function train (line 9) | def train(num_timesteps, seed, model_path=None): class RewScale (line 40) | class RewScale(gym.RewardWrapper): method __init__ (line 41) | def __init__(self, env, scale): method reward (line 44) | def reward(self, r): function main (line 47) | def main(): FILE: baselines/ppo1/run_mujoco.py function train (line 7) | def train(env_id, num_timesteps, seed): function main (line 23) | def main(): FILE: baselines/ppo1/run_robotics.py function train (line 10) | def train(env_id, num_timesteps, seed): function main (line 34) | def main(): FILE: baselines/ppo2/defaults.py function mujoco (line 1) | def mujoco(): function atari (line 15) | def atari(): function retro (line 24) | def retro(): FILE: baselines/ppo2/microbatched_model.py class MicrobatchedModel (line 5) | class MicrobatchedModel(Model): method __init__ (line 10) | def __init__(self, *, policy, ob_space, ac_space, nbatch_act, nbatch_t... method train (line 35) | def train(self, lr, cliprange, obs, returns, masks, actions, values, n... FILE: baselines/ppo2/model.py class Model (line 14) | class Model(object): method __init__ (line 27) | def __init__(self, *, policy, ob_space, ac_space, nbatch_act, nbatch_t... method train (line 133) | def train(self, lr, cliprange, obs, returns, masks, actions, values, n... FILE: baselines/ppo2/ppo2.py function constfn (line 16) | def constfn(val): function learn (line 21) | def learn(*, network, env, total_timesteps, eval_env = None, seed=None, ... function safemean (line 220) | def safemean(xs): FILE: baselines/ppo2/runner.py class Runner (line 4) | class Runner(AbstractEnvRunner): method __init__ (line 13) | def __init__(self, *, env, model, nsteps, gamma, lam): method run (line 20) | def run(self): function sf01 (line 69) | def sf01(arr): FILE: baselines/ppo2/test_microbatches.py function test_microbatches (line 12) | def test_microbatches(): FILE: baselines/results_plotter.py function rolling_window (line 21) | def rolling_window(a, window): function window_func (line 26) | def window_func(x, y, window, func): function ts2xy (line 31) | def ts2xy(ts, xaxis, yaxis): function plot_curves (line 48) | def plot_curves(xy_list, xaxis, yaxis, title): function split_by_task (line 66) | def split_by_task(taskpath): function plot_results (line 69) | def plot_results(dirs, num_timesteps=10e6, xaxis=X_TIMESTEPS, yaxis=Y_RE... function main (line 79) | def main(): FILE: baselines/run.py function train (line 53) | def train(args, extra_args): function build_env (line 86) | def build_env(args): function get_env_type (line 121) | def get_env_type(args): function get_default_network (line 148) | def get_default_network(env_type): function get_alg_module (line 154) | def get_alg_module(alg, submodule=None): function get_learn_function (line 166) | def get_learn_function(alg): function get_learn_function_defaults (line 170) | def get_learn_function_defaults(alg, env_type): function parse_cmdline_kwargs (line 180) | def parse_cmdline_kwargs(args): function configure_logger (line 195) | def configure_logger(log_path, **kwargs): function main (line 202) | def main(args): FILE: baselines/trpo_mpi/defaults.py function atari (line 4) | def atari(): function mujoco (line 18) | def mujoco(): FILE: baselines/trpo_mpi/trpo_mpi.py function traj_segment_generator (line 20) | def traj_segment_generator(pi, env, horizon, stochastic): function add_vtarg_and_adv (line 76) | def add_vtarg_and_adv(seg, gamma, lam): function learn (line 89) | def learn(*, function flatten_lists (line 394) | def flatten_lists(listoflists): function get_variables (line 397) | def get_variables(scope): function get_trainable_variables (line 400) | def get_trainable_variables(scope): function get_vf_trainable_variables (line 403) | def get_vf_trainable_variables(scope): function get_pi_trainable_variables (line 406) | def get_pi_trainable_variables(scope):