SYMBOL INDEX (182 symbols across 13 files) FILE: code/actor_critic_advantage.py class Actor (line 12) | class Actor(object): #本质还是policy gradient 不过A2C是单步更新 method __init__ (line 13) | def __init__(self, method choose_action (line 55) | def choose_action(self, s): #选择行为 method learn (line 62) | def learn(self, s, a, td): class Critic (line 69) | class Critic(object): method __init__ (line 70) | def __init__(self, sess, n_features, lr=0.01, gamma=0.9): method learn (line 104) | def learn(self, s, r, s_): FILE: code/ddpg_update.py class DDPG (line 26) | class DDPG(object): method __init__ (line 27) | def __init__(self, a_dim, s_dim, a_bound,): #初始化2个网络图 注意无论是critic还是act... method choose_action (line 65) | def choose_action(self, s): method learn (line 70) | def learn(self): method store_transition (line 85) | def store_transition(self, s, a, r, s_): #离线训练算法标准操作 method _build_a (line 91) | def _build_a(self, s, scope, trainable): #actor网络结构 直接输出动作确定a method _build_c (line 97) | def _build_c(self, s, a, scope, trainable): #critic网络结构 输出Q(s,a) FILE: code/deep_deterministic_policy_gradient.py class Actor (line 26) | class Actor(object): method __init__ (line 27) | def __init__(self, sess, action_dim, action_bound, learning_rate, repl... method _build_net (line 55) | def _build_net(self, s, scope, trainable): method learn (line 68) | def learn(self, s): # batch update method choose_action (line 78) | def choose_action(self, s): method add_grad_to_graph (line 84) | def add_grad_to_graph(self, a_grads): class Critic (line 96) | class Critic(object): method __init__ (line 97) | def __init__(self, sess, state_dim, action_dim, learning_rate, gamma, ... method _build_net (line 135) | def _build_net(self, s, a, scope, trainable): method learn (line 151) | def learn(self, s, a, r, s_): class Memory (line 161) | class Memory(object): method __init__ (line 162) | def __init__(self, capacity, dims): method store_transition (line 167) | def store_transition(self, s, a, r, s_): method sample (line 173) | def sample(self, n): FILE: code/policy_gradient.py class PolicyGradient (line 5) | class PolicyGradient: method __init__ (line 6) | def __init__(self, method __build_net (line 29) | def __build_net(self): #PG网络 method choose_action (line 60) | def choose_action(self, observation): #选择行为 method store_transition (line 65) | def store_transition(self, s, a, r):#存储一个回合的经验 method learn (line 70) | def learn(self): method _discount_and_norm_rewards (line 81) | def _discount_and_norm_rewards(self): #用bellman公式计算出vt(s,a) FILE: code/proximal_policy_optimization.py class PPO (line 31) | class PPO(object): method __init__ (line 32) | def __init__(self): method update (line 72) | def update(self, s, a, r): #update ppo method choose_action (line 96) | def choose_action(self, s): method get_v (line 101) | def get_v(self, s): #V(s)状态值 由critic网络给出 method _build_anet (line 106) | def _build_anet(self, name, trainable): #critic网络输出动作的概率分布 包含参数均值u与方差s... FILE: code/tensrolayer-implemented/a3c.py class ACNet (line 83) | class ACNet(object): method __init__ (line 85) | def __init__(self, scope, globalAC=None): method update_global (line 115) | def update_global( method pull_global (line 145) | def pull_global(self, globalAC): # run by a local, pull weights from ... method choose_action (line 151) | def choose_action(self, s): # run by a local method save_ckpt (line 161) | def save_ckpt(self): # save trained weights method load_ckpt (line 165) | def load_ckpt(self): # load trained weights class Worker (line 170) | class Worker(object): method __init__ (line 172) | def __init__(self, name, globalAC): method work (line 178) | def work(self, globalAC): FILE: code/tensrolayer-implemented/ac.py class Actor (line 71) | class Actor(object): method __init__ (line 73) | def __init__(self, n_features, n_actions, lr=0.001): method learn (line 90) | def learn(self, s, a, td): method choose_action (line 101) | def choose_action(self, s): method choose_action_greedy (line 106) | def choose_action_greedy(self, s): method save_ckpt (line 111) | def save_ckpt(self): # save trained weights method load_ckpt (line 114) | def load_ckpt(self): # load trained weights class Critic (line 118) | class Critic(object): method __init__ (line 120) | def __init__(self, n_features, lr=0.01): method learn (line 138) | def learn(self, s, r, s_): method save_ckpt (line 150) | def save_ckpt(self): # save trained weights method load_ckpt (line 153) | def load_ckpt(self): # load trained weights FILE: code/tensrolayer-implemented/ddpg.py class DDPG (line 61) | class DDPG(object): method __init__ (line 66) | def __init__(self, a_dim, s_dim, a_bound): method ema_update (line 132) | def ema_update(self): method choose_action (line 142) | def choose_action(self, s): method learn (line 150) | def learn(self): method store_transition (line 180) | def store_transition(self, s, a, r, s_): method save_ckpt (line 196) | def save_ckpt(self): method load_ckpt (line 209) | def load_ckpt(self): FILE: code/tensrolayer-implemented/dqn.py function to_one_hot (line 64) | def to_one_hot(i, n_classes=None): function get_model (line 72) | def get_model(inputs_shape): function save_ckpt (line 78) | def save_ckpt(model): # save trained weights function load_ckpt (line 82) | def load_ckpt(model): # load trained weights FILE: code/tensrolayer-implemented/dqn_variants.py class MLP (line 98) | class MLP(tl.models.Model): method __init__ (line 100) | def __init__(self, name): method forward (line 107) | def forward(self, ni): class CNN (line 134) | class CNN(tl.models.Model): method __init__ (line 136) | def __init__(self, name): method forward (line 163) | def forward(self, ni): class ReplayBuffer (line 189) | class ReplayBuffer(object): method __init__ (line 191) | def __init__(self, size): method __len__ (line 196) | def __len__(self): method add (line 199) | def add(self, *args): method _encode_sample (line 206) | def _encode_sample(self, idxes): method sample (line 223) | def sample(self, batch_size): function huber_loss (line 229) | def huber_loss(x): function sync (line 234) | def sync(net, net_tar): function log_softmax (line 240) | def log_softmax(x, dim): function softmax (line 245) | def softmax(x, dim): FILE: code/tensrolayer-implemented/pg.py class PolicyGradient (line 53) | class PolicyGradient: method __init__ (line 58) | def __init__(self, n_features, n_actions, learning_rate=0.01, reward_d... method choose_action (line 96) | def choose_action(self, s): method choose_action_greedy (line 106) | def choose_action_greedy(self, s): method store_transition (line 115) | def store_transition(self, s, a, r): method learn (line 127) | def learn(self): method _discount_and_norm_rewards (line 153) | def _discount_and_norm_rewards(self): method save_ckpt (line 170) | def save_ckpt(self): method load_ckpt (line 179) | def load_ckpt(self): FILE: code/tensrolayer-implemented/ppo.py class PPO (line 65) | class PPO(object): method __init__ (line 70) | def __init__(self): method a_train (line 85) | def a_train(self, tfs, tfa, tfadv): method update_old_pi (line 123) | def update_old_pi(self): method c_train (line 131) | def c_train(self, tfdc_r, s): #训练critic网络,mse优化 method cal_adv (line 147) | def cal_adv(self, tfs, tfdc_r): method update (line 158) | def update(self, s, a, r): method _build_anet (line 193) | def _build_anet(self, name, trainable): method choose_action (line 213) | def choose_action(self, s): method get_v (line 225) | def get_v(self, s): method save_ckpt (line 235) | def save_ckpt(self): method load_ckpt (line 246) | def load_ckpt(self): FILE: code/tensrolayer-implemented/tutorial_wrappers.py function build_env (line 39) | def build_env(env_id, vectorized=False, seed=0, reward_scale=1.0, nenv=0): function _make_env (line 52) | def _make_env(env_id, env_type, seed, reward_scale, frame_stack=True): function _make_vec_env (line 78) | def _make_vec_env(env_id, env_type, nenv, seed, reward_scale, frame_stac... class TimeLimit (line 86) | class TimeLimit(gym.Wrapper): method __init__ (line 88) | def __init__(self, env, max_episode_steps=None): method step (line 93) | def step(self, ac): method reset (line 101) | def reset(self, **kwargs): class NoopResetEnv (line 106) | class NoopResetEnv(gym.Wrapper): method __init__ (line 108) | def __init__(self, env, noop_max=30): method reset (line 118) | def reset(self, **kwargs): method step (line 133) | def step(self, ac): class FireResetEnv (line 137) | class FireResetEnv(gym.Wrapper): method __init__ (line 139) | def __init__(self, env): method reset (line 145) | def reset(self, **kwargs): method step (line 155) | def step(self, ac): class EpisodicLifeEnv (line 159) | class EpisodicLifeEnv(gym.Wrapper): method __init__ (line 161) | def __init__(self, env): method step (line 169) | def step(self, action): method reset (line 183) | def reset(self, **kwargs): class MaxAndSkipEnv (line 197) | class MaxAndSkipEnv(gym.Wrapper): method __init__ (line 199) | def __init__(self, env, skip=4): method step (line 207) | def step(self, action): method reset (line 225) | def reset(self, **kwargs): class ClipRewardEnv (line 229) | class ClipRewardEnv(gym.RewardWrapper): method __init__ (line 231) | def __init__(self, env): method reward (line 234) | def reward(self, reward): class WarpFrame (line 239) | class WarpFrame(gym.ObservationWrapper): method __init__ (line 241) | def __init__(self, env, width=84, height=84, grayscale=True): method observation (line 250) | def observation(self, frame): class FrameStack (line 260) | class FrameStack(gym.Wrapper): method __init__ (line 262) | def __init__(self, env, k): method reset (line 274) | def reset(self): method step (line 280) | def step(self, action): method _get_ob (line 285) | def _get_ob(self): class LazyFrames (line 290) | class LazyFrames(object): method __init__ (line 292) | def __init__(self, frames): method _force (line 302) | def _force(self): method __array__ (line 308) | def __array__(self, dtype=None): method __len__ (line 314) | def __len__(self): method __getitem__ (line 317) | def __getitem__(self, i): class RewardScaler (line 321) | class RewardScaler(gym.RewardWrapper): method __init__ (line 326) | def __init__(self, env, scale=0.01): method reward (line 330) | def reward(self, reward): class VecFrameStack (line 334) | class VecFrameStack(object): method __init__ (line 336) | def __init__(self, env, k): method reset (line 345) | def reset(self): method step (line 351) | def step(self, action): method _get_ob (line 356) | def _get_ob(self): function _worker (line 361) | def _worker(remote, parent_remote, env_fn_wrapper): class CloudpickleWrapper (line 386) | class CloudpickleWrapper(object): method __init__ (line 391) | def __init__(self, x): method __getstate__ (line 394) | def __getstate__(self): method __setstate__ (line 398) | def __setstate__(self, ob): class SubprocVecEnv (line 403) | class SubprocVecEnv(object): method __init__ (line 405) | def __init__(self, env_fns): method _step_async (line 434) | def _step_async(self, actions): method _step_wait (line 446) | def _step_wait(self): method reset (line 461) | def reset(self): method _reset_task (line 473) | def _reset_task(self): method close (line 478) | def close(self): method __len__ (line 490) | def __len__(self): method step (line 493) | def step(self, actions): class Monitor (line 498) | class Monitor(gym.Wrapper): method __init__ (line 500) | def __init__(self, env): method reset (line 504) | def reset(self, **kwargs): method step (line 508) | def step(self, action): class NormalizedActions (line 516) | class NormalizedActions(gym.ActionWrapper): method _action (line 518) | def _action(self, action): method _reverse_action (line 527) | def _reverse_action(self, action): function unit_test (line 537) | def unit_test():