SYMBOL INDEX (182 symbols across 13 files)

FILE: code/actor_critic_advantage.py
  class Actor (line 12) | class Actor(object): #本质还是policy gradient 不过A2C是单步更新
    method __init__ (line 13) | def __init__(self,
    method choose_action (line 55) | def choose_action(self, s): #选择行为
    method learn (line 62) | def learn(self, s, a, td):
  class Critic (line 69) | class Critic(object):
    method __init__ (line 70) | def __init__(self, sess, n_features, lr=0.01, gamma=0.9):
    method learn (line 104) | def learn(self, s, r, s_):

FILE: code/ddpg_update.py
  class DDPG (line 26) | class DDPG(object):
    method __init__ (line 27) | def __init__(self, a_dim, s_dim, a_bound,): #初始化2个网络图 注意无论是critic还是act...
    method choose_action (line 65) | def choose_action(self, s):
    method learn (line 70) | def learn(self):
    method store_transition (line 85) | def store_transition(self, s, a, r, s_): #离线训练算法标准操作
    method _build_a (line 91) | def _build_a(self, s, scope, trainable): #actor网络结构 直接输出动作确定a
    method _build_c (line 97) | def _build_c(self, s, a, scope, trainable): #critic网络结构 输出Q(s,a)

FILE: code/deep_deterministic_policy_gradient.py
  class Actor (line 26) | class Actor(object):
    method __init__ (line 27) | def __init__(self, sess, action_dim, action_bound, learning_rate, repl...
    method _build_net (line 55) | def _build_net(self, s, scope, trainable):
    method learn (line 68) | def learn(self, s):   # batch update
    method choose_action (line 78) | def choose_action(self, s):
    method add_grad_to_graph (line 84) | def add_grad_to_graph(self, a_grads):
  class Critic (line 96) | class Critic(object):
    method __init__ (line 97) | def __init__(self, sess, state_dim, action_dim, learning_rate, gamma, ...
    method _build_net (line 135) | def _build_net(self, s, a, scope, trainable):
    method learn (line 151) | def learn(self, s, a, r, s_):
  class Memory (line 161) | class Memory(object):
    method __init__ (line 162) | def __init__(self, capacity, dims):
    method store_transition (line 167) | def store_transition(self, s, a, r, s_):
    method sample (line 173) | def sample(self, n):

FILE: code/policy_gradient.py
  class PolicyGradient (line 5) | class PolicyGradient:
    method __init__ (line 6) | def __init__(self,
    method __build_net (line 29) | def __build_net(self): #PG网络
    method choose_action (line 60) | def choose_action(self, observation): #选择行为
    method store_transition (line 65) | def store_transition(self, s, a, r):#存储一个回合的经验
    method learn (line 70) | def learn(self):
    method _discount_and_norm_rewards (line 81) | def _discount_and_norm_rewards(self): #用bellman公式计算出vt(s,a)

FILE: code/proximal_policy_optimization.py
  class PPO (line 31) | class PPO(object):
    method __init__ (line 32) | def __init__(self):
    method update (line 72) | def update(self, s, a, r): #update ppo
    method choose_action (line 96) | def choose_action(self, s):
    method get_v (line 101) | def get_v(self, s): #V(s)状态值 由critic网络给出
    method _build_anet (line 106) | def _build_anet(self, name, trainable): #critic网络输出动作的概率分布 包含参数均值u与方差s...

FILE: code/tensrolayer-implemented/a3c.py
  class ACNet (line 83) | class ACNet(object):
    method __init__ (line 85) | def __init__(self, scope, globalAC=None):
    method update_global (line 115) | def update_global(
    method pull_global (line 145) | def pull_global(self, globalAC):  # run by a local, pull weights from ...
    method choose_action (line 151) | def choose_action(self, s):  # run by a local
    method save_ckpt (line 161) | def save_ckpt(self):  # save trained weights
    method load_ckpt (line 165) | def load_ckpt(self):  # load trained weights
  class Worker (line 170) | class Worker(object):
    method __init__ (line 172) | def __init__(self, name, globalAC):
    method work (line 178) | def work(self, globalAC):

FILE: code/tensrolayer-implemented/ac.py
  class Actor (line 71) | class Actor(object):
    method __init__ (line 73) | def __init__(self, n_features, n_actions, lr=0.001):
    method learn (line 90) | def learn(self, s, a, td):
    method choose_action (line 101) | def choose_action(self, s):
    method choose_action_greedy (line 106) | def choose_action_greedy(self, s):
    method save_ckpt (line 111) | def save_ckpt(self):  # save trained weights
    method load_ckpt (line 114) | def load_ckpt(self):  # load trained weights
  class Critic (line 118) | class Critic(object):
    method __init__ (line 120) | def __init__(self, n_features, lr=0.01):
    method learn (line 138) | def learn(self, s, r, s_):
    method save_ckpt (line 150) | def save_ckpt(self):  # save trained weights
    method load_ckpt (line 153) | def load_ckpt(self):  # load trained weights

FILE: code/tensrolayer-implemented/ddpg.py
  class DDPG (line 61) | class DDPG(object):
    method __init__ (line 66) | def __init__(self, a_dim, s_dim, a_bound):
    method ema_update (line 132) | def ema_update(self):
    method choose_action (line 142) | def choose_action(self, s):
    method learn (line 150) | def learn(self):
    method store_transition (line 180) | def store_transition(self, s, a, r, s_):
    method save_ckpt (line 196) | def save_ckpt(self):
    method load_ckpt (line 209) | def load_ckpt(self):

FILE: code/tensrolayer-implemented/dqn.py
  function to_one_hot (line 64) | def to_one_hot(i, n_classes=None):
  function get_model (line 72) | def get_model(inputs_shape):
  function save_ckpt (line 78) | def save_ckpt(model):  # save trained weights
  function load_ckpt (line 82) | def load_ckpt(model):  # load trained weights

FILE: code/tensrolayer-implemented/dqn_variants.py
  class MLP (line 98) | class MLP(tl.models.Model):
    method __init__ (line 100) | def __init__(self, name):
    method forward (line 107) | def forward(self, ni):
  class CNN (line 134) | class CNN(tl.models.Model):
    method __init__ (line 136) | def __init__(self, name):
    method forward (line 163) | def forward(self, ni):
  class ReplayBuffer (line 189) | class ReplayBuffer(object):
    method __init__ (line 191) | def __init__(self, size):
    method __len__ (line 196) | def __len__(self):
    method add (line 199) | def add(self, *args):
    method _encode_sample (line 206) | def _encode_sample(self, idxes):
    method sample (line 223) | def sample(self, batch_size):
  function huber_loss (line 229) | def huber_loss(x):
  function sync (line 234) | def sync(net, net_tar):
  function log_softmax (line 240) | def log_softmax(x, dim):
  function softmax (line 245) | def softmax(x, dim):

FILE: code/tensrolayer-implemented/pg.py
  class PolicyGradient (line 53) | class PolicyGradient:
    method __init__ (line 58) | def __init__(self, n_features, n_actions, learning_rate=0.01, reward_d...
    method choose_action (line 96) | def choose_action(self, s):
    method choose_action_greedy (line 106) | def choose_action_greedy(self, s):
    method store_transition (line 115) | def store_transition(self, s, a, r):
    method learn (line 127) | def learn(self):
    method _discount_and_norm_rewards (line 153) | def _discount_and_norm_rewards(self):
    method save_ckpt (line 170) | def save_ckpt(self):
    method load_ckpt (line 179) | def load_ckpt(self):

FILE: code/tensrolayer-implemented/ppo.py
  class PPO (line 65) | class PPO(object):
    method __init__ (line 70) | def __init__(self):
    method a_train (line 85) | def a_train(self, tfs, tfa, tfadv):
    method update_old_pi (line 123) | def update_old_pi(self):
    method c_train (line 131) | def c_train(self, tfdc_r, s): #训练critic网络，mse优化
    method cal_adv (line 147) | def cal_adv(self, tfs, tfdc_r):
    method update (line 158) | def update(self, s, a, r):
    method _build_anet (line 193) | def _build_anet(self, name, trainable):
    method choose_action (line 213) | def choose_action(self, s):
    method get_v (line 225) | def get_v(self, s):
    method save_ckpt (line 235) | def save_ckpt(self):
    method load_ckpt (line 246) | def load_ckpt(self):

FILE: code/tensrolayer-implemented/tutorial_wrappers.py
  function build_env (line 39) | def build_env(env_id, vectorized=False, seed=0, reward_scale=1.0, nenv=0):
  function _make_env (line 52) | def _make_env(env_id, env_type, seed, reward_scale, frame_stack=True):
  function _make_vec_env (line 78) | def _make_vec_env(env_id, env_type, nenv, seed, reward_scale, frame_stac...
  class TimeLimit (line 86) | class TimeLimit(gym.Wrapper):
    method __init__ (line 88) | def __init__(self, env, max_episode_steps=None):
    method step (line 93) | def step(self, ac):
    method reset (line 101) | def reset(self, **kwargs):
  class NoopResetEnv (line 106) | class NoopResetEnv(gym.Wrapper):
    method __init__ (line 108) | def __init__(self, env, noop_max=30):
    method reset (line 118) | def reset(self, **kwargs):
    method step (line 133) | def step(self, ac):
  class FireResetEnv (line 137) | class FireResetEnv(gym.Wrapper):
    method __init__ (line 139) | def __init__(self, env):
    method reset (line 145) | def reset(self, **kwargs):
    method step (line 155) | def step(self, ac):
  class EpisodicLifeEnv (line 159) | class EpisodicLifeEnv(gym.Wrapper):
    method __init__ (line 161) | def __init__(self, env):
    method step (line 169) | def step(self, action):
    method reset (line 183) | def reset(self, **kwargs):
  class MaxAndSkipEnv (line 197) | class MaxAndSkipEnv(gym.Wrapper):
    method __init__ (line 199) | def __init__(self, env, skip=4):
    method step (line 207) | def step(self, action):
    method reset (line 225) | def reset(self, **kwargs):
  class ClipRewardEnv (line 229) | class ClipRewardEnv(gym.RewardWrapper):
    method __init__ (line 231) | def __init__(self, env):
    method reward (line 234) | def reward(self, reward):
  class WarpFrame (line 239) | class WarpFrame(gym.ObservationWrapper):
    method __init__ (line 241) | def __init__(self, env, width=84, height=84, grayscale=True):
    method observation (line 250) | def observation(self, frame):
  class FrameStack (line 260) | class FrameStack(gym.Wrapper):
    method __init__ (line 262) | def __init__(self, env, k):
    method reset (line 274) | def reset(self):
    method step (line 280) | def step(self, action):
    method _get_ob (line 285) | def _get_ob(self):
  class LazyFrames (line 290) | class LazyFrames(object):
    method __init__ (line 292) | def __init__(self, frames):
    method _force (line 302) | def _force(self):
    method __array__ (line 308) | def __array__(self, dtype=None):
    method __len__ (line 314) | def __len__(self):
    method __getitem__ (line 317) | def __getitem__(self, i):
  class RewardScaler (line 321) | class RewardScaler(gym.RewardWrapper):
    method __init__ (line 326) | def __init__(self, env, scale=0.01):
    method reward (line 330) | def reward(self, reward):
  class VecFrameStack (line 334) | class VecFrameStack(object):
    method __init__ (line 336) | def __init__(self, env, k):
    method reset (line 345) | def reset(self):
    method step (line 351) | def step(self, action):
    method _get_ob (line 356) | def _get_ob(self):
  function _worker (line 361) | def _worker(remote, parent_remote, env_fn_wrapper):
  class CloudpickleWrapper (line 386) | class CloudpickleWrapper(object):
    method __init__ (line 391) | def __init__(self, x):
    method __getstate__ (line 394) | def __getstate__(self):
    method __setstate__ (line 398) | def __setstate__(self, ob):
  class SubprocVecEnv (line 403) | class SubprocVecEnv(object):
    method __init__ (line 405) | def __init__(self, env_fns):
    method _step_async (line 434) | def _step_async(self, actions):
    method _step_wait (line 446) | def _step_wait(self):
    method reset (line 461) | def reset(self):
    method _reset_task (line 473) | def _reset_task(self):
    method close (line 478) | def close(self):
    method __len__ (line 490) | def __len__(self):
    method step (line 493) | def step(self, actions):
  class Monitor (line 498) | class Monitor(gym.Wrapper):
    method __init__ (line 500) | def __init__(self, env):
    method reset (line 504) | def reset(self, **kwargs):
    method step (line 508) | def step(self, action):
  class NormalizedActions (line 516) | class NormalizedActions(gym.ActionWrapper):
    method _action (line 518) | def _action(self, action):
    method _reverse_action (line 527) | def _reverse_action(self, action):
  function unit_test (line 537) | def unit_test():