SYMBOL INDEX (212 symbols across 19 files)

FILE: code/AC_Continous.py
  class Actor (line 42) | class Actor(object):
    method __init__ (line 44) | def __init__(self, state_dim, action_dim, action_range, lr=0.001):
    method learn (line 60) | def learn(self, state, td_error):
    method get_action (line 70) | def get_action(self, state, greedy=False):
  class Critic (line 81) | class Critic(object):
    method __init__ (line 83) | def __init__(self, state_dim, lr=0.01):
    method learn (line 92) | def learn(self, state, reward, state_, done):
  class Agent (line 104) | class Agent():
    method __init__ (line 106) | def __init__(self, env):
    method train (line 114) | def train(self):
    method train_episode (line 121) | def train_episode(self):
    method test_episode (line 161) | def test_episode(self):
    method save (line 184) | def save(self):
    method load (line 192) | def load(self):

FILE: code/AC_Discrete.py
  class Actor (line 40) | class Actor(object):
    method __init__ (line 42) | def __init__(self, state_dim, action_dim, lr=0.001):
    method learn (line 51) | def learn(self, state, action, td_error):
    method get_action (line 59) | def get_action(self, state, greedy=False):
  class Critic (line 67) | class Critic(object):
    method __init__ (line 69) | def __init__(self, state_dim, lr=0.01):
    method learn (line 78) | def learn(self, state, reward, state_, done):
  class Agent (line 91) | class Agent():
    method __init__ (line 93) | def __init__(self, env):
    method train (line 100) | def train(self):
    method train_episode (line 107) | def train_episode(self):
    method test_episode (line 148) | def test_episode(self):
    method save (line 171) | def save(self):
    method load (line 179) | def load(self):

FILE: code/DDPG.py
  class ReplayBuffer (line 64) | class ReplayBuffer:
    method __init__ (line 74) | def __init__(self, capacity):
    method push (line 79) | def push(self, state, action, reward, next_state, done):
    method sample (line 85) | def sample(self, batch_size):
    method __len__ (line 90) | def __len__(self):
  class DDPG (line 95) | class DDPG(object):
    method __init__ (line 99) | def __init__(self, action_dim, state_dim, action_range, replay_buffer):
    method ema_update (line 165) | def ema_update(self):
    method get_action (line 175) | def get_action(self, state, greedy=False):
    method learn (line 189) | def learn(self):
    method save (line 217) | def save(self):
    method load (line 230) | def load(self):

FILE: code/DDQN.py
  class ReplayBuffer (line 28) | class ReplayBuffer:
    method __init__ (line 29) | def __init__(self, capacity=10000):
    method push (line 34) | def push(self, state, action, reward, next_state, done):
    method sample (line 40) | def sample(self, batch_size = args.batch_size):
  class Agent (line 52) | class Agent:
    method __init__ (line 53) | def __init__(self, env):
    method target_update (line 75) | def target_update(self):
    method choose_action (line 81) | def choose_action(self, state):
    method replay (line 88) | def replay(self):
    method test_episode (line 111) | def test_episode(self, test_episodes):
    method train (line 126) | def train(self, train_episodes=200):
    method saveModel (line 149) | def saveModel(self):
    method loadModel (line 157) | def loadModel(self):

FILE: code/DQN.py
  class ReplayBuffer (line 28) | class ReplayBuffer:
    method __init__ (line 29) | def __init__(self, capacity=10000):
    method push (line 34) | def push(self, state, action, reward, next_state, done):
    method sample (line 40) | def sample(self, batch_size = args.batch_size):
  class Agent (line 52) | class Agent:
    method __init__ (line 53) | def __init__(self, env):
    method target_update (line 75) | def target_update(self):
    method choose_action (line 81) | def choose_action(self, state):
    method replay (line 88) | def replay(self):
    method test_episode (line 109) | def test_episode(self, test_episodes):
    method train (line 124) | def train(self, train_episodes=200):
    method saveModel (line 148) | def saveModel(self):
    method loadModel (line 156) | def loadModel(self):

FILE: code/Dueling DQN.py
  class ReplayBuffer (line 28) | class ReplayBuffer:
    method __init__ (line 29) | def __init__(self, capacity=10000):
    method push (line 34) | def push(self, state, action, reward, next_state, done):
    method sample (line 40) | def sample(self, batch_size = args.batch_size):
  class Agent (line 52) | class Agent:
    method __init__ (line 53) | def __init__(self, env):
    method target_update (line 82) | def target_update(self):
    method choose_action (line 88) | def choose_action(self, state):
    method replay (line 95) | def replay(self):
    method train (line 112) | def train(self, train_episodes=200):
    method test_episode (line 136) | def test_episode(self, test_episodes):
    method saveModel (line 152) | def saveModel(self):
    method loadModel (line 160) | def loadModel(self):

FILE: code/PG_Continous.py
  class PolicyGradient (line 36) | class PolicyGradient:
    method __init__ (line 38) | def __init__(self, state_dim, action_dim, action_range, lr=0.001, gamm...
    method get_action (line 55) | def get_action(self, state):
    method store_transition (line 63) | def store_transition(self, s, a, r):
    method learn (line 68) | def learn(self):
    method _discount_and_norm_reward (line 84) | def _discount_and_norm_reward(self):
    method save (line 97) | def save(self):
    method load (line 104) | def load(self):

FILE: code/PG_Discrete.py
  class PolicyGradient (line 38) | class PolicyGradient:
    method __init__ (line 42) | def __init__(self, state_dim, action_num, learning_rate=0.02, gamma=0....
    method get_action (line 56) | def get_action(self, s, greedy=False):
    method store_transition (line 69) | def store_transition(self, s, a, r):
    method learn (line 81) | def learn(self):
    method _discount_and_norm_rewards (line 101) | def _discount_and_norm_rewards(self):
    method save (line 118) | def save(self):
    method load (line 128) | def load(self):

FILE: code/PPO.py
  class PPO (line 57) | class PPO(object):
    method __init__ (line 61) | def __init__(self, state_dim, action_dim, action_bound, method='clip'):
    method choose_action (line 103) | def choose_action(self, s):
    method store_transition (line 115) | def store_transition(self, state, action, reward):
    method a_train (line 123) | def a_train(self, state, action, adv):
    method update_old_pi (line 166) | def update_old_pi(self):
    method c_train (line 173) | def c_train(self, reward, state):
    method update (line 184) | def update(self):
    method finish_path (line 223) | def finish_path(self, next_state, done):
    method save_ckpt (line 241) | def save_ckpt(self):
    method load_ckpt (line 250) | def load_ckpt(self):

FILE: code/Q-Learning.py
  class QLearning (line 11) | class QLearning:
    method __init__ (line 12) | def __init__(self, state_dim, action_dim, lr=0.01, gamma=0.9, e_greed=...
    method sample (line 19) | def sample(self, state):
    method predict (line 26) | def predict(self, state):
    method learn (line 36) | def learn(self, state, action, reward, next_state, done):
    method save (line 43) | def save(self):
    method load (line 48) | def load(self, npy_file='./model/qlearning_table.npy'):
  class Agent (line 52) | class Agent:
    method __init__ (line 53) | def __init__(self, env):
    method train (line 62) | def train(self, max_episode):
    method run_episode (line 75) | def run_episode(self, render=False):
    method test_episode (line 92) | def test_episode(self, render=False):

FILE: code/SAC.py
  class ReplayBuffer (line 82) | class ReplayBuffer:
    method __init__ (line 92) | def __init__(self, capacity):
    method push (line 97) | def push(self, state, action, reward, next_state, done):
    method sample (line 103) | def sample(self, BATCH_SIZE):
    method __len__ (line 114) | def __len__(self):
  class SoftQNetwork (line 118) | class SoftQNetwork(Model):
    method __init__ (line 121) | def __init__(self, num_inputs, num_actions, hidden_dim, init_w=3e-3):
    method forward (line 133) | def forward(self, input):
  class PolicyNetwork (line 140) | class PolicyNetwork(Model):
    method __init__ (line 143) | def __init__(
    method forward (line 170) | def forward(self, state):
    method evaluate (line 181) | def evaluate(self, state, epsilon=1e-6):
    method get_action (line 201) | def get_action(self, state, greedy=False):
    method sample_action (line 215) | def sample_action(self, ):
  class SAC (line 221) | class SAC:
    method __init__ (line 223) | def __init__(
    method target_ini (line 261) | def target_ini(self, net, target_net):
    method target_soft_update (line 267) | def target_soft_update(self, net, target_net, soft_tau):
    method update (line 275) | def update(self, batch_size, reward_scale=10., auto_entropy=True, targ...
    method save (line 335) | def save(self):  # save trained weights
    method load_weights (line 347) | def load_weights(self):  # load trained weights

FILE: code/Sarsa.py
  class Sarsa (line 11) | class Sarsa:
    method __init__ (line 12) | def __init__(self, state_dim, action_dim, lr=0.01, gamma=0.9, e_greed=...
    method sample (line 19) | def sample(self, state):
    method predict (line 26) | def predict(self, state):
    method learn (line 36) | def learn(self, state, action, reward, next_state, next_action, done):
    method save (line 43) | def save(self):
    method load (line 48) | def load(self, npy_file='./model/sarsa_q_table.npy'):
  class Agent (line 52) | class Agent:
    method __init__ (line 53) | def __init__(self, env):
    method train (line 62) | def train(self, max_episode):
    method run_episode (line 75) | def run_episode(self, render=False):
    method test_episode (line 94) | def test_episode(self, render=False):

FILE: code/TD3.py
  class ReplayBuffer (line 94) | class ReplayBuffer:
    method __init__ (line 104) | def __init__(self, capacity):
    method push (line 109) | def push(self, state, action, reward, next_state, done):
    method sample (line 115) | def sample(self, batch_size):
    method __len__ (line 120) | def __len__(self):
  class QNetwork (line 124) | class QNetwork(Model):
    method __init__ (line 127) | def __init__(self, num_inputs, num_actions, hidden_dim, init_w=3e-3):
    method forward (line 136) | def forward(self, input):
  class PolicyNetwork (line 143) | class PolicyNetwork(Model):
    method __init__ (line 146) | def __init__(self, num_inputs, num_actions, hidden_dim, action_range=1...
    method forward (line 160) | def forward(self, state):
    method evaluate (line 167) | def evaluate(self, state, eval_noise_scale):
    method get_action (line 185) | def get_action(self, state, explore_noise_scale, greedy=False):
    method sample_action (line 197) | def sample_action(self):
  class TD3 (line 203) | class TD3:
    method __init__ (line 205) | def __init__(
    method target_ini (line 241) | def target_ini(self, net, target_net):
    method target_soft_update (line 247) | def target_soft_update(self, net, target_net, soft_tau):
    method update (line 255) | def update(self, batch_size, eval_noise_scale, reward_scale=10., gamma...
    method save (line 310) | def save(self):  # save trained weights
    method load (line 322) | def load(self):  # load trained weights

FILE: code_pytorch/DQN.py
  class Agent (line 18) | class Agent():
    method __init__ (line 19) | def __init__(self, state_dim, action_dim, args, double_dqn = False):
    method e_greedy_action (line 40) | def e_greedy_action(self,state):
    method action (line 50) | def action(self,state):
    method learn (line 56) | def learn(self):
    method test_episode (line 89) | def test_episode(self, test_episodes):
    method train (line 104) | def train(self, train_episodes=200):
    method save (line 126) | def save(self):
    method load (line 137) | def load(self, path):

FILE: code_pytorch/PG_Continue.py
  class Net (line 44) | class Net(nn.Module):
    method __init__ (line 45) | def __init__(self, state_dim, action_dim, action_bound):
    method forward (line 52) | def forward(self, state):
  class PolicyGradient (line 60) | class PolicyGradient:
    method __init__ (line 61) | def __init__(self, state_dim, action_dim, action_bound, learning_rate=...
    method get_action (line 70) | def get_action(self, state, greedy=False):
    method store_transition (line 81) | def store_transition(self, s, a, r):
    method learn (line 87) | def learn(self):
    method _discount_and_norm_rewards (line 107) | def _discount_and_norm_rewards(self):
    method save (line 124) | def save(self):
    method load (line 135) | def load(self):

FILE: code_pytorch/PG_Discreate.py
  class Net (line 35) | class Net(nn.Module):
    method __init__ (line 36) | def __init__(self, s_dim, hidden, a_num):
    method forward (line 42) | def forward(self, s):
  class PolicyGradient (line 46) | class PolicyGradient:
    method __init__ (line 47) | def __init__(self, state_dim, action_num, learning_rate=0.01, gamma=0.9):
    method get_action (line 55) | def get_action(self, s, greedy=False):
    method store_transition (line 71) | def store_transition(self, s, a, r):
    method learn (line 77) | def learn(self):
    method _discount_and_norm_rewards (line 98) | def _discount_and_norm_rewards(self):
    method save (line 115) | def save(self):
    method load (line 126) | def load(self):

FILE: code_pytorch/buffer.py
  class ReplayBUffer (line 4) | class ReplayBUffer:
    method __init__ (line 5) | def __init__(self, args):
    method remember (line 12) | def remember(self, state, action, reward, next_state, done):
    method sample (line 18) | def sample(self, batch_size):
    method __len__ (line 23) | def __len__(self):

FILE: code_pytorch/network.py
  class BasicQNetwork (line 6) | class BasicQNetwork(nn.Module):
    method __init__ (line 7) | def __init__(self, input_size, action_size, args):
    method forward (line 17) | def forward(self, q_network_input):

FILE: code_pytorch/parameter.py
  function get_common_args (line 4) | def get_common_args():