SYMBOL INDEX (212 symbols across 19 files) FILE: code/AC_Continous.py class Actor (line 42) | class Actor(object): method __init__ (line 44) | def __init__(self, state_dim, action_dim, action_range, lr=0.001): method learn (line 60) | def learn(self, state, td_error): method get_action (line 70) | def get_action(self, state, greedy=False): class Critic (line 81) | class Critic(object): method __init__ (line 83) | def __init__(self, state_dim, lr=0.01): method learn (line 92) | def learn(self, state, reward, state_, done): class Agent (line 104) | class Agent(): method __init__ (line 106) | def __init__(self, env): method train (line 114) | def train(self): method train_episode (line 121) | def train_episode(self): method test_episode (line 161) | def test_episode(self): method save (line 184) | def save(self): method load (line 192) | def load(self): FILE: code/AC_Discrete.py class Actor (line 40) | class Actor(object): method __init__ (line 42) | def __init__(self, state_dim, action_dim, lr=0.001): method learn (line 51) | def learn(self, state, action, td_error): method get_action (line 59) | def get_action(self, state, greedy=False): class Critic (line 67) | class Critic(object): method __init__ (line 69) | def __init__(self, state_dim, lr=0.01): method learn (line 78) | def learn(self, state, reward, state_, done): class Agent (line 91) | class Agent(): method __init__ (line 93) | def __init__(self, env): method train (line 100) | def train(self): method train_episode (line 107) | def train_episode(self): method test_episode (line 148) | def test_episode(self): method save (line 171) | def save(self): method load (line 179) | def load(self): FILE: code/DDPG.py class ReplayBuffer (line 64) | class ReplayBuffer: method __init__ (line 74) | def __init__(self, capacity): method push (line 79) | def push(self, state, action, reward, next_state, done): method sample (line 85) | def sample(self, batch_size): method __len__ (line 90) | def __len__(self): class DDPG (line 95) | class DDPG(object): method __init__ (line 99) | def __init__(self, action_dim, state_dim, action_range, replay_buffer): method ema_update (line 165) | def ema_update(self): method get_action (line 175) | def get_action(self, state, greedy=False): method learn (line 189) | def learn(self): method save (line 217) | def save(self): method load (line 230) | def load(self): FILE: code/DDQN.py class ReplayBuffer (line 28) | class ReplayBuffer: method __init__ (line 29) | def __init__(self, capacity=10000): method push (line 34) | def push(self, state, action, reward, next_state, done): method sample (line 40) | def sample(self, batch_size = args.batch_size): class Agent (line 52) | class Agent: method __init__ (line 53) | def __init__(self, env): method target_update (line 75) | def target_update(self): method choose_action (line 81) | def choose_action(self, state): method replay (line 88) | def replay(self): method test_episode (line 111) | def test_episode(self, test_episodes): method train (line 126) | def train(self, train_episodes=200): method saveModel (line 149) | def saveModel(self): method loadModel (line 157) | def loadModel(self): FILE: code/DQN.py class ReplayBuffer (line 28) | class ReplayBuffer: method __init__ (line 29) | def __init__(self, capacity=10000): method push (line 34) | def push(self, state, action, reward, next_state, done): method sample (line 40) | def sample(self, batch_size = args.batch_size): class Agent (line 52) | class Agent: method __init__ (line 53) | def __init__(self, env): method target_update (line 75) | def target_update(self): method choose_action (line 81) | def choose_action(self, state): method replay (line 88) | def replay(self): method test_episode (line 109) | def test_episode(self, test_episodes): method train (line 124) | def train(self, train_episodes=200): method saveModel (line 148) | def saveModel(self): method loadModel (line 156) | def loadModel(self): FILE: code/Dueling DQN.py class ReplayBuffer (line 28) | class ReplayBuffer: method __init__ (line 29) | def __init__(self, capacity=10000): method push (line 34) | def push(self, state, action, reward, next_state, done): method sample (line 40) | def sample(self, batch_size = args.batch_size): class Agent (line 52) | class Agent: method __init__ (line 53) | def __init__(self, env): method target_update (line 82) | def target_update(self): method choose_action (line 88) | def choose_action(self, state): method replay (line 95) | def replay(self): method train (line 112) | def train(self, train_episodes=200): method test_episode (line 136) | def test_episode(self, test_episodes): method saveModel (line 152) | def saveModel(self): method loadModel (line 160) | def loadModel(self): FILE: code/PG_Continous.py class PolicyGradient (line 36) | class PolicyGradient: method __init__ (line 38) | def __init__(self, state_dim, action_dim, action_range, lr=0.001, gamm... method get_action (line 55) | def get_action(self, state): method store_transition (line 63) | def store_transition(self, s, a, r): method learn (line 68) | def learn(self): method _discount_and_norm_reward (line 84) | def _discount_and_norm_reward(self): method save (line 97) | def save(self): method load (line 104) | def load(self): FILE: code/PG_Discrete.py class PolicyGradient (line 38) | class PolicyGradient: method __init__ (line 42) | def __init__(self, state_dim, action_num, learning_rate=0.02, gamma=0.... method get_action (line 56) | def get_action(self, s, greedy=False): method store_transition (line 69) | def store_transition(self, s, a, r): method learn (line 81) | def learn(self): method _discount_and_norm_rewards (line 101) | def _discount_and_norm_rewards(self): method save (line 118) | def save(self): method load (line 128) | def load(self): FILE: code/PPO.py class PPO (line 57) | class PPO(object): method __init__ (line 61) | def __init__(self, state_dim, action_dim, action_bound, method='clip'): method choose_action (line 103) | def choose_action(self, s): method store_transition (line 115) | def store_transition(self, state, action, reward): method a_train (line 123) | def a_train(self, state, action, adv): method update_old_pi (line 166) | def update_old_pi(self): method c_train (line 173) | def c_train(self, reward, state): method update (line 184) | def update(self): method finish_path (line 223) | def finish_path(self, next_state, done): method save_ckpt (line 241) | def save_ckpt(self): method load_ckpt (line 250) | def load_ckpt(self): FILE: code/Q-Learning.py class QLearning (line 11) | class QLearning: method __init__ (line 12) | def __init__(self, state_dim, action_dim, lr=0.01, gamma=0.9, e_greed=... method sample (line 19) | def sample(self, state): method predict (line 26) | def predict(self, state): method learn (line 36) | def learn(self, state, action, reward, next_state, done): method save (line 43) | def save(self): method load (line 48) | def load(self, npy_file='./model/qlearning_table.npy'): class Agent (line 52) | class Agent: method __init__ (line 53) | def __init__(self, env): method train (line 62) | def train(self, max_episode): method run_episode (line 75) | def run_episode(self, render=False): method test_episode (line 92) | def test_episode(self, render=False): FILE: code/SAC.py class ReplayBuffer (line 82) | class ReplayBuffer: method __init__ (line 92) | def __init__(self, capacity): method push (line 97) | def push(self, state, action, reward, next_state, done): method sample (line 103) | def sample(self, BATCH_SIZE): method __len__ (line 114) | def __len__(self): class SoftQNetwork (line 118) | class SoftQNetwork(Model): method __init__ (line 121) | def __init__(self, num_inputs, num_actions, hidden_dim, init_w=3e-3): method forward (line 133) | def forward(self, input): class PolicyNetwork (line 140) | class PolicyNetwork(Model): method __init__ (line 143) | def __init__( method forward (line 170) | def forward(self, state): method evaluate (line 181) | def evaluate(self, state, epsilon=1e-6): method get_action (line 201) | def get_action(self, state, greedy=False): method sample_action (line 215) | def sample_action(self, ): class SAC (line 221) | class SAC: method __init__ (line 223) | def __init__( method target_ini (line 261) | def target_ini(self, net, target_net): method target_soft_update (line 267) | def target_soft_update(self, net, target_net, soft_tau): method update (line 275) | def update(self, batch_size, reward_scale=10., auto_entropy=True, targ... method save (line 335) | def save(self): # save trained weights method load_weights (line 347) | def load_weights(self): # load trained weights FILE: code/Sarsa.py class Sarsa (line 11) | class Sarsa: method __init__ (line 12) | def __init__(self, state_dim, action_dim, lr=0.01, gamma=0.9, e_greed=... method sample (line 19) | def sample(self, state): method predict (line 26) | def predict(self, state): method learn (line 36) | def learn(self, state, action, reward, next_state, next_action, done): method save (line 43) | def save(self): method load (line 48) | def load(self, npy_file='./model/sarsa_q_table.npy'): class Agent (line 52) | class Agent: method __init__ (line 53) | def __init__(self, env): method train (line 62) | def train(self, max_episode): method run_episode (line 75) | def run_episode(self, render=False): method test_episode (line 94) | def test_episode(self, render=False): FILE: code/TD3.py class ReplayBuffer (line 94) | class ReplayBuffer: method __init__ (line 104) | def __init__(self, capacity): method push (line 109) | def push(self, state, action, reward, next_state, done): method sample (line 115) | def sample(self, batch_size): method __len__ (line 120) | def __len__(self): class QNetwork (line 124) | class QNetwork(Model): method __init__ (line 127) | def __init__(self, num_inputs, num_actions, hidden_dim, init_w=3e-3): method forward (line 136) | def forward(self, input): class PolicyNetwork (line 143) | class PolicyNetwork(Model): method __init__ (line 146) | def __init__(self, num_inputs, num_actions, hidden_dim, action_range=1... method forward (line 160) | def forward(self, state): method evaluate (line 167) | def evaluate(self, state, eval_noise_scale): method get_action (line 185) | def get_action(self, state, explore_noise_scale, greedy=False): method sample_action (line 197) | def sample_action(self): class TD3 (line 203) | class TD3: method __init__ (line 205) | def __init__( method target_ini (line 241) | def target_ini(self, net, target_net): method target_soft_update (line 247) | def target_soft_update(self, net, target_net, soft_tau): method update (line 255) | def update(self, batch_size, eval_noise_scale, reward_scale=10., gamma... method save (line 310) | def save(self): # save trained weights method load (line 322) | def load(self): # load trained weights FILE: code_pytorch/DQN.py class Agent (line 18) | class Agent(): method __init__ (line 19) | def __init__(self, state_dim, action_dim, args, double_dqn = False): method e_greedy_action (line 40) | def e_greedy_action(self,state): method action (line 50) | def action(self,state): method learn (line 56) | def learn(self): method test_episode (line 89) | def test_episode(self, test_episodes): method train (line 104) | def train(self, train_episodes=200): method save (line 126) | def save(self): method load (line 137) | def load(self, path): FILE: code_pytorch/PG_Continue.py class Net (line 44) | class Net(nn.Module): method __init__ (line 45) | def __init__(self, state_dim, action_dim, action_bound): method forward (line 52) | def forward(self, state): class PolicyGradient (line 60) | class PolicyGradient: method __init__ (line 61) | def __init__(self, state_dim, action_dim, action_bound, learning_rate=... method get_action (line 70) | def get_action(self, state, greedy=False): method store_transition (line 81) | def store_transition(self, s, a, r): method learn (line 87) | def learn(self): method _discount_and_norm_rewards (line 107) | def _discount_and_norm_rewards(self): method save (line 124) | def save(self): method load (line 135) | def load(self): FILE: code_pytorch/PG_Discreate.py class Net (line 35) | class Net(nn.Module): method __init__ (line 36) | def __init__(self, s_dim, hidden, a_num): method forward (line 42) | def forward(self, s): class PolicyGradient (line 46) | class PolicyGradient: method __init__ (line 47) | def __init__(self, state_dim, action_num, learning_rate=0.01, gamma=0.9): method get_action (line 55) | def get_action(self, s, greedy=False): method store_transition (line 71) | def store_transition(self, s, a, r): method learn (line 77) | def learn(self): method _discount_and_norm_rewards (line 98) | def _discount_and_norm_rewards(self): method save (line 115) | def save(self): method load (line 126) | def load(self): FILE: code_pytorch/buffer.py class ReplayBUffer (line 4) | class ReplayBUffer: method __init__ (line 5) | def __init__(self, args): method remember (line 12) | def remember(self, state, action, reward, next_state, done): method sample (line 18) | def sample(self, batch_size): method __len__ (line 23) | def __len__(self): FILE: code_pytorch/network.py class BasicQNetwork (line 6) | class BasicQNetwork(nn.Module): method __init__ (line 7) | def __init__(self, input_size, action_size, args): method forward (line 17) | def forward(self, q_network_input): FILE: code_pytorch/parameter.py function get_common_args (line 4) | def get_common_args():