Showing preview only (237K chars total). Download the full file or copy to clipboard to get everything.
Repository: xiaochus/Deep-Reinforcement-Learning-Practice
Branch: master
Commit: 6b2efff2d605
Files: 35
Total size: 226.9 KB
Directory structure:
gitextract_0r3h2dqf/
├── .gitignore
├── A3C_sparse.py
├── AC_continue.py
├── AC_sparse.py
├── DDPG.py
├── DQN.py
├── DRL.py
├── DoubleDQN.py
├── DuelingDQN.py
├── NatureDQN.py
├── PPO_TF.py
├── PolicyNetwork.py
├── README.md
├── game/
│ ├── CartPole.py
│ └── Pendulum.py
├── history/
│ ├── a3c_sparse.csv
│ ├── ac_continue.csv
│ ├── ac_sparse.csv
│ ├── ddpg.csv
│ ├── ddqn.csv
│ ├── dueling.csv
│ ├── ndqn.csv
│ ├── pg.csv
│ ├── ppo1.csv
│ └── ppo2.csv
└── model/
├── actor_a3cs.h5
├── actor_acs.h5
├── critic_a3cs.h5
├── critic_acs.h5
├── ddpg_actor.h5
├── ddpg_critic.h5
├── ddqn.h5
├── dueling.h5
├── ndqn.h5
└── pg.h5
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
================================================
FILE: A3C_sparse.py
================================================
# -*- coding: utf-8 -*-
import os
import gym
import time
import threading
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import Adam
import keras.backend as K
# Module-level state shared by every Agent worker thread.
# `step` counts the episodes finished so far across all workers; Agent.run
# keeps looping while it is below the requested episode budget.
step = 0
# Training log: one entry per finished episode (global episode index and the
# reward reported for that episode).
history = {'episode': [], 'Episode_reward': []}
# Held by a worker while it trains the shared networks and updates the two
# globals above.
lock = threading.Lock()
class A3C:
    """A3C (asynchronous advantage actor-critic) with a discrete action.

    One shared actor and one shared critic are trained by several `Agent`
    threads. Both networks take a 4-dimensional observation. The actor ends
    in a single sigmoid unit that is used as the probability of action 1.
    """

    def __init__(self):
        # discount factor for future rewards.
        self.gamma = 0.95
        self.actor_lr = 0.001
        self.critic_lr = 0.01

        self._build_model()
        self.optimizer = self._build_optimizer()

        # Bind Keras to an explicit session and initialise every variable,
        # including the optimizer variables created just above.
        self.sess = tf.InteractiveSession()
        K.set_session(self.sess)
        self.sess.run(tf.global_variables_initializer())

    def _build_actor(self):
        """Build the policy network: observation -> P(action == 1)."""
        inputs = Input(shape=(4,))
        x = Dense(20, activation='relu')(inputs)
        x = Dense(20, activation='relu')(x)
        x = Dense(1, activation='sigmoid')(x)

        return Model(inputs=inputs, outputs=x)

    def _build_critic(self):
        """Build the value network: observation -> scalar state value."""
        inputs = Input(shape=(4,))
        x = Dense(20, activation='relu')(inputs)
        x = Dense(20, activation='relu')(x)
        x = Dense(1, activation='linear')(x)

        return Model(inputs=inputs, outputs=x)

    def _build_model(self):
        """Create the shared actor/critic used by all worker threads."""
        self.actor = self._build_actor()
        self.critic = self._build_critic()

        # Pre-compile the predict functions so that worker threads do not
        # have to build them lazily.
        self.actor._make_predict_function()
        self.critic._make_predict_function()

    def _build_optimizer(self):
        """Build the training functions for actor and critic.

        Returns:
            [actor_train, critic_train]. `actor_train` is called with
            [states, actions, advantages] and `critic_train` with
            [states, discounted_rewards]; each applies one Adam update.
        """
        # actor optimizer
        actions = K.placeholder(shape=(None, 1))
        advantages = K.placeholder(shape=(None, 1))
        action_pred = self.actor.output

        # Entropy of the Bernoulli policy: -(p log p + (1 - p) log(1 - p)).
        entropy = -K.sum(
            action_pred * K.log(action_pred + 1e-10)
            + (1. - action_pred) * K.log(1. - action_pred + 1e-10),
            axis=1)
        closs = K.binary_crossentropy(actions, action_pred)
        # Both factors are (batch, 1), so the product stays element-wise.
        # Flattening one of them would broadcast to (batch, batch) and pair
        # every sample's loss with every other sample's advantage.
        # Subtracting the mean entropy rewards exploration and keeps the
        # loss a scalar.
        actor_loss = K.mean(closs * advantages) - 0.01 * K.mean(entropy)

        actor_optimizer = Adam(lr=self.actor_lr)
        actor_updates = actor_optimizer.get_updates(
            self.actor.trainable_weights, [], actor_loss)
        actor_train = K.function(
            [self.actor.input, actions, advantages], [],
            updates=actor_updates)

        # critic optimizer: mean squared error against the n-step return.
        discounted_reward = K.placeholder(shape=(None, 1))
        value = self.critic.output

        critic_loss = K.mean(K.square(discounted_reward - value))

        critic_optimizer = Adam(lr=self.critic_lr)
        critic_updates = critic_optimizer.get_updates(
            self.critic.trainable_weights, [], critic_loss)
        critic_train = K.function(
            [self.critic.input, discounted_reward], [],
            updates=critic_updates)

        return [actor_train, critic_train]

    def train(self, episode, n_thread, update_iter):
        """Train with several worker threads, then save the weights.

        Arguments:
            episode: total number of episodes shared by all workers.
            n_thread: number of worker threads.
            update_iter: update interval handed to every worker.
        """
        threads = [
            Agent(i, self.actor, self.critic, self.optimizer,
                  self.gamma, episode, update_iter)
            for i in range(n_thread)
        ]

        for t in threads:
            t.start()
            # start the workers one second apart.
            time.sleep(1)

        try:
            for t in threads:
                t.join()
        except KeyboardInterrupt:
            print("Exiting all threads...")

        self.save()

    def load(self):
        """Load actor/critic weights when both weight files exist."""
        if os.path.exists('model/actor_a3cs.h5') and os.path.exists('model/critic_a3cs.h5'):
            self.actor.load_weights('model/actor_a3cs.h5')
            self.critic.load_weights('model/critic_a3cs.h5')

    def save(self):
        """Save actor/critic weights under model/, creating it if needed."""
        # This script does not create the directory anywhere else.
        if not os.path.isdir('model'):
            os.makedirs('model')

        self.actor.save_weights('model/actor_a3cs.h5')
        self.critic.save_weights('model/critic_a3cs.h5')
class Agent(threading.Thread):
    """Worker thread that plays CartPole and trains the shared networks."""

    def __init__(self, index, actor, critic, optimizer, gamma, episode, update_iter):
        """Store the shared networks and create a private environment.

        Arguments:
            index: worker id, only used in log output.
            actor: shared actor model.
            critic: shared critic model.
            optimizer: [actor_train, critic_train] training functions.
            gamma: discount factor.
            episode: total episode budget shared by all workers.
            update_iter: number of environment steps between two updates.
        """
        threading.Thread.__init__(self)

        self.index = index
        self.actor = actor
        self.critic = critic
        self.optimizer = optimizer
        self.gamma = gamma
        self.episode = episode
        self.update_iter = update_iter

        self.env = gym.make('CartPole-v0')

    def run(self):
        """Play episodes until the global episode budget is used up.

        The networks are updated every `update_iter` environment steps and
        at the end of each episode. Each update uses only the transitions
        collected since the previous one.
        """
        global history
        global step

        while step < self.episode:
            observation = self.env.reset()

            states = []
            actions = []
            rewards = []
            episode_reward = 0.
            # Timestep inside the current episode. The update interval must
            # not be derived from the global `step`, which counts episodes
            # and is changed by the other workers.
            t = 0

            while True:
                x = observation.reshape(-1, 4)
                states.append(x)

                # sample the action from the actor's probability.
                prob = self.actor.predict(x)[0][0]
                action = np.random.choice(2, p=[1 - prob, prob])
                actions.append(action)

                next_observation, reward, done, _ = self.env.step(action)
                next_observation = next_observation.reshape(-1, 4)
                rewards.append(reward)
                episode_reward += reward

                observation = next_observation[0]
                t += 1

                if t % self.update_iter == 0 or done:
                    with lock:
                        self.train_episode(states, actions, rewards, next_observation, done)

                        if done:
                            history['episode'].append(step)
                            history['Episode_reward'].append(episode_reward)

                            print('Thread: {} | Episode: {} | Episode reward: {}'.format(self.index, step, episode_reward))

                            step += 1

                    # Start a fresh segment so that no transition is
                    # trained on twice.
                    states = []
                    actions = []
                    rewards = []

                if done:
                    break

    def discount_reward(self, rewards, next_state, done):
        """Compute discounted returns for a segment of rewards.

        Arguments:
            rewards: rewards of the segment, oldest first.
            next_state: observation that follows the last reward; only
                evaluated by the critic when `done` is false.
            done: whether the episode ended with the last reward.

        Returns:
            float32 array of the same length as `rewards`.
        """
        discount_rewards = np.zeros_like(rewards, dtype=np.float32)

        # Start from zero at episode end, otherwise from the critic's
        # estimate for the next state.
        if done:
            cumulative = 0.
        else:
            cumulative = self.critic.predict(next_state)[0][0]

        # accumulate backwards through time.
        for i in reversed(range(len(rewards))):
            cumulative = cumulative * self.gamma + rewards[i]
            discount_rewards[i] = cumulative

        return discount_rewards

    def train_episode(self, states, actions, rewards, next_observation, done):
        """Run one critic update and one actor update on a segment.

        Arguments:
            states: list of (1, 4) observations.
            actions: list of sampled actions.
            rewards: list of rewards.
            next_observation: observation after the last transition.
            done: whether the episode ended with the last transition.
        """
        states = np.concatenate(states, axis=0)
        actions = np.array(actions).reshape(-1, 1)
        rewards = np.array(rewards)

        # state values predicted before the critic is updated.
        values = self.critic.predict(states)
        # discounted returns used as critic targets.
        discounted_rewards = self.discount_reward(rewards, next_observation, done)
        discounted_rewards = discounted_rewards.reshape(-1, 1)
        # advantage = return - predicted value.
        advantages = discounted_rewards - values

        self.optimizer[1]([states, discounted_rewards])
        self.optimizer[0]([states, actions, advantages])
def save_history(history, name):
    """Write the reward history to history/<name> as CSV.

    Arguments:
        history: dict mapping column name to a list of values.
        name: file name of the CSV inside the history/ directory.
    """
    # This script does not create the directory anywhere else, so make sure
    # it exists before writing.
    if not os.path.isdir('history'):
        os.makedirs('history')

    path = os.path.join('history', name)
    df = pd.DataFrame.from_dict(history)
    df.to_csv(path, index=False, encoding='utf-8')
def play(model, episodes=10):
    """Render CartPole episodes played greedily by the trained actor.

    Arguments:
        model: object whose `actor.predict` maps a (1, 4) observation to
            the probability of action 1.
        episodes: number of episodes to play (default 10).
    """
    print('play...')
    env = gym.make('CartPole-v0')

    # Close the environment even when rendering or prediction raises.
    try:
        observation = env.reset()

        reward_sum = 0
        random_episodes = 0

        while random_episodes < episodes:
            env.render()

            # act greedily: action 1 when its probability exceeds 0.5.
            prob = model.actor.predict(observation.reshape(-1, 4))[0][0]
            action = 1 if prob > 0.5 else 0
            observation, reward, done, _ = env.step(action)

            reward_sum += reward

            if done:
                print("Reward for this episode was: {}".format(reward_sum))
                random_episodes += 1
                reward_sum = 0
                observation = env.reset()
    finally:
        env.close()
if __name__ == '__main__':
    model = A3C()

    # Training is disabled here; uncomment the two lines below to train
    # (2000 episodes, 4 threads, update interval 10) and write the reward log.
    # model.train(2000, 4, 10)
    # save_history(history, 'a3c_sparse.csv')

    # Load saved weights (if both files exist) and render ten episodes.
    model.load()
    play(model)
================================================
FILE: AC_continue.py
================================================
# -*- coding: utf-8 -*-
import os
import gym
import numpy as np
from keras.layers import Input, Dense, concatenate, Lambda
from keras.models import Model
from keras.optimizers import Adam
import keras.backend as K
from DRL import DRL
class AC(DRL):
    """Actor-critic with a continuous action, trained on Pendulum-v0.

    The actor outputs the mean and the variance of a normal distribution
    over the action. Training is not stable.
    """

    def __init__(self):
        super(AC, self).__init__()

        # replace the environment created by the base class.
        self.env = gym.make('Pendulum-v0')
        self.bound = self.env.action_space.high[0]

        self.actor = self._build_actor()
        self.critic = self._build_critic()

        # resume from saved weights when both files exist.
        if os.path.exists('model/actor_acc.h5') and os.path.exists('model/critic_acc.h5'):
            self.actor.load_weights('model/actor_acc.h5')
            self.critic.load_weights('model/critic_acc.h5')

        # discount factor.
        self.gamma = 0.9

    def _build_actor(self):
        """Build the policy network: observation -> [mu, sigma]."""
        inputs = Input(shape=(3,))
        x = Dense(20, activation='relu')(inputs)
        x = Dense(20, activation='relu')(x)

        # mean, scaled into [-bound, bound].
        mu = Dense(1, activation='tanh')(x)
        mu = Lambda(lambda x: self.bound * x)(mu)

        # variance, kept strictly positive by softplus plus a small offset.
        sigma = Dense(1, activation='softplus')(x)
        sigma = Lambda(lambda x: x + 0.0001)(sigma)

        out = concatenate([mu, sigma], axis=-1)

        return Model(inputs=inputs, outputs=out)

    def _build_critic(self):
        """Build the value network: observation -> scalar state value."""
        inputs = Input(shape=(3,))
        x = Dense(20, activation='relu')(inputs)
        x = Dense(20, activation='relu')(x)
        x = Dense(1, activation='linear')(x)

        return Model(inputs=inputs, outputs=x)

    def _actor_loss(self, y_true, y_pred):
        """Policy-gradient loss with an entropy bonus.

        Arguments:
            y_true: columns (action taken, TD error).
            y_pred: columns (mu, sigma) from the actor; sigma is used as
                the variance of the action distribution.

        Returns:
            loss: negative of (log-probability * TD error + 0.01 * entropy),
                summed over the batch.
        """
        mu, sigma = y_pred[:, 0], y_pred[:, 1]
        action_true, td_error = y_true[:, 0], y_true[:, 1]

        # Analytic log-density of N(mu, sigma). Computing log(pdf + eps)
        # instead saturates once the density underflows and then gives no
        # gradient for actions far from the mean.
        log_pdf = (-0.5 * K.log(2. * np.pi * sigma)
                   - K.square(action_true - mu) / (2. * sigma))

        # entropy of the normal distribution, encourages exploration.
        entropy = K.sum(0.5 * (K.log(2. * np.pi * sigma) + 1.))

        exp_v = log_pdf * td_error
        exp_v = K.sum(exp_v + 0.01 * entropy)

        return -exp_v

    def discount_reward(self, next_states, reward):
        """One-step TD target for the critic.

        Arguments:
            next_states: next observation, shaped (1, 3).
            reward: reward of the last action.

        Returns:
            target: reward + gamma * critic value of the next state.
        """
        q = self.critic.predict(next_states)[0][0]

        return reward + self.gamma * q

    def choice_action(self, x):
        """Sample a continuous action from the actor's distribution.

        Arguments:
            x: state, shaped (1, 3).

        Returns:
            action: sampled action clipped to [-bound, bound].
        """
        mu, sigma = self.actor.predict(x)[0]
        epsilon = np.random.randn(1)[0]
        # sigma is a variance, hence the square root.
        action = mu + np.sqrt(sigma) * epsilon
        action = np.clip(action, -self.bound, self.bound)

        return action

    def train(self, episode):
        """Train actor and critic with one update per environment step.

        Arguments:
            episode: number of episodes to play.

        Returns:
            history: dict with per-episode reward and mean losses.
        """
        self.actor.compile(loss=self._actor_loss, optimizer=Adam(lr=0.001))
        self.critic.compile(loss='mse', optimizer=Adam(lr=0.002))

        history = {'episode': [], 'Episode_reward': [],
                   'actor_loss': [], 'critic_loss': []}

        for i in range(episode):
            observation = self.env.reset()
            rewards = []
            alosses = []
            closses = []

            while True:
                x = observation.reshape(-1, 3)
                action = self.choice_action(x)

                next_observation, reward, done, _ = self.env.step([action])
                next_observation = next_observation.reshape(-1, 3)
                rewards.append(reward)

                target = self.discount_reward(next_observation, reward)
                y = np.array([target])

                # TD_error = (r + gamma * next_q) - current_q
                td_error = target - self.critic.predict(x)[0][0]
                # loss1 = mse((r + gamma * next_q), current_q)
                loss1 = self.critic.train_on_batch(x, y)

                y = np.array([[action, td_error]])
                loss2 = self.actor.train_on_batch(x, y)

                observation = next_observation[0]

                alosses.append(loss2)
                closses.append(loss1)

                if done:
                    episode_reward = np.sum(rewards)
                    aloss = np.mean(alosses)
                    closs = np.mean(closses)

                    history['episode'].append(i)
                    history['Episode_reward'].append(episode_reward)
                    history['actor_loss'].append(aloss)
                    history['critic_loss'].append(closs)

                    print('Episode: {} | Episode reward: {:.2f} | actor_loss: {:.3f} | critic_loss: {:.3f}'.format(i, episode_reward, aloss, closs))

                    break

        self.actor.save_weights('model/actor_acc.h5')
        self.critic.save_weights('model/critic_acc.h5')

        return history

    def play(self):
        """Render ten episodes with actions sampled from the actor."""
        print('play...')
        observation = self.env.reset()

        reward_sum = 0
        random_episodes = 0

        while random_episodes < 10:
            self.env.render()

            x = observation.reshape(-1, 3)
            action = self.choice_action(x)
            observation, reward, done, _ = self.env.step([action])

            reward_sum += reward

            if done:
                print("Reward for this episode was: {}".format(reward_sum))
                random_episodes += 1
                reward_sum = 0
                observation = self.env.reset()

        self.env.close()
if __name__ == '__main__':
    # Train for 500 episodes, write the per-episode log to
    # history/ac_continue.csv, then render ten episodes.
    model = AC()

    history = model.train(500)
    model.save_history(history, 'ac_continue.csv')

    model.play()
================================================
FILE: AC_sparse.py
================================================
# -*- coding: utf-8 -*-
import os
import numpy as np
from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import Adam
import keras.backend as K
from DRL import DRL
class AC(DRL):
    """Actor-critic with a discrete action, trained on the base-class env.

    The actor ends in a single sigmoid unit that is used as the probability
    of action 1.
    """

    def __init__(self):
        super(AC, self).__init__()

        self.actor = self._build_actor()
        self.critic = self._build_critic()

        # discount factor.
        self.gamma = 0.9

    def load(self):
        """Load actor/critic weights when both weight files exist."""
        if os.path.exists('model/actor_acs.h5') and os.path.exists('model/critic_acs.h5'):
            self.actor.load_weights('model/actor_acs.h5')
            self.critic.load_weights('model/critic_acs.h5')

    def _build_actor(self):
        """Build the policy network: observation -> P(action == 1)."""
        inputs = Input(shape=(4,))
        x = Dense(20, activation='relu')(inputs)
        x = Dense(20, activation='relu')(x)
        x = Dense(1, activation='sigmoid')(x)

        return Model(inputs=inputs, outputs=x)

    def _build_critic(self):
        """Build the value network: observation -> scalar state value."""
        inputs = Input(shape=(4,))
        x = Dense(20, activation='relu')(inputs)
        x = Dense(20, activation='relu')(x)
        x = Dense(1, activation='linear')(x)

        return Model(inputs=inputs, outputs=x)

    def _actor_loss(self, y_true, y_pred):
        """Cross-entropy of the taken action, weighted by the TD error.

        Arguments:
            y_true: columns (action taken, TD error).
            y_pred: predicted probability of action 1, shaped (batch, 1).

        Returns:
            loss: per-sample loss, shaped (batch, 1).
        """
        # Keep both columns as (batch, 1). Flattening the TD error would
        # broadcast the product to (batch, batch) for any batch larger
        # than one.
        action_true = K.reshape(y_true[:, 0], (-1, 1))
        td_error = K.reshape(y_true[:, 1], (-1, 1))

        loss = K.binary_crossentropy(action_true, y_pred)

        return loss * td_error

    def discount_reward(self, next_states, reward, done):
        """One-step TD target for the critic.

        Arguments:
            next_states: next observation, shaped (1, 4).
            reward: reward of the last action.
            done: whether the episode ended with the last action.

        Returns:
            target: `reward` at episode end, otherwise
                reward + gamma * critic value of the next state.
        """
        # A terminal transition has no future value, so skip the critic
        # forward pass.
        if done:
            return reward

        q = self.critic.predict(next_states)[0][0]

        return reward + self.gamma * q

    def train(self, episode):
        """Train actor and critic with one update per environment step.

        Arguments:
            episode: number of episodes to play.

        Returns:
            history: dict with per-episode reward and mean losses.
        """
        self.actor.compile(loss=self._actor_loss, optimizer=Adam(lr=0.001))
        self.critic.compile(loss='mse', optimizer=Adam(lr=0.01))

        history = {'episode': [], 'Episode_reward': [],
                   'actor_loss': [], 'critic_loss': []}

        for i in range(episode):
            observation = self.env.reset()
            rewards = []
            alosses = []
            closses = []

            while True:
                x = observation.reshape(-1, 4)
                # sample the action from the actor's probability.
                prob = self.actor.predict(x)[0][0]
                action = np.random.choice(np.array(range(2)), p=[1 - prob, prob])

                next_observation, reward, done, _ = self.env.step(action)
                next_observation = next_observation.reshape(-1, 4)
                rewards.append(reward)

                target = self.discount_reward(next_observation, reward, done)
                y = np.array([target])

                # TD_error = (r + gamma * next_q) - current_q
                td_error = target - self.critic.predict(x)[0][0]
                # loss1 = mse((r + gamma * next_q), current_q)
                loss1 = self.critic.train_on_batch(x, y)

                y = np.array([[action, td_error]])
                loss2 = self.actor.train_on_batch(x, y)

                observation = next_observation[0]

                alosses.append(loss2)
                closses.append(loss1)

                if done:
                    episode_reward = sum(rewards)
                    aloss = np.mean(alosses)
                    closs = np.mean(closses)

                    history['episode'].append(i)
                    history['Episode_reward'].append(episode_reward)
                    history['actor_loss'].append(aloss)
                    history['critic_loss'].append(closs)

                    print('Episode: {} | Episode reward: {} | actor_loss: {:.3f} | critic_loss: {:.3f}'.format(i, episode_reward, aloss, closs))

                    break

        self.actor.save_weights('model/actor_acs.h5')
        self.critic.save_weights('model/critic_acs.h5')

        return history
if __name__ == '__main__':
    # Train for 300 episodes, write the per-episode log to
    # history/ac_sparse.csv, reload the saved weights and render ten
    # episodes through the actor branch ('acs') of the base-class play().
    model = AC()

    history = model.train(300)
    model.save_history(history, 'ac_sparse.csv')

    model.load()
    model.play('acs')
================================================
FILE: DDPG.py
================================================
# -*- coding: utf-8 -*-
import os
import random
import gym
from collections import deque
import numpy as np
import tensorflow as tf
from keras.layers import Input, Dense, Lambda, concatenate
from keras.models import Model
from keras.optimizers import Adam
import keras.backend as K
from DRL import DRL
class DDPG(DRL):
    """Deep Deterministic Policy Gradient agent trained on Pendulum-v0."""

    def __init__(self):
        super(DDPG, self).__init__()

        self.sess = K.get_session()
        # replace the environment created by the base class.
        self.env = gym.make('Pendulum-v0')
        self.bound = self.env.action_space.high[0]

        # update rate for target model.
        self.TAU = 0.01
        # experience replay.
        self.memory_buffer = deque(maxlen=4000)
        # discount rate for q value.
        self.gamma = 0.95
        # scale of the exploration noise.
        self.epsilon = 1.0
        # decay rate for epsilon.
        self.epsilon_decay = 0.995
        # lower bound for epsilon.
        self.epsilon_min = 0.01

        # actor learning rate
        self.a_lr = 0.0001
        # critic learning rate
        self.c_lr = 0.001

        # online networks
        self.actor = self._build_actor()
        self.critic = self._build_critic()

        # target networks
        self.target_actor = self._build_actor()
        self.target_critic = self._build_critic()

        # gradient function
        self.get_critic_grad = self.critic_gradient()
        self.actor_optimizer()

        # Copy the online weights only now. actor_optimizer() runs the
        # global variable initializer, which re-initialises every variable
        # and would overwrite an earlier copy.
        self.target_actor.set_weights(self.actor.get_weights())
        self.target_critic.set_weights(self.critic.get_weights())

    def load(self):
        """Load actor/critic weights when both weight files exist."""
        if os.path.exists('model/ddpg_actor.h5') and os.path.exists('model/ddpg_critic.h5'):
            self.actor.load_weights('model/ddpg_actor.h5')
            self.critic.load_weights('model/ddpg_critic.h5')

    def _build_actor(self):
        """Build the actor: state -> action in [-bound, bound]."""
        inputs = Input(shape=(3,), name='state_input')
        x = Dense(40, activation='relu')(inputs)
        x = Dense(40, activation='relu')(x)
        x = Dense(1, activation='tanh')(x)
        output = Lambda(lambda x: x * self.bound)(x)

        model = Model(inputs=inputs, outputs=output)
        model.compile(loss='mse', optimizer=Adam(lr=self.a_lr))

        return model

    def _build_critic(self):
        """Build the critic: (state, action) -> Q value."""
        sinput = Input(shape=(3,), name='state_input')
        ainput = Input(shape=(1,), name='action_input')
        s = Dense(40, activation='relu')(sinput)
        a = Dense(40, activation='relu')(ainput)
        x = concatenate([s, a])
        x = Dense(40, activation='relu')(x)
        output = Dense(1, activation='linear')(x)

        model = Model(inputs=[sinput, ainput], outputs=output)
        model.compile(loss='mse', optimizer=Adam(lr=self.c_lr))

        return model

    def actor_optimizer(self):
        """Create the actor update op and initialise all variables.

        Stores the update op in `self.opt`, and the placeholders it is fed
        through in `self.ainput` and `self.action_gradient`.
        """
        self.ainput = self.actor.input
        aoutput = self.actor.output
        trainable_weights = self.actor.trainable_weights
        self.action_gradient = tf.placeholder(tf.float32, shape=(None, 1))

        # tf.gradients calculates dy/dx with initial gradients for y.
        # action_gradient is dq/da, so this is dq/da * da/dparams; the minus
        # sign turns the optimizer's descent into ascent on q.
        params_grad = tf.gradients(aoutput, trainable_weights, -self.action_gradient)
        grads = zip(params_grad, trainable_weights)
        self.opt = tf.train.AdamOptimizer(self.a_lr).apply_gradients(grads)
        self.sess.run(tf.global_variables_initializer())

    def critic_gradient(self):
        """Build the function that returns dq/da.

        Returns:
            function taking [states, actions] and returning the gradient of
            the critic output with respect to the action input.
        """
        cinput = self.critic.input
        coutput = self.critic.output

        # gradient of the q value with respect to the action, dq/da.
        action_grads = K.gradients(coutput, cinput[1])

        return K.function([cinput[0], cinput[1]], action_grads)

    def OU(self, x, mu=0, theta=0.15, sigma=0.2):
        """Ornstein-Uhlenbeck style noise.

        formula: ou = theta * (mu - x) + sigma * w

        Arguments:
            x: action value.
            mu: mean the value reverts towards.
            theta: rate of reversion towards the mean.
            sigma: scale of the random term.

        Returns:
            noise as an array of shape (1,).
        """
        return theta * (mu - x) + sigma * np.random.randn(1)

    def get_action(self, X):
        """Actor action plus exploration noise, clipped to the bounds.

        Arguments:
            X: state, shaped (1, 3).

        Returns:
            action as an array of shape (1,).
        """
        action = self.actor.predict(X)[0][0]
        # add randomness to action selection for exploration
        noise = max(self.epsilon, 0) * self.OU(action)
        action = np.clip(action + noise, -self.bound, self.bound)

        return action

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer.

        Arguments:
            state: observation.
            action: action.
            reward: reward.
            next_state: next observation.
            done: whether the episode ended.
        """
        item = (state, action, reward, next_state, done)
        self.memory_buffer.append(item)

    def update_epsilon(self):
        """Decay epsilon without letting it fall below epsilon_min."""
        if self.epsilon >= self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def process_batch(self, batch):
        """Sample a batch and compute the critic targets.

        Arguments:
            batch: batch size.

        Returns:
            states: sampled states.
            actions: sampled actions.
            y: target Q values as an array of shape (batch, 1).
        """
        # random batch from the experience replay.
        data = random.sample(self.memory_buffer, batch)
        states = np.array([d[0] for d in data])
        actions = np.array([d[1] for d in data])
        rewards = np.array([d[2] for d in data], dtype=np.float64).reshape(-1)
        next_states = np.array([d[3] for d in data])
        # 1.0 for transitions that still have a future, 0.0 for terminal.
        not_done = np.array([not d[4] for d in data], dtype=np.float64)

        # Q value of the next state from the target networks.
        next_actions = self.target_actor.predict(next_states)
        q = self.target_critic.predict([next_states, next_actions])

        # Explicit (batch, 1) array: a Python list of scalars can be read
        # by Keras as a list of per-output targets.
        y = (rewards + self.gamma * q[:, 0] * not_done).reshape(-1, 1)

        return states, actions, y

    def update_model(self, X1, X2, y):
        """Update critic and actor on one batch.

        Arguments:
            X1: states.
            X2: actions.
            y: target Q values.

        Returns:
            loss: mean critic loss of the fit.
        """
        # loss = self.critic.train_on_batch([X1, X2], y)
        loss = self.critic.fit([X1, X2], y, verbose=0)
        loss = np.mean(loss.history['loss'])

        # dq/da evaluated at the actor's current actions.
        X3 = self.actor.predict(X1)
        a_grads = np.array(self.get_critic_grad([X1, X3]))[0]
        self.sess.run(self.opt, feed_dict={
            self.ainput: X1,
            self.action_gradient: a_grads
        })

        return loss

    def update_target_model(self):
        """Soft update of the target networks.

        formula: target <- TAU * online + (1 - TAU) * target, TAU << 1.
        """
        tau = self.TAU

        critic_target_weights = [
            tau * w + (1 - tau) * t
            for w, t in zip(self.critic.get_weights(),
                            self.target_critic.get_weights())
        ]
        actor_target_weights = [
            tau * w + (1 - tau) * t
            for w, t in zip(self.actor.get_weights(),
                            self.target_actor.get_weights())
        ]

        self.target_critic.set_weights(critic_target_weights)
        self.target_actor.set_weights(actor_target_weights)

    def train(self, episode, batch):
        """Train the agent.

        Arguments:
            episode: number of episodes to play.
            batch: batch size sampled from the replay buffer.

        Returns:
            history: dict with per-episode reward and mean critic loss.
        """
        history = {'episode': [], 'Episode_reward': [], 'Loss': []}

        for i in range(episode):
            observation = self.env.reset()
            reward_sum = 0
            losses = []

            # every episode runs a fixed 200 steps.
            for j in range(200):
                x = observation.reshape(-1, 3)
                # actor action with exploration noise.
                action = self.get_action(x)
                observation, reward, done, _ = self.env.step(action)
                # add data to experience replay.
                reward_sum += reward
                self.remember(x[0], action, reward, observation, done)

                if len(self.memory_buffer) > batch:
                    X1, X2, y = self.process_batch(batch)

                    # update DDPG model
                    loss = self.update_model(X1, X2, y)
                    # update target model
                    self.update_target_model()
                    # reduce epsilon once per batch.
                    self.update_epsilon()

                    losses.append(loss)

            # No update happens until the buffer holds more than one batch.
            loss = np.mean(losses) if losses else float('nan')
            history['episode'].append(i)
            history['Episode_reward'].append(reward_sum)
            history['Loss'].append(loss)

            print('Episode: {}/{} | reward: {} | loss: {:.3f}'.format(i, episode, reward_sum, loss))

        self.actor.save_weights('model/ddpg_actor.h5')
        self.critic.save_weights('model/ddpg_critic.h5')

        return history

    def play(self):
        """Render ten episodes with the noise-free actor."""
        print('play...')
        observation = self.env.reset()

        reward_sum = 0
        random_episodes = 0

        while random_episodes < 10:
            self.env.render()

            x = observation.reshape(-1, 3)
            action = self.actor.predict(x)[0]
            observation, reward, done, _ = self.env.step(action)

            reward_sum += reward

            if done:
                print("Reward for this episode was: {}".format(reward_sum))
                random_episodes += 1
                reward_sum = 0
                observation = self.env.reset()

        self.env.close()
if __name__ == '__main__':
    # Train for 200 episodes with batches of 128, write the log to
    # history/ddpg.csv, reload the saved weights and render ten episodes.
    model = DDPG()

    history = model.train(200, 128)
    model.save_history(history, 'ddpg.csv')

    model.load()
    model.play()
================================================
FILE: DQN.py
================================================
# -*- coding: utf-8 -*-
import os
import random
import numpy as np
from collections import deque
from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import Adam
from DRL import DRL
class DQN(DRL):
    """Deep Q-Learning with experience replay and an epsilon-greedy policy."""

    def __init__(self):
        super(DQN, self).__init__()

        self.model = self.build_model()

        # experience replay.
        self.memory_buffer = deque(maxlen=2000)
        # discount rate for q value.
        self.gamma = 0.95
        # exploration rate of the epsilon-greedy policy.
        self.epsilon = 1.0
        # decay rate for epsilon.
        self.epsilon_decay = 0.995
        # lower bound for epsilon.
        self.epsilon_min = 0.01

    def load(self):
        """Load the model weights when the weight file exists."""
        if os.path.exists('model/dqn.h5'):
            self.model.load_weights('model/dqn.h5')

    def build_model(self):
        """Build the Q network: observation -> one Q value per action."""
        inputs = Input(shape=(4,))
        x = Dense(16, activation='relu')(inputs)
        x = Dense(16, activation='relu')(x)
        x = Dense(2, activation='linear')(x)

        model = Model(inputs=inputs, outputs=x)
        model.compile(loss='mse', optimizer=Adam(1e-3))

        return model

    def egreedy_action(self, state):
        """Pick an action with the epsilon-greedy rule.

        Arguments:
            state: observation, shaped (1, 4).

        Returns:
            action: a random action with probability epsilon, otherwise the
                action with the highest predicted Q value.
        """
        if np.random.rand() <= self.epsilon:
            return random.randint(0, 1)

        q_values = self.model.predict(state)[0]
        return np.argmax(q_values)

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer.

        Arguments:
            state: observation.
            action: action.
            reward: reward.
            next_state: next observation.
            done: whether the episode ended.
        """
        item = (state, action, reward, next_state, done)
        self.memory_buffer.append(item)

    def update_epsilon(self):
        """Decay epsilon without letting it fall below epsilon_min."""
        if self.epsilon >= self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    def process_batch(self, batch):
        """Sample a batch and build the regression targets.

        Arguments:
            batch: batch size.

        Returns:
            X: sampled states.
            y: predicted Q values with the entry of the taken action
                replaced by the Q-learning target.
        """
        # random batch from the experience replay.
        data = random.sample(self.memory_buffer, batch)

        states = np.array([d[0] for d in data])
        next_states = np.array([d[3] for d in data])

        y = self.model.predict(states)
        q = self.model.predict(next_states)

        for i, (_, action, reward, _, done) in enumerate(data):
            target = reward
            if not done:
                target += self.gamma * np.amax(q[i])
            y[i][action] = target

        return states, y

    def train(self, episode, batch):
        """Train the Q network.

        Arguments:
            episode: number of episodes to play.
            batch: batch size sampled from the replay buffer.

        Returns:
            history: dict with reward and last loss of every fifth episode.
        """
        history = {'episode': [], 'Episode_reward': [], 'Loss': []}

        for i in range(episode):
            observation = self.env.reset()
            reward_sum = 0
            # Reported as inf until the first training step of the episode.
            # np.inf is used because the np.infty alias no longer exists in
            # NumPy 2.0.
            loss = np.inf
            done = False

            while not done:
                # choose the action with the epsilon-greedy rule.
                x = observation.reshape(-1, 4)
                action = self.egreedy_action(x)
                observation, reward, done, _ = self.env.step(action)
                # add data to experience replay.
                reward_sum += reward
                self.remember(x[0], action, reward, observation, done)

                if len(self.memory_buffer) > batch:
                    X, y = self.process_batch(batch)
                    loss = self.model.train_on_batch(X, y)

                    # reduce epsilon once per batch.
                    self.update_epsilon()

            if i % 5 == 0:
                history['episode'].append(i)
                history['Episode_reward'].append(reward_sum)
                history['Loss'].append(loss)

                print('Episode: {} | Episode reward: {} | loss: {:.3f} | e:{:.2f}'.format(i, reward_sum, loss, self.epsilon))

        self.model.save_weights('model/dqn.h5')

        return history
if __name__ == '__main__':
    # Train for 600 episodes with batches of 32, write the log to
    # history/dqn.csv, reload the saved weights and render ten episodes.
    model = DQN()

    history = model.train(600, 32)
    model.save_history(history, 'dqn.csv')

    model.load()
    # The network outputs one Q value per action, so play through the
    # argmax branch. The default mode 'pg' would read the first Q value as
    # a probability and compare it with 0.5.
    model.play('dqn')
================================================
FILE: DRL.py
================================================
# -*- coding: utf-8 -*-
import os
import gym
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
class DRL:
    """Shared base for the agents: environment, playback, plotting, logging."""

    def __init__(self):
        """Create the CartPole environment and the output directories."""
        self.env = gym.make('CartPole-v0')

        for folder in ('model', 'history'):
            if not os.path.exists(folder):
                os.mkdir(folder)

    def play(self, m='pg'):
        """Render ten episodes played greedily by the trained network.

        Arguments:
            m: 'pg' reads a probability from `self.model`, 'acs' reads it
                from `self.actor`; any other value takes the argmax of
                `self.model`'s output.
        """
        print('play...')
        observation = self.env.reset()

        total = 0
        finished = 0

        while finished < 10:
            self.env.render()

            state = observation.reshape(-1, 4)

            if m in ('pg', 'acs'):
                # single output, used as the probability of action 1.
                net = self.model if m == 'pg' else self.actor
                action = 1 if net.predict(state)[0][0] > 0.5 else 0
            else:
                action = np.argmax(self.model.predict(state)[0])

            observation, reward, done, _ = self.env.step(action)
            total += reward

            if not done:
                continue

            print("Reward for this episode was: {}".format(total))
            finished += 1
            total = 0
            observation = self.env.reset()

        self.env.close()

    def plot(self, history):
        """Plot reward and loss against the episode index, side by side.

        Arguments:
            history: dict with the keys 'episode', 'Episode_reward', 'Loss'.
        """
        episodes = history['episode']
        panels = (
            (121, 'Episode_reward', history['Episode_reward']),
            (122, 'Loss', history['Loss']),
        )

        fig = plt.figure()

        for position, title, series in panels:
            ax = fig.add_subplot(position)
            ax.plot(episodes, series)
            ax.set_title(title)
            ax.set_xlabel('episode')

        plt.show()

    def save_history(self, history, name):
        """Write the training history to history/<name> as CSV.

        Arguments:
            history: dict mapping column name to a list of values.
            name: file name of the CSV inside the history/ directory.
        """
        target = os.path.join('history', name)
        pd.DataFrame.from_dict(history).to_csv(target, index=False, encoding='utf-8')
================================================
FILE: DoubleDQN.py
================================================
# -*- coding: utf-8 -*-
import os
import random
import numpy as np
from DQN import DQN
class DDQN(DQN):
    """Double DQN.

    The online network chooses the next action and a separate target
    network evaluates it.
    """

    def __init__(self):
        # The base constructor already builds self.model through
        # build_model(), so only the target network is created here.
        super(DDQN, self).__init__()

        self.target_model = self.build_model()
        self.update_target_model()

    def load(self):
        """Load the model weights when the weight file exists."""
        if os.path.exists('model/ddqn.h5'):
            self.model.load_weights('model/ddqn.h5')

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def process_batch(self, batch):
        """Sample a batch and build the Double-DQN regression targets.

        Arguments:
            batch: batch size.

        Returns:
            X: sampled states.
            y: predicted Q values with the entry of the taken action
                replaced by the target.
        """
        # random batch from the experience replay.
        data = random.sample(self.memory_buffer, batch)

        states = np.array([d[0] for d in data])
        next_states = np.array([d[3] for d in data])

        y = self.model.predict(states)
        # the target network evaluates, the online network selects.
        q = self.target_model.predict(next_states)
        next_action = np.argmax(self.model.predict(next_states), axis=1)

        for i, (_, action, reward, _, done) in enumerate(data):
            target = reward
            if not done:
                target += self.gamma * q[i][next_action[i]]
            y[i][action] = target

        return states, y

    def train(self, episode, batch):
        """Train the Q network.

        Arguments:
            episode: number of episodes to play.
            batch: batch size sampled from the replay buffer.

        Returns:
            history: dict with reward and last loss of every fifth episode.
        """
        history = {'episode': [], 'Episode_reward': [], 'Loss': []}

        # number of training steps performed so far.
        count = 0
        for i in range(episode):
            observation = self.env.reset()
            reward_sum = 0
            # Reported as inf until the first training step of the episode.
            # np.inf is used because the np.infty alias no longer exists in
            # NumPy 2.0.
            loss = np.inf
            done = False

            while not done:
                # choose the action with the epsilon-greedy rule.
                x = observation.reshape(-1, 4)
                action = self.egreedy_action(x)
                observation, reward, done, _ = self.env.step(action)
                # add data to experience replay.
                reward_sum += reward
                self.remember(x[0], action, reward, observation, done)

                if len(self.memory_buffer) > batch:
                    X, y = self.process_batch(batch)
                    loss = self.model.train_on_batch(X, y)

                    count += 1
                    # reduce epsilon once per batch.
                    self.update_epsilon()

                    # refresh the target network every 20 training steps.
                    if count % 20 == 0:
                        self.update_target_model()

            if i % 5 == 0:
                history['episode'].append(i)
                history['Episode_reward'].append(reward_sum)
                history['Loss'].append(loss)

                print('Episode: {} | Episode reward: {} | loss: {:.3f} | e:{:.2f}'.format(i, reward_sum, loss, self.epsilon))

        self.model.save_weights('model/ddqn.h5')

        return history
if __name__ == '__main__':
    # Train for 600 episodes with batches of 32 samples and store the
    # recorded curves, then reload the saved weights and play.
    agent = DDQN()
    records = agent.train(600, 32)
    agent.save_history(records, 'ddqn.csv')

    agent.load()
    agent.play('dqn')
================================================
FILE: DuelingDQN.py
================================================
# -*- coding: utf-8 -*-
import os
import numpy as np
from keras.layers import Input, Dense, Add, Subtract, Lambda
from keras.models import Model
from keras.optimizers import Adam
import keras.backend as K
from NatureDQN import NDQN
class DuelingDQN(NDQN):
    """Dueling DQN.

    Only the network architecture differs from the parent class: the
    parent constructor builds both of its networks through ``build_model``,
    which is overridden here.
    """

    def __init__(self):
        super(DuelingDQN, self).__init__()

    def load(self):
        """Load the weights saved in 'model/dueling.h5' if that file exists."""
        if os.path.exists('model/dueling.h5'):
            self.model.load_weights('model/dueling.h5')

    def build_model(self):
        """Build the dueling Q-network.

        Two 16-unit ReLU layers feed a value head and an advantage head;
        the output is ``value + (advantage - mean(advantage))``.
        """
        inputs = Input(shape=(4,))
        x = Dense(16, activation='relu')(inputs)
        x = Dense(16, activation='relu')(x)

        # NOTE(review): the dueling architecture normally uses a single-unit
        # state-value head; this one has 2 units. It is left unchanged so
        # that weights stored in model/dueling.h5 keep loading.
        value = Dense(2, activation='linear')(x)
        a = Dense(2, activation='linear')(x)
        # centre the advantages around their mean over the actions.
        mean = Lambda(lambda t: K.mean(t, axis=1, keepdims=True))(a)
        advantage = Subtract()([a, mean])

        q = Add()([value, advantage])

        model = Model(inputs=inputs, outputs=q)
        model.compile(loss='mse', optimizer=Adam(1e-3))

        return model

    def train(self, episode, batch):
        """Train the online network with experience replay.

        Arguments:
            episode: game episode
            batch: batch size

        Returns:
            history: dict with the keys 'episode', 'Episode_reward' and
                'Loss', recorded every 5th episode.
        """
        history = {'episode': [], 'Episode_reward': [], 'Loss': []}

        count = 0
        for i in range(episode):
            observation = self.env.reset()
            reward_sum = 0
            # reported as inf when no training step ran in this episode.
            # np.inf is used because the np.infty alias no longer exists in
            # NumPy 2.0.
            loss = np.inf
            done = False

            while not done:
                # choose an action with the epsilon-greedy policy.
                x = observation.reshape(-1, 4)
                action = self.egreedy_action(x)
                observation, reward, done, _ = self.env.step(action)
                # add data to experience replay.
                reward_sum += reward
                self.remember(x[0], action, reward, observation, done)

                if len(self.memory_buffer) > batch:
                    X, y = self.process_batch(batch)
                    loss = self.model.train_on_batch(X, y)

                    count += 1
                    # reduce epsilon once per trained batch.
                    self.update_epsilon()

                    # refresh target_model every 20 training steps.
                    if count != 0 and count % 20 == 0:
                        self.update_target_model()

            if i % 5 == 0:
                history['episode'].append(i)
                history['Episode_reward'].append(reward_sum)
                history['Loss'].append(loss)

                print('Episode: {} | Episode reward: {} | loss: {:.3f} | e:{:.2f}'.format(i, reward_sum, loss, self.epsilon))

        self.model.save_weights('model/dueling.h5')

        return history
if __name__ == '__main__':
    # Train for 600 episodes with batches of 32 samples and store the
    # recorded curves, then reload the saved weights and play.
    agent = DuelingDQN()
    records = agent.train(600, 32)
    agent.save_history(records, 'dueling.csv')

    agent.load()
    agent.play('dqn')
================================================
FILE: NatureDQN.py
================================================
# -*- coding: utf-8 -*-
import os
import random
import numpy as np
from DQN import DQN
class NDQN(DQN):
    """Nature Deep Q-Learning.

    Adds a separate target network to the parent agent: ``model`` is
    trained on every step, ``target_model`` is a periodically refreshed
    copy that supplies the bootstrap values.
    """

    def __init__(self):
        super(NDQN, self).__init__()

        self.model = self.build_model()
        self.target_model = self.build_model()
        # start with both networks holding identical weights.
        self.update_target_model()

    def load(self):
        """Load the weights saved in 'model/ndqn.h5' if that file exists."""
        if os.path.exists('model/ndqn.h5'):
            self.model.load_weights('model/ndqn.h5')

    def update_target_model(self):
        """Copy the weights of ``model`` into ``target_model``."""
        self.target_model.set_weights(self.model.get_weights())

    def process_batch(self, batch):
        """Sample a batch from the experience replay and build its Q targets.

        Arguments:
            batch: batch size

        Returns:
            X: states
            y: [Q_value1, Q_value2]
        """
        # random choice of batch data from experience replay.
        data = random.sample(self.memory_buffer, batch)

        states = np.array([d[0] for d in data])
        next_states = np.array([d[3] for d in data])

        # The current predictions serve as targets for the actions that were
        # not taken, so only the taken action produces an error.
        y = self.model.predict(states)
        # bootstrap values come from the target network.
        q = self.target_model.predict(next_states)

        for i, (_, action, reward, _, done) in enumerate(data):
            target = reward
            if not done:
                target += self.gamma * np.amax(q[i])
            y[i][action] = target

        return states, y

    def train(self, episode, batch):
        """Train the online network with experience replay.

        Arguments:
            episode: game episode
            batch: batch size

        Returns:
            history: dict with the keys 'episode', 'Episode_reward' and
                'Loss', recorded every 5th episode.
        """
        history = {'episode': [], 'Episode_reward': [], 'Loss': []}

        count = 0
        for i in range(episode):
            observation = self.env.reset()
            reward_sum = 0
            # reported as inf when no training step ran in this episode.
            # np.inf is used because the np.infty alias no longer exists in
            # NumPy 2.0.
            loss = np.inf
            done = False

            while not done:
                # choose an action with the epsilon-greedy policy.
                x = observation.reshape(-1, 4)
                action = self.egreedy_action(x)
                observation, reward, done, _ = self.env.step(action)
                # add data to experience replay.
                reward_sum += reward
                self.remember(x[0], action, reward, observation, done)

                if len(self.memory_buffer) > batch:
                    X, y = self.process_batch(batch)
                    loss = self.model.train_on_batch(X, y)

                    count += 1
                    # reduce epsilon once per trained batch.
                    self.update_epsilon()

                    # refresh target_model every 20 training steps.
                    if count != 0 and count % 20 == 0:
                        self.update_target_model()

            if i % 5 == 0:
                history['episode'].append(i)
                history['Episode_reward'].append(reward_sum)
                history['Loss'].append(loss)

                print('Episode: {} | Episode reward: {} | loss: {:.3f} | e:{:.2f}'.format(i, reward_sum, loss, self.epsilon))

        self.model.save_weights('model/ndqn.h5')

        return history
if __name__ == '__main__':
    # Train for 600 episodes with batches of 32 samples and store the
    # recorded curves, then reload the saved weights and play.
    agent = NDQN()
    records = agent.train(600, 32)
    agent.save_history(records, 'ndqn.csv')

    agent.load()
    agent.play('dqn')
================================================
FILE: PPO_TF.py
================================================
import os
import gym
import numpy as np
import pandas as pd
import tensorflow as tf
class PPO:
    """Proximal Policy Optimization agent for the gym 'Pendulum-v0' task.

    ``t`` selects the actor objective: 'ppo1' uses the KL-penalized
    surrogate with an adaptive coefficient, every other value (default
    'ppo2') uses the clipped surrogate.
    """

    def __init__(self, ep, batch, t='ppo2'):
        """Create the environment, the TensorFlow session and the model.

        Arguments:
            ep: number of episodes played by ``train``.
            batch: number of environment steps collected per update.
            t: objective type, 'ppo1' or 'ppo2'.
        """
        self.t = t
        self.ep = ep
        self.batch = batch
        self.log = 'model/{}_log'.format(t)

        self.env = gym.make('Pendulum-v0')
        self.bound = self.env.action_space.high[0]

        self.gamma = 0.9
        self.A_LR = 0.0001
        self.C_LR = 0.0002
        self.A_UPDATE_STEPS = 10
        self.C_UPDATE_STEPS = 10

        # KL penalty: d_target and β for ppo1
        self.kl_target = 0.01
        self.lam = 0.5
        # ε for ppo2
        self.epsilon = 0.2

        # Clear the default graph BEFORE the session and the model exist.
        # Resetting it while a session is active (as train() used to do) is
        # documented by TensorFlow as undefined behaviour.
        tf.reset_default_graph()
        self.sess = tf.Session()
        self.build_model()

    def _build_critic(self):
        """critic model.

        One 100-unit ReLU layer followed by a single linear unit that
        estimates the state value. ``self.advantage`` is the target fed
        through ``self.dr`` minus that estimate.
        """
        with tf.variable_scope('critic'):
            x = tf.layers.dense(self.states, 100, tf.nn.relu)

            self.v = tf.layers.dense(x, 1)
            self.advantage = self.dr - self.v

    def _build_actor(self, name, trainable):
        """actor model.

        Arguments:
            name: variable scope of the network.
            trainable: whether the layers are created as trainable.

        Returns:
            norm_dist: normal distribution over the action.
            params: global variables collected under scope ``name``.
        """
        with tf.variable_scope(name):
            x = tf.layers.dense(self.states, 100, tf.nn.relu, trainable=trainable)
            # tanh scaled by the action bound keeps the mean inside the
            # range [-bound, bound].
            mu = self.bound * tf.layers.dense(x, 1, tf.nn.tanh, trainable=trainable)
            sigma = tf.layers.dense(x, 1, tf.nn.softplus, trainable=trainable)

            norm_dist = tf.distributions.Normal(loc=mu, scale=sigma)

        params = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=name)

        return norm_dist, params

    def build_model(self):
        """build model with ppo loss.

        Creates the placeholders, the critic, the trainable actor and its
        frozen copy, both losses, the training / sampling / copy ops, and
        finally initializes all variables.
        """
        # inputs
        self.states = tf.placeholder(tf.float32, [None, 3], 'states')
        self.action = tf.placeholder(tf.float32, [None, 1], 'action')
        self.adv = tf.placeholder(tf.float32, [None, 1], 'advantage')
        self.dr = tf.placeholder(tf.float32, [None, 1], 'discounted_r')

        # build model
        self._build_critic()
        nd, pi_params = self._build_actor('actor', trainable=True)
        old_nd, oldpi_params = self._build_actor('old_actor', trainable=False)

        # define ppo loss
        with tf.variable_scope('loss'):
            # critic loss
            self.closs = tf.reduce_mean(tf.square(self.advantage))

            # actor loss
            with tf.variable_scope('surrogate'):
                # probability ratio between the new and the old policy.
                ratio = tf.exp(nd.log_prob(self.action) - old_nd.log_prob(self.action))
                surr = ratio * self.adv

            if self.t == 'ppo1':
                self.tflam = tf.placeholder(tf.float32, None, 'lambda')
                kl = tf.distributions.kl_divergence(old_nd, nd)
                self.kl_mean = tf.reduce_mean(kl)
                self.aloss = -(tf.reduce_mean(surr - self.tflam * kl))
            else:
                self.aloss = -tf.reduce_mean(tf.minimum(
                    surr,
                    tf.clip_by_value(ratio, 1. - self.epsilon, 1. + self.epsilon) * self.adv))

        # define Optimizer
        with tf.variable_scope('optimize'):
            self.ctrain_op = tf.train.AdamOptimizer(self.C_LR).minimize(self.closs)
            self.atrain_op = tf.train.AdamOptimizer(self.A_LR).minimize(self.aloss)

        with tf.variable_scope('sample_action'):
            self.sample_op = tf.squeeze(nd.sample(1), axis=0)

        # update old actor
        with tf.variable_scope('update_old_actor'):
            self.update_old_actor = [oldp.assign(p) for p, oldp in zip(pi_params, oldpi_params)]

        tf.summary.FileWriter(self.log, self.sess.graph)

        self.sess.run(tf.global_variables_initializer())

    def choose_action(self, state):
        """Sample a continuous action from the policy's normal distribution.

        Arguments:
            state: state.

        Returns:
            action, clipped to [-bound, bound].
        """
        state = state[np.newaxis, :]
        action = self.sess.run(self.sample_op, {self.states: state})[0]

        return np.clip(action, -self.bound, self.bound)

    def get_value(self, state):
        """Evaluate the critic.

        Arguments:
            state: a single state or a batch of states.

        Returns:
            the critic output ``self.v`` for the given states.
        """
        # a single state is promoted to a batch of one.
        if state.ndim < 2:
            state = state[np.newaxis, :]

        return self.sess.run(self.v, {self.states: state})

    def discount_reward(self, states, rewards, next_observation):
        """Compute one-step bootstrapped target values.

        Each target is ``reward + gamma * V(next state)``, where the value
        of the next state comes from the critic.

        Arguments:
            states: states of the collected steps.
            rewards: rewards of the collected steps.
            next_observation: state reached after the last step.

        Returns:
            targets: column vector with one target per step.
        """
        s = np.vstack([states, next_observation.reshape(-1, 3)])
        q_values = self.get_value(s).flatten()

        # q_values[1:] holds the value of each step's successor state.
        targets = rewards + self.gamma * q_values[1:]
        targets = targets.reshape(-1, 1)

        return targets

    def update(self, states, action, dr):
        """update model.

        Arguments:
            states: states.
            action: action of states.
            dr: target values of the states (see ``discount_reward``).
        """
        # freeze a copy of the current policy before it is changed.
        self.sess.run(self.update_old_actor)

        adv = self.sess.run(self.advantage,
                            {self.states: states,
                             self.dr: dr})

        # update actor
        if self.t == 'ppo1':
            # run ppo1 loss
            for _ in range(self.A_UPDATE_STEPS):
                _, kl = self.sess.run(
                    [self.atrain_op, self.kl_mean],
                    {self.states: states,
                     self.action: action,
                     self.adv: adv,
                     self.tflam: self.lam})

            # adapt the KL coefficient to the divergence of the last step.
            if kl < self.kl_target / 1.5:
                self.lam /= 2
            elif kl > self.kl_target * 1.5:
                self.lam *= 2
        else:
            # run ppo2 loss
            for _ in range(self.A_UPDATE_STEPS):
                self.sess.run(self.atrain_op,
                              {self.states: states,
                               self.action: action,
                               self.adv: adv})

        # update critic
        for _ in range(self.C_UPDATE_STEPS):
            self.sess.run(self.ctrain_op,
                          {self.states: states,
                           self.dr: dr})

    def train(self):
        """train method.

        Returns:
            history: dict with the keys 'episode' and 'Episode_reward'.
        """
        history = {'episode': [], 'Episode_reward': []}

        for i in range(self.ep):
            observation = self.env.reset()

            # NOTE: steps collected after the last full batch of the
            # previous episode are dropped here without being trained on.
            states, actions, rewards = [], [], []
            episode_reward = 0
            j = 0

            while True:
                a = self.choose_action(observation)
                next_observation, reward, done, _ = self.env.step(a)
                states.append(observation)
                actions.append(a)

                episode_reward += reward
                # the reward is rescaled before it is used for training.
                rewards.append((reward + 8) / 8)

                observation = next_observation

                if (j + 1) % self.batch == 0:
                    states = np.array(states)
                    actions = np.array(actions)
                    rewards = np.array(rewards)
                    d_reward = self.discount_reward(states, rewards, next_observation)

                    self.update(states, actions, d_reward)

                    states, actions, rewards = [], [], []

                if done:
                    break
                j += 1

            history['episode'].append(i)
            history['Episode_reward'].append(episode_reward)
            print('Episode: {} | Episode reward: {:.2f}'.format(i, episode_reward))

        return history

    def save_history(self, history, name):
        """Write a training history to ``history/<name>`` as a CSV file.

        Arguments:
            history: dict mapping column names to equally long lists.
            name: file name of the CSV inside the ``history`` directory.
        """
        directory = 'history'
        # to_csv does not create missing folders; make sure the directory
        # exists so the results of a finished run are not lost.
        if not os.path.isdir(directory):
            os.makedirs(directory)

        path = os.path.join(directory, name)
        df = pd.DataFrame.from_dict(history)
        df.to_csv(path, index=False, encoding='utf-8')
if __name__ == '__main__':
    # 1000 episodes, one update every 32 steps, clipped ('ppo2') objective.
    agent = PPO(1000, 32, 'ppo2')
    records = agent.train()
    agent.save_history(records, 'ppo2.csv')
================================================
FILE: PolicyNetwork.py
================================================
# -*- coding: utf-8 -*-
import os
import numpy as np
from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import Adam
import keras.backend as K
from DRL import DRL
class PolicyNetwork(DRL):
"""Policy Gradient Algorithms(Policy Network)
"""
def __init__(self):
    """Set the discount factor and build the policy network."""
    super(PolicyNetwork, self).__init__()

    # discount factor used by discount_reward.
    self.gamma = 0.95
    self.model = self._build_model()
def load(self):
    """Restore the weights saved in 'model/pg.h5' when that file exists."""
    weights_path = 'model/pg.h5'
    if not os.path.exists(weights_path):
        return

    self.model.load_weights(weights_path)
def _build_model(self):
    """Build the policy network.

    A 4-dimensional observation passes through two 16-unit ReLU layers
    into a single sigmoid unit. The model is returned uncompiled.
    """
    observation = Input(shape=(4,), name='ob_input')

    hidden = Dense(16, activation='relu')(observation)
    hidden = Dense(16, activation='relu')(hidden)
    action_prob = Dense(1, activation='sigmoid')(hidden)

    return Model(inputs=observation, outputs=action_prob)
def loss(self, y_true, y_pred):
    """Policy-gradient loss: cross-entropy of the taken action, weighted
    by the reward of that step.

    Arguments:
        y_true: (action, reward); column 0 holds the taken action and
            column 1 the discounted reward.
        y_pred: action_prob, one column per sample.

    Returns:
        loss: reward-weighted cross-entropy with one row per sample.
    """
    action_true = K.reshape(y_true[:, 0], (-1, 1))
    # Keep the reward as a column too. Multiplying the (batch, 1)
    # cross-entropy by a flat (batch,) reward broadcasts to (batch, batch)
    # and pairs every sample's loss with every other sample's reward.
    discount_episode_reward = K.reshape(y_true[:, 1], (-1, 1))

    cross_entropy = K.binary_crossentropy(action_true, y_pred)

    return cross_entropy * discount_episode_reward
def discount_reward(self, rewards):
    """Compute the normalized discounted returns of one episode.

    Arguments:
        rewards: rewards in a episode, in time order.

    Returns:
        list with one value per step: the discounted return from that
        step onwards, shifted to zero mean and scaled to unit standard
        deviation. An empty input gives an empty list.
    """
    if len(rewards) == 0:
        return []

    # compute the discounted reward backwards through time.
    discount_rewards = np.zeros_like(rewards, dtype=np.float32)
    cumulative = 0.
    for i in reversed(range(len(rewards))):
        cumulative = cumulative * self.gamma + rewards[i]
        discount_rewards[i] = cumulative

    # size the rewards to be unit normal (helps control the gradient estimator variance).
    discount_rewards -= np.mean(discount_rewards)
    std = np.std(discount_rewards)
    # True division is required here: `//=` floors every value to a whole
    # number. With zero spread (e.g. a one-step episode) the centred values
    # are already all zero, so the division is skipped instead of
    # producing NaN.
    if std > 0:
        discount_rewards /= std

    return list(discount_rewards)
def train(self, episode, batch):
    """training model.

    Plays ``episode`` episodes and updates the model on all steps collected
    since the previous update whenever the episode index is a non-zero
    multiple of ``batch``.

    Arguments:
        episode: game episode
        batch: batch size of episode

    Returns:
        history: dict with the keys 'episode', 'Episode_reward' (average
            episode reward since the previous update) and 'Loss'.
    """
    self.model.compile(loss=self.loss, optimizer=Adam(lr=0.01))

    history = {'episode': [], 'Episode_reward': [], 'Loss': []}

    states = []
    actions = []
    rewards = []
    discount_rewards = []
    # Number of episodes collected since the last update. The first update
    # happens at i == batch and therefore covers batch + 1 episodes, so the
    # average must not simply divide by `batch`.
    episodes_in_batch = 0

    for i in range(episode):
        observation = self.env.reset()
        erewards = []

        while True:
            x = observation.reshape(-1, 4)
            prob = self.model.predict(x)[0][0]
            # choice action with prob.
            action = np.random.choice(np.array(range(2)), size=1, p=[1 - prob, prob])[0]
            observation, reward, done, _ = self.env.step(action)
            # record data of this step.
            states.append(x[0])
            actions.append(action)
            erewards.append(reward)
            rewards.append(reward)

            if done:
                # calculate discount rewards every episode.
                discount_rewards.extend(self.discount_reward(erewards))
                break

        episodes_in_batch += 1

        # update model per batch episode.
        if i != 0 and i % batch == 0:
            episode_reward = sum(rewards) / episodes_in_batch

            X = np.array(states)
            y = np.array(list(zip(actions, discount_rewards)))

            loss = self.model.train_on_batch(X, y)

            history['episode'].append(i)
            history['Episode_reward'].append(episode_reward)
            history['Loss'].append(loss)

            print('Episode: {} | Episode reward: {} | loss: {:.3f}'.format(i, episode_reward, loss))

            states = []
            actions = []
            rewards = []
            discount_rewards = []
            episodes_in_batch = 0

    self.model.save_weights('model/pg.h5')

    return history
if __name__ == '__main__':
    # Train for 5000 episodes with an update every 5 episodes and store the
    # recorded curves, then reload the saved weights and play.
    agent = PolicyNetwork()
    records = agent.train(5000, 5)
    agent.save_history(records, 'pg.csv')

    agent.load()
    agent.play()
================================================
FILE: README.md
================================================
# Deep-Reinforcement-Learning-Practice
Practice implementations of deep reinforcement learning algorithms with Keras, TensorFlow and gym.
Continuously updated.
## Algorithm
| # | Name | Paper |
| - | ----- | :--------: |
| 1 | [DQN](DQN.py) | [Playing Atari with Deep Reinforcement Learning](http://cn.arxiv.org/abs/1312.5602)|
| 2 | [Nature DQN](NatureDQN.py) | [Human-level control through deep reinforcement learning](https://www.nature.com/articles/nature14236) |
| 3 | [Double DQN](DoubleDQN.py) | [Deep Reinforcement Learning with Double Q-learning](http://cn.arxiv.org/abs/1509.06461v1) |
| 4 | [Dueling DQN](DuelingDQN.py) | [Dueling Network Architectures for Deep Reinforcement Learning](https://arxiv.org/pdf/1511.06581.pdf) |
| 5 | [Actor-Critic](AC_sparse.py) | [Actor-Critic Algorithms](https://papers.nips.cc/paper/1786-actor-critic-algorithms.pdf) |
| 6 | [Policy Network](PolicyNetwork.py) | [Policy gradient methods for reinforcement learning with function approximation](https://www.researchgate.net/publication/2503757_Policy_Gradient_Methods_for_Reinforcement_Learning_with_Function_Approximation) |
| 7 | [DDPG](DDPG.py) | [Continuous Control with Deep Reinforcement Learning](https://arxiv.org/abs/1509.02971) |
| 8 | [PPO_TF](PPO_TF.py) | [Proximal Policy Optimization Algorithms](https://arxiv.org/abs/1707.06347) |
| 9 | [A3C](A3C_sparse.py) | [Asynchronous Methods for Deep Reinforcement Learning](https://arxiv.org/pdf/1602.01783.pdf) |
================================================
FILE: game/CartPole.py
================================================
# -*- coding: utf-8 -*-
import gym
import numpy as np
def try_gym(episodes=10):
    """Play CartPole with random actions and print each episode's reward.

    Arguments:
        episodes: number of episodes to play before the environment is
            closed.
    """
    # create CartPole env.
    env = gym.make('CartPole-v0')

    try:
        # reset game env.
        env.reset()

        # finished episodes of game
        random_episodes = 0
        # sum of reward of game per episode
        reward_sum = 0

        while random_episodes < episodes:
            # show game
            env.render()
            # choose one of the two actions at random and execute it.
            _, reward, done, _ = env.step(np.random.randint(0, 2))
            reward_sum += reward
            # print result and reset game env if game done.
            if done:
                random_episodes += 1
                print("Reward for this episode was: {}".format(reward_sum))
                reward_sum = 0
                env.reset()
    finally:
        # close the environment even if the loop raises or is interrupted.
        env.close()
# Run the random-action demo only when this file is executed as a script.
if __name__ == "__main__":
    try_gym()
================================================
FILE: game/Pendulum.py
================================================
# -*- coding: utf-8 -*-
import gym
def try_gym(episodes=10):
    """Play Pendulum with random actions and print each episode's reward.

    Arguments:
        episodes: number of episodes to play before the environment is
            closed.
    """
    # create Pendulum env.
    env = gym.make('Pendulum-v0')

    try:
        # reset game env.
        env.reset()

        # finished episodes of game
        random_episodes = 0
        # sum of reward of game per episode
        reward_sum = 0

        while random_episodes < episodes:
            # show game
            env.render()
            # sample a random action from the action space and execute it.
            action = env.action_space.sample()
            _, reward, done, _ = env.step(action)
            reward_sum += reward
            # print result and reset game env if game done.
            if done:
                random_episodes += 1
                print("Reward for this episode was: {}".format(reward_sum))
                reward_sum = 0
                env.reset()
    finally:
        # close the environment even if the loop raises or is interrupted.
        env.close()
# Run the random-action demo only when this file is executed as a script.
if __name__ == "__main__":
    try_gym()
================================================
FILE: history/a3c_sparse.csv
================================================
episode,Episode_reward
0,11.0
1,17.0
2,17.0
3,13.0
4,14.0
5,22.0
6,34.0
7,14.0
8,20.0
9,25.0
10,14.0
11,12.0
12,14.0
13,15.0
14,11.0
15,21.0
16,33.0
17,18.0
18,20.0
19,34.0
20,13.0
21,25.0
22,21.0
23,17.0
24,19.0
25,26.0
26,36.0
27,45.0
28,31.0
29,19.0
30,14.0
31,23.0
32,32.0
33,19.0
34,10.0
35,14.0
36,15.0
37,28.0
38,10.0
39,56.0
40,20.0
41,10.0
42,82.0
43,32.0
44,10.0
45,21.0
46,67.0
47,27.0
48,21.0
49,37.0
50,15.0
51,19.0
52,27.0
53,45.0
54,18.0
55,22.0
56,25.0
57,17.0
58,31.0
59,26.0
60,33.0
61,40.0
62,14.0
63,41.0
64,76.0
65,23.0
66,83.0
67,32.0
68,77.0
69,69.0
70,21.0
71,17.0
72,51.0
73,17.0
74,18.0
75,111.0
76,11.0
77,38.0
78,31.0
79,31.0
80,37.0
81,25.0
82,51.0
83,17.0
84,28.0
85,18.0
86,21.0
87,84.0
88,54.0
89,42.0
90,24.0
91,28.0
92,65.0
93,60.0
94,20.0
95,52.0
96,22.0
97,124.0
98,35.0
99,90.0
100,113.0
101,66.0
102,108.0
103,118.0
104,103.0
105,61.0
106,172.0
107,121.0
108,39.0
109,63.0
110,35.0
111,38.0
112,64.0
113,141.0
114,47.0
115,154.0
116,75.0
117,31.0
118,22.0
119,24.0
120,60.0
121,42.0
122,65.0
123,70.0
124,28.0
125,38.0
126,87.0
127,30.0
128,39.0
129,60.0
130,145.0
131,31.0
132,115.0
133,34.0
134,23.0
135,19.0
136,35.0
137,29.0
138,139.0
139,51.0
140,70.0
141,55.0
142,55.0
143,78.0
144,70.0
145,156.0
146,87.0
147,57.0
148,111.0
149,46.0
150,98.0
151,65.0
152,30.0
153,128.0
154,128.0
155,21.0
156,79.0
157,23.0
158,117.0
159,66.0
160,128.0
161,144.0
162,81.0
163,93.0
164,137.0
165,200.0
166,188.0
167,146.0
168,178.0
169,132.0
170,76.0
171,182.0
172,57.0
173,200.0
174,115.0
175,165.0
176,148.0
177,48.0
178,92.0
179,38.0
180,62.0
181,78.0
182,63.0
183,50.0
184,200.0
185,89.0
186,92.0
187,141.0
188,103.0
189,99.0
190,168.0
191,118.0
192,111.0
193,150.0
194,126.0
195,102.0
196,81.0
197,92.0
198,101.0
199,157.0
200,142.0
201,145.0
202,121.0
203,200.0
204,101.0
205,38.0
206,93.0
207,55.0
208,66.0
209,115.0
210,111.0
211,33.0
212,141.0
213,180.0
214,183.0
215,200.0
216,200.0
217,199.0
218,200.0
219,109.0
220,200.0
221,200.0
222,200.0
223,200.0
224,150.0
225,200.0
226,143.0
227,163.0
228,126.0
229,151.0
230,191.0
231,146.0
232,160.0
233,159.0
234,181.0
235,133.0
236,159.0
237,196.0
238,171.0
239,170.0
240,200.0
241,200.0
242,200.0
243,197.0
244,200.0
245,147.0
246,200.0
247,200.0
248,200.0
249,167.0
250,200.0
251,200.0
252,200.0
253,164.0
254,159.0
255,200.0
256,115.0
257,200.0
258,192.0
259,154.0
260,138.0
261,58.0
262,161.0
263,200.0
264,200.0
265,150.0
266,145.0
267,188.0
268,154.0
269,86.0
270,151.0
271,53.0
272,117.0
273,106.0
274,70.0
275,45.0
276,65.0
277,105.0
278,42.0
279,38.0
280,69.0
281,59.0
282,86.0
283,52.0
284,75.0
285,23.0
286,99.0
287,66.0
288,43.0
289,70.0
290,55.0
291,105.0
292,85.0
293,112.0
294,84.0
295,76.0
296,83.0
297,146.0
298,114.0
299,87.0
300,107.0
301,118.0
302,168.0
303,163.0
304,200.0
305,200.0
306,186.0
307,200.0
308,200.0
309,182.0
310,133.0
311,200.0
312,121.0
313,34.0
314,96.0
315,20.0
316,180.0
317,30.0
318,20.0
319,21.0
320,17.0
321,22.0
322,27.0
323,26.0
324,89.0
325,102.0
326,27.0
327,31.0
328,41.0
329,107.0
330,20.0
331,111.0
332,127.0
333,94.0
334,134.0
335,103.0
336,187.0
337,200.0
338,200.0
339,200.0
340,200.0
341,149.0
342,200.0
343,200.0
344,112.0
345,146.0
346,200.0
347,153.0
348,200.0
349,123.0
350,200.0
351,152.0
352,168.0
353,200.0
354,131.0
355,147.0
356,123.0
357,148.0
358,200.0
359,200.0
360,175.0
361,200.0
362,130.0
363,200.0
364,200.0
365,120.0
366,33.0
367,173.0
368,147.0
369,175.0
370,200.0
371,200.0
372,200.0
373,188.0
374,110.0
375,200.0
376,116.0
377,104.0
378,200.0
379,109.0
380,200.0
381,191.0
382,200.0
383,200.0
384,200.0
385,200.0
386,128.0
387,170.0
388,200.0
389,200.0
390,200.0
391,200.0
392,127.0
393,200.0
394,192.0
395,200.0
396,171.0
397,200.0
398,123.0
399,200.0
400,191.0
401,173.0
402,148.0
403,198.0
404,96.0
405,200.0
406,110.0
407,199.0
408,200.0
409,106.0
410,189.0
411,200.0
412,200.0
413,181.0
414,200.0
415,200.0
416,200.0
417,197.0
418,189.0
419,124.0
420,162.0
421,197.0
422,200.0
423,200.0
424,200.0
425,200.0
426,200.0
427,200.0
428,200.0
429,188.0
430,83.0
431,197.0
432,200.0
433,200.0
434,200.0
435,200.0
436,165.0
437,200.0
438,200.0
439,200.0
440,200.0
441,200.0
442,200.0
443,143.0
444,200.0
445,200.0
446,192.0
447,200.0
448,200.0
449,200.0
450,164.0
451,135.0
452,160.0
453,119.0
454,140.0
455,139.0
456,154.0
457,167.0
458,148.0
459,139.0
460,122.0
461,129.0
462,131.0
463,117.0
464,145.0
465,159.0
466,127.0
467,157.0
468,156.0
469,166.0
470,148.0
471,141.0
472,200.0
473,200.0
474,183.0
475,200.0
476,200.0
477,200.0
478,200.0
479,200.0
480,200.0
481,200.0
482,200.0
483,200.0
484,200.0
485,200.0
486,200.0
487,200.0
488,200.0
489,200.0
490,200.0
491,200.0
492,200.0
493,200.0
494,200.0
495,200.0
496,183.0
497,185.0
498,200.0
499,200.0
500,200.0
501,200.0
502,173.0
503,200.0
504,166.0
505,200.0
506,194.0
507,200.0
508,200.0
509,200.0
510,158.0
511,200.0
512,187.0
513,166.0
514,184.0
515,142.0
516,179.0
517,139.0
518,184.0
519,181.0
520,136.0
521,194.0
522,191.0
523,156.0
524,131.0
525,177.0
526,128.0
527,163.0
528,147.0
529,200.0
530,200.0
531,200.0
532,200.0
533,200.0
534,200.0
535,200.0
536,200.0
537,200.0
538,200.0
539,200.0
540,200.0
541,200.0
542,200.0
543,187.0
544,200.0
545,200.0
546,200.0
547,200.0
548,200.0
549,200.0
550,199.0
551,200.0
552,200.0
553,200.0
554,200.0
555,200.0
556,200.0
557,200.0
558,200.0
559,142.0
560,200.0
561,200.0
562,143.0
563,200.0
564,200.0
565,181.0
566,166.0
567,200.0
568,160.0
569,200.0
570,200.0
571,200.0
572,200.0
573,200.0
574,200.0
575,200.0
576,200.0
577,200.0
578,200.0
579,200.0
580,200.0
581,200.0
582,200.0
583,200.0
584,200.0
585,200.0
586,200.0
587,200.0
588,200.0
589,200.0
590,200.0
591,200.0
592,200.0
593,200.0
594,200.0
595,200.0
596,200.0
597,200.0
598,200.0
599,200.0
600,200.0
601,200.0
602,200.0
603,200.0
604,200.0
605,200.0
606,200.0
607,200.0
608,200.0
609,200.0
610,200.0
611,200.0
612,200.0
613,200.0
614,200.0
615,200.0
616,200.0
617,200.0
618,200.0
619,200.0
620,200.0
621,200.0
622,200.0
623,200.0
624,200.0
625,200.0
626,200.0
627,200.0
628,200.0
629,200.0
630,200.0
631,200.0
632,200.0
633,200.0
634,200.0
635,200.0
636,200.0
637,200.0
638,200.0
639,200.0
640,200.0
641,200.0
642,200.0
643,200.0
644,200.0
645,200.0
646,200.0
647,200.0
648,200.0
649,200.0
650,200.0
651,200.0
652,200.0
653,200.0
654,200.0
655,200.0
656,200.0
657,200.0
658,200.0
659,200.0
660,200.0
661,200.0
662,200.0
663,200.0
664,200.0
665,200.0
666,200.0
667,200.0
668,200.0
669,200.0
670,200.0
671,200.0
672,200.0
673,200.0
674,200.0
675,200.0
676,200.0
677,200.0
678,200.0
679,200.0
680,200.0
681,200.0
682,200.0
683,182.0
684,175.0
685,200.0
686,200.0
687,200.0
688,159.0
689,160.0
690,169.0
691,186.0
692,183.0
693,162.0
694,170.0
695,153.0
696,195.0
697,170.0
698,170.0
699,200.0
700,174.0
701,167.0
702,137.0
703,184.0
704,147.0
705,168.0
706,162.0
707,138.0
708,130.0
709,143.0
710,129.0
711,135.0
712,133.0
713,130.0
714,137.0
715,115.0
716,127.0
717,136.0
718,130.0
719,122.0
720,140.0
721,87.0
722,47.0
723,51.0
724,30.0
725,41.0
726,42.0
727,32.0
728,34.0
729,29.0
730,24.0
731,26.0
732,25.0
733,20.0
734,29.0
735,26.0
736,30.0
737,29.0
738,29.0
739,30.0
740,27.0
741,26.0
742,31.0
743,17.0
744,26.0
745,25.0
746,20.0
747,25.0
748,21.0
749,31.0
750,19.0
751,21.0
752,34.0
753,15.0
754,25.0
755,21.0
756,22.0
757,13.0
758,22.0
759,31.0
760,29.0
761,26.0
762,26.0
763,27.0
764,33.0
765,31.0
766,29.0
767,32.0
768,34.0
769,30.0
770,30.0
771,38.0
772,39.0
773,52.0
774,34.0
775,34.0
776,47.0
777,36.0
778,34.0
779,43.0
780,34.0
781,44.0
782,45.0
783,42.0
784,31.0
785,29.0
786,42.0
787,39.0
788,32.0
789,37.0
790,34.0
791,41.0
792,52.0
793,47.0
794,35.0
795,42.0
796,51.0
797,40.0
798,44.0
799,37.0
800,35.0
801,40.0
802,65.0
803,45.0
804,49.0
805,37.0
806,55.0
807,67.0
808,46.0
809,70.0
810,49.0
811,63.0
812,57.0
813,50.0
814,70.0
815,54.0
816,71.0
817,49.0
818,69.0
819,85.0
820,68.0
821,68.0
822,80.0
823,70.0
824,58.0
825,66.0
826,69.0
827,70.0
828,55.0
829,74.0
830,96.0
831,74.0
832,88.0
833,87.0
834,87.0
835,77.0
836,97.0
837,82.0
838,86.0
839,86.0
840,82.0
841,79.0
842,79.0
843,92.0
844,96.0
845,96.0
846,88.0
847,117.0
848,89.0
849,133.0
850,109.0
851,90.0
852,108.0
853,116.0
854,119.0
855,101.0
856,100.0
857,86.0
858,121.0
859,94.0
860,133.0
861,94.0
862,83.0
863,106.0
864,101.0
865,119.0
866,112.0
867,102.0
868,110.0
869,108.0
870,124.0
871,120.0
872,127.0
873,103.0
874,84.0
875,101.0
876,92.0
877,150.0
878,112.0
879,114.0
880,116.0
881,114.0
882,107.0
883,134.0
884,121.0
885,98.0
886,120.0
887,138.0
888,106.0
889,191.0
890,158.0
891,200.0
892,162.0
893,200.0
894,194.0
895,169.0
896,191.0
897,169.0
898,159.0
899,181.0
900,147.0
901,196.0
902,198.0
903,155.0
904,198.0
905,188.0
906,190.0
907,170.0
908,189.0
909,200.0
910,191.0
911,163.0
912,156.0
913,152.0
914,156.0
915,140.0
916,174.0
917,127.0
918,140.0
919,119.0
920,150.0
921,156.0
922,124.0
923,125.0
924,137.0
925,125.0
926,128.0
927,153.0
928,121.0
929,128.0
930,119.0
931,136.0
932,126.0
933,143.0
934,140.0
935,153.0
936,158.0
937,152.0
938,154.0
939,128.0
940,152.0
941,148.0
942,177.0
943,200.0
944,157.0
945,182.0
946,166.0
947,162.0
948,171.0
949,200.0
950,176.0
951,194.0
952,169.0
953,200.0
954,192.0
955,189.0
956,200.0
957,200.0
958,186.0
959,191.0
960,190.0
961,200.0
962,200.0
963,200.0
964,200.0
965,200.0
966,200.0
967,200.0
968,200.0
969,200.0
970,200.0
971,200.0
972,200.0
973,200.0
974,200.0
975,200.0
976,200.0
977,200.0
978,200.0
979,200.0
980,200.0
981,200.0
982,200.0
983,197.0
984,200.0
985,200.0
986,200.0
987,200.0
988,200.0
989,200.0
990,200.0
991,200.0
992,200.0
993,200.0
994,200.0
995,200.0
996,200.0
997,200.0
998,200.0
999,200.0
1000,196.0
1001,200.0
1002,200.0
1003,200.0
1004,200.0
1005,200.0
1006,200.0
1007,200.0
1008,200.0
1009,200.0
1010,200.0
1011,200.0
1012,200.0
1013,200.0
1014,200.0
1015,200.0
1016,200.0
1017,200.0
1018,200.0
1019,200.0
1020,200.0
1021,200.0
1022,200.0
1023,200.0
1024,200.0
1025,200.0
1026,200.0
1027,200.0
1028,200.0
1029,200.0
1030,200.0
1031,200.0
1032,200.0
1033,200.0
1034,200.0
1035,200.0
1036,200.0
1037,200.0
1038,200.0
1039,200.0
1040,200.0
1041,200.0
1042,200.0
1043,200.0
1044,200.0
1045,200.0
1046,200.0
1047,200.0
1048,200.0
1049,200.0
1050,200.0
1051,200.0
1052,200.0
1053,200.0
1054,200.0
1055,200.0
1056,200.0
1057,200.0
1058,200.0
1059,200.0
1060,200.0
1061,200.0
1062,200.0
1063,200.0
1064,200.0
1065,200.0
1066,200.0
1067,200.0
1068,200.0
1069,200.0
1070,200.0
1071,200.0
1072,200.0
1073,200.0
1074,200.0
1075,200.0
1076,200.0
1077,200.0
1078,200.0
1079,200.0
1080,200.0
1081,200.0
1082,200.0
1083,200.0
1084,200.0
1085,200.0
1086,200.0
1087,200.0
1088,200.0
1089,200.0
1090,200.0
1091,200.0
1092,200.0
1093,200.0
1094,200.0
1095,200.0
1096,200.0
1097,200.0
1098,200.0
1099,200.0
1100,200.0
1101,200.0
1102,200.0
1103,200.0
1104,200.0
1105,200.0
1106,200.0
1107,200.0
1108,200.0
1109,200.0
1110,200.0
1111,200.0
1112,200.0
1113,200.0
1114,200.0
1115,200.0
1116,200.0
1117,200.0
1118,200.0
1119,200.0
1120,200.0
1121,200.0
1122,200.0
1123,200.0
1124,200.0
1125,200.0
1126,200.0
1127,200.0
1128,200.0
1129,200.0
1130,200.0
1131,200.0
1132,200.0
1133,200.0
1134,200.0
1135,200.0
1136,200.0
1137,200.0
1138,200.0
1139,200.0
1140,200.0
1141,200.0
1142,200.0
1143,200.0
1144,200.0
1145,200.0
1146,200.0
1147,200.0
1148,200.0
1149,200.0
1150,200.0
1151,200.0
1152,200.0
1153,200.0
1154,200.0
1155,200.0
1156,200.0
1157,200.0
1158,200.0
1159,200.0
1160,200.0
1161,200.0
1162,200.0
1163,200.0
1164,200.0
1165,200.0
1166,200.0
1167,200.0
1168,200.0
1169,200.0
1170,200.0
1171,200.0
1172,200.0
1173,200.0
1174,200.0
1175,200.0
1176,200.0
1177,200.0
1178,200.0
1179,200.0
1180,200.0
1181,200.0
1182,200.0
1183,200.0
1184,200.0
1185,200.0
1186,200.0
1187,200.0
1188,200.0
1189,200.0
1190,200.0
1191,200.0
1192,200.0
1193,200.0
1194,200.0
1195,200.0
1196,200.0
1197,200.0
1198,200.0
1199,200.0
1200,200.0
1201,200.0
1202,200.0
1203,200.0
1204,200.0
1205,200.0
1206,200.0
1207,200.0
1208,200.0
1209,200.0
1210,200.0
1211,200.0
1212,200.0
1213,200.0
1214,200.0
1215,200.0
1216,200.0
1217,200.0
1218,200.0
1219,200.0
1220,200.0
1221,200.0
1222,200.0
1223,200.0
1224,200.0
1225,200.0
1226,197.0
1227,200.0
1228,191.0
1229,200.0
1230,200.0
1231,198.0
1232,200.0
1233,200.0
1234,200.0
1235,200.0
1236,200.0
1237,200.0
1238,199.0
1239,200.0
1240,191.0
1241,188.0
1242,179.0
1243,197.0
1244,188.0
1245,183.0
1246,194.0
1247,169.0
1248,166.0
1249,167.0
1250,192.0
1251,170.0
1252,186.0
1253,191.0
1254,181.0
1255,178.0
1256,179.0
1257,189.0
1258,179.0
1259,193.0
1260,177.0
1261,166.0
1262,191.0
1263,200.0
1264,182.0
1265,192.0
1266,198.0
1267,193.0
1268,183.0
1269,190.0
1270,200.0
1271,181.0
1272,200.0
1273,200.0
1274,200.0
1275,200.0
1276,200.0
1277,200.0
1278,200.0
1279,200.0
1280,200.0
1281,200.0
1282,200.0
1283,200.0
1284,200.0
1285,200.0
1286,200.0
1287,200.0
1288,200.0
1289,200.0
1290,199.0
1291,200.0
1292,200.0
1293,200.0
1294,200.0
1295,200.0
1296,200.0
1297,200.0
1298,200.0
1299,200.0
1300,200.0
1301,200.0
1302,185.0
1303,194.0
1304,188.0
1305,183.0
1306,190.0
1307,200.0
1308,186.0
1309,188.0
1310,189.0
1311,190.0
1312,198.0
1313,200.0
1314,199.0
1315,196.0
1316,200.0
1317,191.0
1318,200.0
1319,200.0
1320,200.0
1321,200.0
1322,200.0
1323,200.0
1324,200.0
1325,200.0
1326,200.0
1327,200.0
1328,200.0
1329,200.0
1330,200.0
1331,200.0
1332,200.0
1333,200.0
1334,200.0
1335,200.0
1336,200.0
1337,200.0
1338,200.0
1339,200.0
1340,200.0
1341,200.0
1342,200.0
1343,200.0
1344,200.0
1345,200.0
1346,200.0
1347,200.0
1348,200.0
1349,200.0
1350,200.0
1351,200.0
1352,200.0
1353,200.0
1354,200.0
1355,200.0
1356,200.0
1357,200.0
1358,200.0
1359,200.0
1360,200.0
1361,200.0
1362,200.0
1363,200.0
1364,200.0
1365,200.0
1366,200.0
1367,200.0
1368,200.0
1369,200.0
1370,200.0
1371,200.0
1372,196.0
1373,200.0
1374,200.0
1375,200.0
1376,200.0
1377,200.0
1378,200.0
1379,200.0
1380,188.0
1381,173.0
1382,150.0
1383,143.0
1384,138.0
1385,149.0
1386,146.0
1387,128.0
1388,144.0
1389,152.0
1390,151.0
1391,138.0
1392,157.0
1393,165.0
1394,161.0
1395,168.0
1396,165.0
1397,155.0
1398,165.0
1399,176.0
1400,174.0
1401,182.0
1402,155.0
1403,173.0
1404,174.0
1405,170.0
1406,170.0
1407,176.0
1408,176.0
1409,177.0
1410,180.0
1411,167.0
1412,177.0
1413,177.0
1414,178.0
1415,184.0
1416,179.0
1417,171.0
1418,176.0
1419,182.0
1420,183.0
1421,180.0
1422,170.0
1423,186.0
1424,179.0
1425,182.0
1426,182.0
1427,181.0
1428,193.0
1429,192.0
1430,182.0
1431,191.0
1432,179.0
1433,177.0
1434,180.0
1435,175.0
1436,168.0
1437,187.0
1438,181.0
1439,194.0
1440,188.0
1441,184.0
1442,188.0
1443,195.0
1444,176.0
1445,190.0
1446,189.0
1447,172.0
1448,186.0
1449,181.0
1450,200.0
1451,190.0
1452,191.0
1453,200.0
1454,186.0
1455,175.0
1456,187.0
1457,200.0
1458,186.0
1459,198.0
1460,182.0
1461,181.0
1462,200.0
1463,198.0
1464,181.0
1465,200.0
1466,200.0
1467,200.0
1468,200.0
1469,200.0
1470,199.0
1471,195.0
1472,200.0
1473,200.0
1474,200.0
1475,200.0
1476,200.0
1477,199.0
1478,196.0
1479,200.0
1480,200.0
1481,200.0
1482,188.0
1483,200.0
1484,200.0
1485,186.0
1486,188.0
1487,196.0
1488,188.0
1489,185.0
1490,183.0
1491,189.0
1492,200.0
1493,199.0
1494,200.0
1495,196.0
1496,200.0
1497,200.0
1498,198.0
1499,200.0
1500,198.0
1501,200.0
1502,200.0
1503,200.0
1504,200.0
1505,200.0
1506,200.0
1507,200.0
1508,200.0
1509,179.0
1510,185.0
1511,171.0
1512,187.0
1513,179.0
1514,170.0
1515,187.0
1516,184.0
1517,179.0
1518,179.0
1519,192.0
1520,185.0
1521,189.0
1522,188.0
1523,200.0
1524,194.0
1525,200.0
1526,200.0
1527,196.0
1528,199.0
1529,200.0
1530,200.0
1531,200.0
1532,200.0
1533,200.0
1534,200.0
1535,200.0
1536,200.0
1537,200.0
1538,200.0
1539,200.0
1540,198.0
1541,200.0
1542,200.0
1543,200.0
1544,200.0
1545,200.0
1546,200.0
1547,200.0
1548,200.0
1549,200.0
1550,200.0
1551,200.0
1552,200.0
1553,200.0
1554,200.0
1555,200.0
1556,200.0
1557,200.0
1558,200.0
1559,200.0
1560,200.0
1561,200.0
1562,200.0
1563,200.0
1564,200.0
1565,200.0
1566,200.0
1567,200.0
1568,200.0
1569,200.0
1570,200.0
1571,200.0
1572,200.0
1573,200.0
1574,200.0
1575,200.0
1576,200.0
1577,200.0
1578,200.0
1579,200.0
1580,200.0
1581,200.0
1582,200.0
1583,200.0
1584,200.0
1585,200.0
1586,200.0
1587,200.0
1588,200.0
1589,200.0
1590,200.0
1591,200.0
1592,200.0
1593,200.0
1594,200.0
1595,200.0
1596,200.0
1597,200.0
1598,200.0
1599,200.0
1600,200.0
1601,200.0
1602,200.0
1603,200.0
1604,200.0
1605,200.0
1606,200.0
1607,200.0
1608,200.0
1609,200.0
1610,200.0
1611,200.0
1612,200.0
1613,200.0
1614,200.0
1615,200.0
1616,200.0
1617,200.0
1618,200.0
1619,200.0
1620,200.0
1621,200.0
1622,200.0
1623,200.0
1624,200.0
1625,200.0
1626,200.0
1627,200.0
1628,200.0
1629,200.0
1630,200.0
1631,200.0
1632,200.0
1633,200.0
1634,200.0
1635,200.0
1636,200.0
1637,200.0
1638,200.0
1639,200.0
1640,200.0
1641,200.0
1642,200.0
1643,200.0
1644,200.0
1645,200.0
1646,200.0
1647,200.0
1648,200.0
1649,200.0
1650,200.0
1651,200.0
1652,200.0
1653,200.0
1654,200.0
1655,200.0
1656,200.0
1657,200.0
1658,200.0
1659,200.0
1660,200.0
1661,200.0
1662,200.0
1663,200.0
1664,200.0
1665,200.0
1666,200.0
1667,200.0
1668,200.0
1669,200.0
1670,200.0
1671,200.0
1672,200.0
1673,200.0
1674,200.0
1675,200.0
1676,200.0
1677,200.0
1678,200.0
1679,200.0
1680,200.0
1681,200.0
1682,200.0
1683,200.0
1684,200.0
1685,200.0
1686,200.0
1687,200.0
1688,200.0
1689,200.0
1690,200.0
1691,200.0
1692,200.0
1693,200.0
1694,200.0
1695,200.0
1696,200.0
1697,200.0
1698,200.0
1699,200.0
1700,200.0
1701,200.0
1702,200.0
1703,200.0
1704,200.0
1705,200.0
1706,200.0
1707,200.0
1708,200.0
1709,200.0
1710,200.0
1711,200.0
1712,200.0
1713,200.0
1714,200.0
1715,200.0
1716,200.0
1717,200.0
1718,200.0
1719,200.0
1720,200.0
1721,200.0
1722,200.0
1723,200.0
1724,200.0
1725,200.0
1726,200.0
1727,200.0
1728,200.0
1729,200.0
1730,200.0
1731,200.0
1732,200.0
1733,200.0
1734,200.0
1735,200.0
1736,200.0
1737,200.0
1738,200.0
1739,200.0
1740,200.0
1741,200.0
1742,200.0
1743,200.0
1744,200.0
1745,200.0
1746,200.0
1747,200.0
1748,200.0
1749,200.0
1750,200.0
1751,200.0
1752,200.0
1753,200.0
1754,200.0
1755,200.0
1756,200.0
1757,200.0
1758,200.0
1759,200.0
1760,200.0
1761,200.0
1762,200.0
1763,200.0
1764,200.0
1765,200.0
1766,200.0
1767,200.0
1768,200.0
1769,200.0
1770,200.0
1771,200.0
1772,200.0
1773,200.0
1774,200.0
1775,200.0
1776,200.0
1777,200.0
1778,200.0
1779,200.0
1780,200.0
1781,200.0
1782,200.0
1783,200.0
1784,200.0
1785,200.0
1786,200.0
1787,200.0
1788,200.0
1789,200.0
1790,200.0
1791,200.0
1792,200.0
1793,200.0
1794,200.0
1795,200.0
1796,200.0
1797,200.0
1798,200.0
1799,200.0
1800,200.0
1801,200.0
1802,200.0
1803,200.0
1804,200.0
1805,200.0
1806,200.0
1807,200.0
1808,200.0
1809,200.0
1810,200.0
1811,200.0
1812,200.0
1813,200.0
1814,200.0
1815,200.0
1816,200.0
1817,200.0
1818,200.0
1819,200.0
1820,200.0
1821,200.0
1822,200.0
1823,200.0
1824,200.0
1825,200.0
1826,200.0
1827,200.0
1828,200.0
1829,200.0
1830,200.0
1831,200.0
1832,200.0
1833,200.0
1834,200.0
1835,200.0
1836,200.0
1837,200.0
1838,200.0
1839,200.0
1840,200.0
1841,200.0
1842,200.0
1843,200.0
1844,200.0
1845,200.0
1846,200.0
1847,200.0
1848,200.0
1849,200.0
1850,200.0
1851,200.0
1852,200.0
1853,200.0
1854,200.0
1855,200.0
1856,200.0
1857,200.0
1858,200.0
1859,200.0
1860,200.0
1861,200.0
1862,200.0
1863,195.0
1864,191.0
1865,196.0
1866,193.0
1867,187.0
1868,182.0
1869,197.0
1870,197.0
1871,200.0
1872,200.0
1873,200.0
1874,200.0
1875,200.0
1876,200.0
1877,200.0
1878,200.0
1879,200.0
1880,200.0
1881,200.0
1882,200.0
1883,200.0
1884,200.0
1885,200.0
1886,200.0
1887,200.0
1888,200.0
1889,200.0
1890,198.0
1891,200.0
1892,182.0
1893,200.0
1894,200.0
1895,196.0
1896,200.0
1897,200.0
1898,197.0
1899,182.0
1900,200.0
1901,187.0
1902,166.0
1903,185.0
1904,166.0
1905,181.0
1906,156.0
1907,153.0
1908,155.0
1909,159.0
1910,173.0
1911,159.0
1912,153.0
1913,156.0
1914,146.0
1915,162.0
1916,145.0
1917,164.0
1918,157.0
1919,143.0
1920,166.0
1921,133.0
1922,174.0
1923,174.0
1924,157.0
1925,174.0
1926,194.0
1927,172.0
1928,190.0
1929,180.0
1930,196.0
1931,182.0
1932,178.0
1933,182.0
1934,200.0
1935,187.0
1936,195.0
1937,180.0
1938,185.0
1939,183.0
1940,200.0
1941,197.0
1942,200.0
1943,199.0
1944,200.0
1945,200.0
1946,200.0
1947,200.0
1948,200.0
1949,200.0
1950,194.0
1951,200.0
1952,199.0
1953,199.0
1954,199.0
1955,190.0
1956,200.0
1957,200.0
1958,200.0
1959,200.0
1960,200.0
1961,200.0
1962,191.0
1963,200.0
1964,200.0
1965,200.0
1966,200.0
1967,200.0
1968,192.0
1969,190.0
1970,190.0
1971,200.0
1972,200.0
1973,200.0
1974,200.0
1975,200.0
1976,200.0
1977,200.0
1978,200.0
1979,200.0
1980,200.0
1981,200.0
1982,200.0
1983,200.0
1984,200.0
1985,200.0
1986,200.0
1987,200.0
1988,200.0
1989,200.0
1990,200.0
1991,200.0
1992,200.0
1993,200.0
1994,198.0
1995,200.0
1996,200.0
1997,200.0
1998,200.0
1999,200.0
2000,200.0
2001,200.0
2002,200.0
================================================
FILE: history/ac_continue.csv
================================================
Episode_reward,actor_loss,critic_loss,episode
-128.90520572493745,-0.11816113442182541,0.218042254447937,0
-173.51670990790785,-0.06733673810958862,0.0800720751285553,1
-187.8171977312419,-0.002720972988754511,0.002852647565305233,2
-173.67273616272772,-0.036070723086595535,0.04723271355032921,3
-156.9649702397321,-0.09143099188804626,0.11979616433382034,4
-169.8156995095101,-0.03845040500164032,0.03830729424953461,5
-183.36885903413648,-0.004992833361029625,0.004520351532846689,6
-177.1069876314097,-0.0031833588145673275,0.008521170355379581,7
-175.49269381318348,0.0024499772116541862,0.004748632665723562,8
-165.72158234375667,0.018952136859297752,0.03374912217259407,9
-160.16273015033792,-0.0030132168903946877,0.02405666373670101,10
-168.84337562314553,-0.01132543571293354,0.009634644724428654,11
-165.95408313158416,-0.012529523111879826,0.009909496642649174,12
-162.8677057475428,-0.015982067212462425,0.004808629862964153,13
-163.5753816694468,-0.06084240600466728,0.004999194294214249,14
-156.0625641940695,-0.03364159166812897,0.006716023664921522,15
-121.32997046665385,-0.10144391655921936,0.30767059326171875,16
-101.54557290068891,-0.05899958312511444,0.13214406371116638,17
-121.41828099390946,-0.011773789301514626,0.15117333829402924,18
-156.06130000605796,0.03307399898767471,0.02828930877149105,19
-155.56864909637278,0.03242425620555878,0.020547766238451004,20
-145.36199061768616,0.02054990455508232,0.04608447104692459,21
-151.7792733380804,0.03428905829787254,0.03264655917882919,22
-139.84881185297428,-0.0009240770596079528,0.04941253736615181,23
-150.03480041550247,-0.07503605633974075,0.021758461371064186,24
-140.70167880684505,0.007148387376219034,0.052074089646339417,25
-159.85961878595552,-0.030138272792100906,0.0173195693641901,26
-158.76315171915036,-0.053707435727119446,0.005946178920567036,27
-154.52882069043915,-0.031325992196798325,0.02829182706773281,28
-158.9817618372644,-0.027006058022379875,0.006326370406895876,29
-160.32050107010363,-0.05012276768684387,0.003761639818549156,30
-149.30135812385507,-0.040228523313999176,0.009980856440961361,31
-145.03741825592965,-0.01341972965747118,0.032863400876522064,32
-118.49762380847548,0.0034106364473700523,0.012959661893546581,33
-133.83763953421212,-0.028365153819322586,0.011703753843903542,34
-116.23202698261296,-0.045169100165367126,0.026461677625775337,35
-117.61107859971733,-0.0268267635256052,0.062097977846860886,36
-149.894905565406,0.005340625066310167,0.08046764135360718,37
-154.074162769591,-0.011202293448150158,0.012075615115463734,38
-134.36350054408567,0.01002783328294754,0.037375565618276596,39
-150.85834169505216,-0.035754427313804626,0.018294362351298332,40
-145.50118224184877,-0.014716806821525097,0.006025801412761211,41
-150.63776323816188,0.020595764741301537,0.015893014147877693,42
-130.09002062901482,-0.002018730156123638,0.02221151813864708,43
-156.9060387154177,-0.04334793612360954,0.004678923636674881,44
-145.99038874505132,-0.035547368228435516,0.0054659126326441765,45
-119.65712979195246,-0.018048271536827087,0.020358659327030182,46
-150.21738698114189,-0.029474914073944092,0.007732502184808254,47
-109.82576633740165,-0.007172092795372009,0.01311739906668663,48
-149.36941666089103,-0.035635992884635925,0.008552027866244316,49
-138.89770085503423,-0.03686399757862091,0.009956286288797855,50
-141.0037562123717,-0.03394555673003197,0.006923982407897711,51
-101.21312218966172,-0.004798423033207655,0.03019476681947708,52
-148.75453726331315,-0.010606023482978344,0.24707195162773132,53
-138.3855535751284,-0.03625988960266113,0.09367780387401581,54
-127.78297799326874,-0.03951246291399002,0.0284748338162899,55
-130.51456405908363,-0.011563140898942947,0.009528623893857002,56
-137.87126854927783,-0.018686706200242043,0.014867258258163929,57
-127.42378756734031,-0.02356593683362007,0.0075768339447677135,58
-125.70859625109861,-0.033339206129312515,0.009255921468138695,59
-119.05359541477232,-0.04140903800725937,0.02614688314497471,60
-129.84422357598675,-0.009087968617677689,0.015148225240409374,61
-90.63923559661282,-0.00835389643907547,0.04817446693778038,62
-81.54950736175697,-0.027849368751049042,0.044494982808828354,63
-129.2466770564977,-0.021401122212409973,0.02429121732711792,64
-148.95003472734732,-0.037785496562719345,0.006264501716941595,65
-129.42403966697339,-0.02046314626932144,0.018860243260860443,66
-138.06388137480945,-0.026568971574306488,0.008116014301776886,67
-135.52334782635612,-0.005726654548197985,0.004426317289471626,68
-117.42493596719875,-0.03409242257475853,0.01645597256720066,69
-113.3602089177063,-0.0016682073473930359,0.012022992596030235,70
-90.90984976171424,0.018260039389133453,0.05911561846733093,71
-78.78985121004055,0.04589872434735298,0.09445640444755554,72
-90.34137985465165,0.009522655978798866,0.05463223159313202,73
-64.33954879790882,-0.0007424831273965538,0.04739539325237274,74
-79.52557487511162,0.022538084536790848,0.0658935084939003,75
-65.03050377511977,-0.039056695997714996,0.04370497167110443,76
-95.78177422592981,-0.03153213858604431,0.05397535488009453,77
-66.51977812711509,-0.04175734892487526,0.031710051000118256,78
-111.13418462322932,-0.020386528223752975,0.04701628535985947,79
-78.01147432118017,0.0029152496717870235,0.053450051695108414,80
-78.7047083325445,-0.03163432702422142,0.03992544114589691,81
-62.88949006177542,-0.004482750315219164,0.038229282945394516,82
-51.76042527448,-0.012605268508195877,0.024886665865778923,83
-90.737748473562,-0.01493663527071476,0.036765206605196,84
-97.83977206942664,-0.032980840653181076,0.0251147598028183,85
-149.32810571283954,-0.06770310550928116,0.021020108833909035,86
-94.74706307003167,-0.01194031722843647,0.06443748623132706,87
-95.46576808934827,-0.03786374256014824,0.019207943230867386,88
-158.87631174076228,-0.033470023423433304,0.01177558396011591,89
-115.16114572097554,-0.06218460202217102,0.03541203588247299,90
-65.5239862169029,0.013167922385036945,0.04131739214062691,91
-115.44170789305701,-0.026105327531695366,0.014657242223620415,92
-109.37613756038552,-0.028369365260004997,0.016077904030680656,93
-124.4126175601616,-0.015185249038040638,0.00830584205687046,94
-155.03070477429873,-0.028593778610229492,0.006861711852252483,95
-105.1723590170853,-0.0010148596484214067,0.051170479506254196,96
-93.13523837206614,-0.031695228070020676,0.030322112143039703,97
-92.0780088085127,-0.014456412754952908,0.06276308000087738,98
-157.98159915337575,-0.03909457102417946,0.008158702403306961,99
-52.2910126989272,-0.0045435321517288685,0.07484341412782669,100
-124.02489238812552,-0.12134473770856857,0.285476416349411,101
-150.0383145206508,0.015463005751371384,0.1962101012468338,102
-124.79979473721735,0.029857315123081207,0.06587255001068115,103
-151.93783510203392,0.01816432923078537,0.06004641577601433,104
-153.46082939526346,0.025976231321692467,0.018087945878505707,105
-153.0643940476726,0.041255105286836624,0.01522158831357956,106
-147.3605646925618,0.022746717557311058,0.05003218725323677,107
-144.73965920710717,0.027152249589562416,0.02863638661801815,108
-135.96452760584776,0.024353479966521263,0.032396815717220306,109
-118.73612073550285,0.00028955817106179893,0.06907561421394348,110
-98.50319380358809,0.028907084837555885,0.07416780292987823,111
-122.32031241415092,0.0018870711792260408,0.04241432249546051,112
-52.68386323472808,0.06541673839092255,0.050833940505981445,113
-148.17981183314419,-0.05534721538424492,0.038898516446352005,114
-158.66410200129022,-0.03129463270306587,0.002405643230304122,115
-109.0615272106022,-0.02128031477332115,0.05848981812596321,116
-124.9109849556943,0.008534370921552181,0.03075779601931572,117
-135.04347801407317,-0.025528687983751297,0.011244556866586208,118
-145.22483747259355,-0.023046214133501053,0.007142215967178345,119
-143.25356316301173,-0.042317528277635574,0.017895519733428955,120
-144.6934264522957,-0.032824937254190445,0.021521756425499916,121
-130.0159842576681,0.0014981436543166637,0.04432859271764755,122
-115.66706610998655,-0.008848265744745731,0.05973004922270775,123
-127.51272586099758,-0.03261607512831688,0.0314054861664772,124
-155.72650282309945,-0.029671311378479004,0.005902679171413183,125
-153.19235486690124,-0.033465296030044556,0.005418254528194666,126
-132.82926528195182,-0.0034157789777964354,0.019089041277766228,127
-146.7012104483071,-0.027191782370209694,0.00704195536673069,128
-140.18775701636244,-0.025628168135881424,0.011274627409875393,129
-125.38016013503915,-0.024169037118554115,0.02482154406607151,130
-100.43522021625355,-0.07547062635421753,0.08369606733322144,131
-139.93956609761557,-0.028063612058758736,0.007911020889878273,132
-133.39955124544062,-0.03748668357729912,0.014851379208266735,133
-139.6341326355783,-0.028383878991007805,0.0059127239510416985,134
-119.90442020868416,-0.017330193892121315,0.018415279686450958,135
-117.1909711630189,-0.01607094518840313,0.018679514527320862,136
-125.2698031165435,-0.017590919509530067,0.008127118460834026,137
-136.92165300838857,-0.02812233380973339,0.007300146389752626,138
-125.59011403449709,-0.004780275281518698,0.008428655564785004,139
-124.65287338796519,-0.005278449039906263,0.009849241003394127,140
-80.67677047875728,0.0029387688264250755,0.05523877590894699,141
-158.06881067430632,-0.009701714850962162,0.016260672360658646,142
-149.26043385530247,-0.02822364866733551,0.005844067316502333,143
-115.98356587708639,3.375232336111367e-05,0.048886489123106,144
-124.4142707287005,-0.01206234935671091,0.04648931324481964,145
-147.7335764325854,-0.04082320258021355,0.00920157227665186,146
-98.63444284701457,-0.014958950690925121,0.03036716766655445,147
-123.65553689394451,-0.019268976524472237,0.02408592775464058,148
-153.73841100505433,-0.03331742435693741,0.01404921431094408,149
-118.65057358780756,0.004243273753672838,0.042835019528865814,150
-158.1663332257977,-0.0138799287378788,0.006569362711161375,151
-114.56265420000565,-0.03637496381998062,0.038454726338386536,152
-82.02326355042123,0.008951845578849316,0.1299847960472107,153
-156.35294001753226,-0.010119437240064144,0.03095921501517296,154
-98.56655363503455,-0.02011219412088394,0.05828256532549858,155
-138.26449021936,-0.04676579311490059,0.108129121363163,156
-130.08203116572392,0.002649549162015319,0.04069558158516884,157
-149.93142965469266,0.026073450222611427,0.043405186384916306,158
-160.31117219680013,-0.0008374816388823092,0.015347685664892197,159
-149.59651444694634,0.05680752918124199,0.0535757914185524,160
-150.9233505659622,0.03021392412483692,0.03609118610620499,161
-144.2835540547254,0.023117385804653168,0.040417127311229706,162
-159.88837204485108,-0.007153730373829603,0.00622953474521637,163
-150.78474056341705,0.02053423970937729,0.04304128512740135,164
-136.03372166221877,-0.02200733870267868,0.17049196362495422,165
-148.18934802724675,-0.024558095261454582,0.04619152843952179,166
-151.2847497371689,0.02149391919374466,0.03446880355477333,167
-158.8350684159196,-0.005283558741211891,0.0014910970348864794,168
-152.81144919778453,0.0007866831147111952,0.0452098548412323,169
-140.9202068856542,0.002955421106889844,0.05157984793186188,170
-140.54003674604775,0.032337453216314316,0.06610522419214249,171
-144.1423798065869,0.01574571430683136,0.04251216724514961,172
-133.41967355793548,0.036541473120450974,0.04532473534345627,173
-152.39713689540835,0.011199031956493855,0.02551628090441227,174
-145.5557626919585,-0.004346379544585943,0.11224283277988434,175
-150.37809405557874,0.04738866910338402,0.10094867646694183,176
-142.30471975630527,-0.027170171961188316,0.029905376955866814,177
-148.77613646117803,0.02065015397965908,0.024793371558189392,178
-156.30290763867671,-0.026349496096372604,0.013088230043649673,179
-150.45698192218146,0.026822783052921295,0.03536970913410187,180
-118.82531151047955,0.022746743634343147,0.036443475633859634,181
-137.35086893312732,-0.0023798823822289705,0.031157419085502625,182
-107.45004553709376,-0.022908443585038185,0.029892168939113617,183
-141.27673984687462,0.003531284863129258,0.09818501770496368,184
-138.3823978754845,-0.010183075442910194,0.04263695701956749,185
-98.03933186449947,0.05567610636353493,0.10059705376625061,186
-124.51735539755273,-0.06041458994150162,0.026982726529240608,187
-65.66860354333137,0.0636170282959938,0.038291048258543015,188
-81.84785284651494,-0.0020879709627479315,0.035468075424432755,189
-52.113815340505624,0.03987911343574524,0.041829537600278854,190
-73.14194619074667,-0.012664618901908398,0.047694891691207886,191
-157.84153737489083,-0.023931138217449188,0.011039887554943562,192
-80.41695721543438,-0.03668137639760971,0.04912807047367096,193
-85.4224593415252,-0.0130471708253026,0.03700229525566101,194
-39.04873012349948,0.01953401044011116,0.04419881850481033,195
-113.38524057930654,-0.01590311899781227,0.053968679159879684,196
-78.35406210471025,-0.006963891908526421,0.05457195267081261,197
-121.99739670812546,-0.012811277061700821,0.04539956897497177,198
-109.73169673919942,0.0026423779781907797,0.041475724428892136,199
-105.02001858407387,-0.01930549181997776,0.03942437469959259,200
-124.64346357242043,-0.02973216213285923,0.02683805488049984,201
-158.97169203246614,-0.022996850311756134,0.005472598131746054,202
-55.975501705172185,-0.03305701166391373,0.04367101565003395,203
-90.95345455783719,-0.03988717496395111,0.06918631494045258,204
-140.48699088464716,-0.0098182437941432,0.029868654906749725,205
-102.67706295142676,-0.024660401046276093,0.08131998032331467,206
-76.95502994402632,0.022504493594169617,0.07006444782018661,207
-93.52566294763956,-0.02607918716967106,0.046455267816782,208
-113.8081430001521,-0.03847579285502434,0.037268634885549545,209
-100.53893622022973,-0.02706807851791382,0.04681858420372009,210
-41.42783467805271,-0.020472832024097443,0.05476333945989609,211
-124.60772300858447,0.005437870044261217,0.04161582887172699,212
-157.0984480853069,-0.013465720228850842,0.004529261030256748,213
-128.66427159515996,-0.05583783984184265,0.037003979086875916,214
-128.98599452262732,-0.04729686677455902,0.023879023268818855,215
-110.25429588691877,-0.09077896177768707,0.06066092476248741,216
-120.11882058255617,-0.013750850223004818,0.036198850721120834,217
-140.42162211761624,-0.02635546401143074,0.025956392288208008,218
-146.66137230809053,-0.017644003033638,0.015038125216960907,219
-40.011129997533196,0.04881119728088379,0.06652971357107162,220
-117.20162421839083,-0.013320665806531906,0.055077072232961655,221
-127.32575085866836,-0.018112163990736008,0.03256919980049133,222
-152.32876644491492,-0.016217157244682312,0.01988878659904003,223
-137.87318563261618,-0.11298264563083649,0.329019695520401,224
-148.91628818201937,0.036543361842632294,0.07683397084474564,225
-150.678245305479,0.05873740091919899,0.0547025091946125,226
-150.77903437207556,0.011996149085462093,0.030146360397338867,227
-133.71793128390883,0.0332992747426033,0.07143933326005936,228
-140.42604292530626,0.0189902875572443,0.044806819409132004,229
-142.4752806440557,-0.006941778585314751,0.035318773239851,230
-150.03500688030834,0.009044052101671696,0.029355064034461975,231
-139.04874548524862,-0.020447663962841034,0.044867534190416336,232
-146.83845227992944,-0.0023876719642430544,0.0382225438952446,233
-142.58388430205832,0.009198383428156376,0.04529896751046181,234
-134.8745679635526,0.020278535783290863,0.05896954610943794,235
-134.80503921758992,0.01638519950211048,0.06039910390973091,236
-148.89170484945157,0.014368806034326553,0.04100581631064415,237
-137.0309946765213,-0.0289583969861269,0.05337051302194595,238
-145.73603818858214,0.011523835361003876,0.04102834686636925,239
-142.9160895737153,0.022374635562300682,0.04053419083356857,240
-91.52763971642094,0.024928206577897072,0.073421411216259,241
-52.61703835772214,0.024831969290971756,0.035120945423841476,242
-50.933602723057504,-0.016637394204735756,0.057265061885118484,243
-64.66246557071912,-0.03596227616071701,0.036646172404289246,244
-51.91344908009875,-0.009127596393227577,0.03635336831212044,245
-78.8106063596794,-0.026722149923443794,0.05818133428692818,246
-131.91771729448482,0.0029772853013128042,0.03929189592599869,247
-26.229942542948073,0.03517855703830719,0.022852012887597084,248
-106.0781607419431,-0.07640299946069717,0.10025783628225327,249
-159.10366695161107,-0.03250008448958397,0.014282047748565674,250
-64.57643400726433,0.02826712094247341,0.041412536054849625,251
-78.71924932135101,-0.013313734903931618,0.03240490332245827,252
-52.455020884949114,-0.013538490980863571,0.031189188361167908,253
-74.81716925905822,-0.03611985594034195,0.05175074562430382,254
-90.00617358260301,-0.08829410374164581,0.052084069699048996,255
-67.90359650291975,-0.032218579202890396,0.03038950450718403,256
-71.54257529738055,-0.019493194296956062,0.026491494849324226,257
-77.79305191875986,-0.09359975159168243,0.028879983350634575,258
-41.49660884716948,-0.007207376416772604,0.021043358370661736,259
-65.29821711289533,-0.03426751866936684,0.04617427662014961,260
-157.65190295536127,-0.024333275854587555,0.00833294726908207,261
-95.50223871201005,-0.03168012201786041,0.036341384053230286,262
-80.82715112753124,-0.03059590421617031,0.026487188413739204,263
-82.49194259934572,-0.007053407374769449,0.03766786679625511,264
-0.16988046325441633,-0.00017406203551217914,8.908264135243371e-07,265
-0.13265456481323898,-7.714905950706452e-05,5.980605806144013e-07,266
-0.12260173922119749,-6.537969056807924e-06,4.4672387389255164e-07,267
-94.42173826314847,-0.04438967630267143,0.07521101087331772,268
-65.01396785148204,-0.012792889960110188,0.05583586171269417,269
-158.41705798588742,-0.01075009722262621,0.02430165559053421,270
-0.2986386800790562,0.00046946509974077344,7.062428721837932e-06,271
-65.76089619126442,-0.02874954231083393,0.04023776203393936,272
-13.56078677965855,-0.027453714981675148,0.015596875920891762,273
-161.90905609970693,-0.030940227210521698,0.016619842499494553,274
-26.58366386983076,-0.02131103351712227,0.02678052894771099,275
-26.4752039783739,-0.03461957350373268,0.0360800139605999,276
-39.35447062916321,-0.00841829925775528,0.044284526258707047,277
-160.14692196687014,-0.023141052573919296,0.007844404317438602,278
-26.18780562604063,-0.007899651303887367,0.05480824410915375,279
-122.13339309399962,-0.04018831253051758,0.040583446621894836,280
-52.65396682666125,-0.004132509231567383,0.05999450385570526,281
-108.8547800469949,-0.033220838755369186,0.04260016605257988,282
-93.33330283918276,-0.030547283589839935,0.03975842520594597,283
-149.2842393186756,-0.019427135586738586,0.018807558342814445,284
-159.56881981160265,-0.03281332552433014,0.003732141340151429,285
-105.62199130929832,-0.027130236849188805,0.028560424223542213,286
-67.46272220247214,-0.02661566436290741,0.028932569548487663,287
-0.289600935035014,-5.0242590077687055e-05,7.010929152784229e-07,288
-96.34998885095644,-0.03182423487305641,0.059965528547763824,289
-52.84627718045333,-0.028930911794304848,0.04108802229166031,290
-81.12083525533589,-0.029006628319621086,0.03198597580194473,291
-105.86589411579529,-0.08842560648918152,0.13883435726165771,292
-79.51643513963737,0.0013816994614899158,0.053708624094724655,293
-0.15596549991063124,0.028698373585939407,0.004330551251769066,294
-53.91334812383687,-0.024194039404392242,0.0608505979180336,295
-94.48546394439745,0.004499424714595079,0.11409537494182587,296
-66.1720022593302,0.022924331948161125,0.03452332317829132,297
-40.27193942788025,-0.010099074803292751,0.035563912242650986,298
-73.53049550451779,-0.03727560117840767,0.06251254677772522,299
-0.17802951888561805,0.0002583649766165763,2.2563531274499837e-06,300
-82.8453160076496,-0.0720314309000969,0.16718555986881256,301
-112.79071781686828,-0.07028577476739883,0.1087086871266365,302
-92.31103873728016,0.005163075868040323,0.03942626714706421,303
-92.19721804123651,-0.04383604973554611,0.06991662085056305,304
-65.43294974819163,-0.044362831860780716,0.09413345158100128,305
-71.78681755939895,-0.006908423732966185,0.044377729296684265,306
-26.296312000984045,-0.003579825861379504,0.016468774527311325,307
-39.78674380996396,0.032871704548597336,0.09292875975370407,308
-78.4225844984884,-0.0322578065097332,0.10654942691326141,309
-77.5764192774322,0.0029102445114403963,0.04250193014740944,310
-93.10038842095759,-0.05228818953037262,0.09260357916355133,311
-72.56638710204888,-0.006537484936416149,0.042125921696424484,312
-65.67072043251908,-0.01822699047625065,0.03742736950516701,313
-39.63048076479607,-0.03022678568959236,0.01932057924568653,314
-81.57633903032676,-0.04046407714486122,0.08362016826868057,315
-0.3999511242819041,-0.0012998484307900071,0.00812481064349413,316
-26.34744457274779,0.015010826289653778,0.03120376169681549,317
-65.62875216019172,-0.031625378876924515,0.05775437504053116,318
-57.238158380741105,2.1634101358358748e-05,0.033826034516096115,319
-13.03000449518145,-0.027791587635874748,0.024137942120432854,320
-13.263086435151326,-0.0020798766054213047,0.015616859309375286,321
-0.23980391378834404,2.036239038716303e-06,4.93409288537805e-07,322
-39.59459072099135,-0.04871445521712303,0.038839951157569885,323
-38.95194855057293,-0.016660051420331,0.05507650971412659,324
-115.56423112914591,-0.030799351632595062,0.07477468252182007,325
-67.72627046432298,-0.00073583098128438,0.05578972026705742,326
-121.68831463286674,-0.03669098764657974,0.06691554933786392,327
-136.0618572239932,-0.030397621914744377,0.06042831018567085,328
-107.65609156854728,0.004998049698770046,0.07635364681482315,329
-159.35191949126605,-0.006734650116413832,0.010695707984268665,330
-144.23526723075355,-0.0032358583994209766,0.0397292859852314,331
-13.177122139797524,0.017588257789611816,0.023249244317412376,332
-140.13245220365462,0.022651666775345802,0.06364542245864868,333
-0.6909078193530611,-0.015548703260719776,0.008620300330221653,334
-0.5809063671931585,-0.0003603027726057917,0.0004928346024826169,335
-148.67940388353225,0.025887709110975266,0.04290877282619476,336
-123.7029926543708,-0.02257213369011879,0.09929020702838898,337
-76.80095216500077,0.0729958564043045,0.07670199126005173,338
-91.30652644635214,-0.016187427565455437,0.050154224038124084,339
-104.24790503396417,-0.02484406903386116,0.028579819947481155,340
-85.2093164910138,0.010722042061388493,0.030846664682030678,341
-109.86321898178868,-0.02547280117869377,0.049173444509506226,342
-106.66345431906547,-0.027276229113340378,0.06361830979585648,343
-156.62251445733983,-0.02034415677189827,0.032879430800676346,344
-117.34566581461227,-0.02215372957289219,0.04584536701440811,345
-111.03049947283328,-0.13828106224536896,0.1959814429283142,346
-77.25504380973877,0.014753359369933605,0.03880394622683525,347
-79.25465550712157,-0.041737738996744156,0.06206793710589409,348
-77.81159838833679,-0.035130929201841354,0.04375605657696724,349
-66.2578901699322,-0.025602025911211967,0.04114961624145508,350
-64.32109044745705,-0.0614108070731163,0.04912661015987396,351
-119.47176354548931,0.00034532189602032304,0.13319894671440125,352
-131.81428279485232,-0.007083946373313665,0.06341385096311569,353
-114.13586775534007,0.006838161963969469,0.048227161169052124,354
-122.84534563144177,0.00707246083766222,0.045787420123815536,355
-89.3372111934452,0.004162793513387442,0.055781684815883636,356
-77.455975007448,-0.03300943970680237,0.041458576917648315,357
-78.24745691402164,-0.057438597083091736,0.034609824419021606,358
-77.48534981371445,-0.04557276889681816,0.039204347878694534,359
-147.35581241535473,-0.03260508552193642,0.01366596482694149,360
-151.21882988111395,-0.029944317415356636,0.007624490186572075,361
-153.76092886446543,-0.043216366320848465,0.006593412719666958,362
-90.93758126837973,-0.041089847683906555,0.07904022932052612,363
-80.71338951379921,0.006569467950612307,0.06912820041179657,364
-53.56679842127747,-0.02716740593314171,0.06527707725763321,365
-82.31395940161049,-0.04524749889969826,0.05573941394686699,366
-65.31397054206496,-0.01116603147238493,0.03947030380368233,367
-65.78585635219996,-0.010563625022768974,0.0441976822912693,368
-69.25677853186463,-0.03414547070860863,0.06310441344976425,369
-41.84801459207408,-0.09057784080505371,0.03561611846089363,370
-123.60045274517752,-0.11348411440849304,0.07958002388477325,371
-158.40042048595762,-0.0095997778698802,0.012439865618944168,372
-134.7026191169718,-0.006412342190742493,0.0583699606359005,373
-119.36949283629957,-0.006020812317728996,0.04879005253314972,374
-51.502852638686875,0.030146537348628044,0.09234855324029922,375
-72.38128515991072,0.011469412595033646,0.07139140367507935,376
-109.13642483303863,-0.019533904269337654,0.037893518805503845,377
-77.8612831556905,-0.018678924068808556,0.05064314231276512,378
-149.10992153878846,-0.0236445851624012,0.023128606379032135,379
-104.17486459455499,-0.018880674615502357,0.04035788029432297,380
-92.10392493420534,-0.04708840698003769,0.04466293752193451,381
-122.62916791256436,-0.007454319391399622,0.011400115676224232,382
-80.27965560317581,-0.020442957058548927,0.029220538213849068,383
-121.78133388695602,-0.025428686290979385,0.014057955704629421,384
-108.4478983150611,-0.046888839453458786,0.022410845384001732,385
-62.85378436789923,-0.03716282919049263,0.0240864809602499,386
-143.90706364553427,-0.035480089485645294,0.016230376437306404,387
-71.69290105336381,-0.04514560103416443,0.03984649106860161,388
-148.2371221678891,-0.01757737807929516,0.021576261147856712,389
-158.87379815520634,-0.04719541594386101,0.008522883988916874,390
-157.49557214036702,-0.04774903133511543,0.00976511836051941,391
-152.79098571848493,-0.03778228908777237,0.00956884864717722,392
-110.80371241392486,-0.04646831378340721,0.03844833746552467,393
-76.49365234890963,-0.058387432247400284,0.03881101682782173,394
-26.00982273449468,-0.015101365745067596,0.01956385001540184,395
-0.4574479528929952,3.942980038118549e-05,4.657225417759037e-06,396
-139.76015879950236,-0.11630292981863022,0.1931106150150299,397
-143.9893110638805,0.015646863728761673,0.10971914231777191,398
-71.05169875612877,0.0429181307554245,0.05598609894514084,399
-75.78554000225454,0.017098909243941307,0.07171717286109924,400
-140.42934462040597,-0.04376155883073807,0.03222441300749779,401
-119.27542543770997,0.001271026092581451,0.03260697051882744,402
-88.32552833007593,-0.002761855721473694,0.04469187557697296,403
-133.12700947132333,-0.036382466554641724,0.09155251830816269,404
-103.9428827704972,0.010799353010952473,0.04444997385144234,405
-159.53074449517473,-0.05543727055191994,0.009400471113622189,406
-122.80377675255926,-0.017933443188667297,0.030141081660985947,407
-112.6544836856402,-0.0321430042386055,0.03212879225611687,408
-120.35623137096026,-0.033010054379701614,0.015599047765135765,409
-109.35014813615788,-0.04912981018424034,0.023191170766949654,410
-102.08778099257555,-0.03018512763082981,0.030210718512535095,411
-135.6133411839989,-0.020171955227851868,0.012096576392650604,412
-129.32534677738923,-0.019673597067594528,0.011444742791354656,413
-119.09527408877132,-0.021501565352082253,0.016383672133088112,414
-104.37976331428638,-0.03095945529639721,0.0268239788711071,415
-116.69949880179288,-0.056258317083120346,0.06633973121643066,416
-157.83978196077464,-0.030293058604002,0.011732975021004677,417
-103.91988594742278,0.008761337026953697,0.03878147900104523,418
-135.52987079593086,-0.03915306180715561,0.013181419111788273,419
-143.76345583011621,-0.024639803916215897,0.01082430500537157,420
-141.38191986892488,-0.02614441327750683,0.007252200972288847,421
-131.6835091961132,-0.018774695694446564,0.01054925937205553,422
-93.09948145434258,0.007620969321578741,0.04173766449093819,423
-158.41255265351637,-0.028101211413741112,0.017536787316203117,424
-98.84931397530259,0.00412228237837553,0.06125400960445404,425
-90.37808834600935,0.004088857676833868,0.056085217744112015,426
-125.37931915407749,-0.009870398789644241,0.013467431999742985,427
-129.29451671929962,-0.023519637063145638,0.01100924238562584,428
-109.88070472978701,-0.025471951812505722,0.03275856003165245,429
-96.66830856070594,-0.00995543971657753,0.02680511772632599,430
-148.91799233505898,-0.01761895976960659,0.012967939488589764,431
-157.5145294219545,-0.03992370143532753,0.012409992516040802,432
-159.71196201663983,-0.043164242058992386,0.0036352372262626886,433
-136.83785317362901,-0.032877322286367416,0.025246532633900642,434
-129.07677065876854,-0.002297729253768921,0.017571426928043365,435
-128.337435706807,-0.029865972697734833,0.015932030975818634,436
-138.65016528196912,-0.028669025748968124,0.008770613931119442,437
-159.6835156547871,-0.028522877022624016,0.01177776139229536,438
-150.90629179722072,-0.023535024374723434,0.010596318170428276,439
-145.18788803917926,-0.03319789469242096,0.012282084673643112,440
-159.83600583363466,-0.037466853857040405,0.00470575550571084,441
-120.27080538895002,-0.025502696633338928,0.07311707735061646,442
-132.89509175998526,-0.011780848726630211,0.016009604558348656,443
-151.81425909888355,-0.02975558303296566,0.010802528820931911,444
-158.46746130269918,-0.046469975262880325,0.006291951984167099,445
-113.62467869144047,-0.009485301561653614,0.0542246550321579,446
-127.36422947529496,-0.0037976971361786127,0.01990300416946411,447
-121.91919779232603,-0.023358898237347603,0.027330031618475914,448
-89.28235436575278,-0.012660696171224117,0.04656204208731651,449
-148.30799913327232,-0.018588846549391747,0.021362729370594025,450
-143.4543282129579,-0.037606529891490936,0.020675456151366234,451
-114.32844215772508,-0.01934315636754036,0.04110048711299896,452
-141.88129762616921,-0.02802802063524723,0.01503519807010889,453
-129.2503161664671,-0.03021426685154438,0.017883753404021263,454
-120.62319227316313,-0.017387397587299347,0.02414742484688759,455
-117.30791885104891,-0.01613880693912506,0.02275879867374897,456
-157.0457753747779,-0.022862570360302925,0.014477331191301346,457
-107.9812245117222,-0.037923287600278854,0.04696270450949669,458
-114.55609954596484,-0.025405315682291985,0.027229860424995422,459
-105.027528237553,-0.027183957397937775,0.03568172827363014,460
-105.30925318410367,-0.012113668955862522,0.03603820502758026,461
-132.7898618630805,-0.016477437689900398,0.02315354347229004,462
-140.47949891598554,-0.01893824152648449,0.01675788126885891,463
-131.4289571880956,-0.02234606444835663,0.025300854817032814,464
-115.68304760435333,-0.011969472281634808,0.0318298414349556,465
-143.68007278172726,-0.03444080427289009,0.01601318269968033,466
-102.93201764920735,-0.0030736643821001053,0.05537399277091026,467
-120.08156888409654,-0.004310398828238249,0.026737062260508537,468
-158.3828158322219,-0.022283172234892845,0.017530949786305428,469
-130.9686053926808,-0.09679147601127625,0.19618667662143707,470
-104.43565710843941,0.023985719308257103,0.054824113845825195,471
-150.8319972272468,-0.03582534193992615,0.015082096680998802,472
-124.29911093365494,-0.003401998896151781,0.02254103682935238,473
-120.77224410606729,-0.006498170550912619,0.0240518469363451,474
-126.46661994865838,-0.03479928523302078,0.02397235482931137,475
-139.1149242807633,-0.01937275566160679,0.009717773646116257,476
-142.546401936936,-0.021950852125883102,0.010231674648821354,477
-95.85089912604417,-0.00548981549218297,0.048925403505563736,478
-107.85296023783714,0.0016318452544510365,0.030028043314814568,479
-119.03653496800409,-0.019679594784975052,0.023646151646971703,480
-159.28282492252322,-0.04855602607131004,0.01901816762983799,481
-113.05835279564188,-0.0344134196639061,0.059326328337192535,482
-140.5039039038542,-0.03065495938062668,0.013328935950994492,483
-140.53442508041235,-0.021846134215593338,0.01192952785640955,484
-137.2827547099211,-0.015718191862106323,0.018312832340598106,485
-117.58628768991242,-0.044932566583156586,0.03964213281869888,486
-121.25903569402358,-0.03715162351727486,0.027324898168444633,487
-129.6133254285075,-0.032355472445487976,0.01864064671099186,488
-159.65994531790597,-0.05241500958800316,0.012681905180215836,489
-126.16926321199202,-0.012690901756286621,0.0644230991601944,490
-135.90973930217606,-0.0094911465421319,0.019452156499028206,491
-136.8541681022612,-0.01824874058365822,0.019122276455163956,492
-137.4035503217135,-0.014235062524676323,0.023464588448405266,493
-136.47030312281902,-0.019507575780153275,0.01940569467842579,494
-139.8298566668537,-0.026058034971356392,0.011374448426067829,495
-140.15553100932942,-0.01734858565032482,0.013210110366344452,496
-128.5929624367314,-0.047601234167814255,0.02908635325729847,497
-107.43755655811081,-0.0371660552918911,0.06429874151945114,498
-97.75083918907556,-0.017936645075678825,0.04431864619255066,499
================================================
FILE: history/ac_sparse.csv
================================================
Episode_reward,actor_loss,critic_loss,episode
46.0,0.15708111226558685,10.703339576721191,0
11.0,-0.28331494331359863,19.787118911743164,1
24.0,0.07914095371961594,2.0553085803985596,2
11.0,-0.16641865670681,6.423779487609863,3
14.0,0.00206198007799685,3.0836663246154785,4
24.0,-0.007437457796186209,1.3668951988220215,5
15.0,0.07684779167175293,1.3325746059417725,6
10.0,-0.14787694811820984,1.9680309295654297,7
11.0,-0.5541443228721619,11.497489929199219,8
15.0,0.046119723469018936,1.164766788482666,9
27.0,0.18693281710147858,0.9977313280105591,10
25.0,-0.02487783506512642,3.426077365875244,11
15.0,0.09097448736429214,1.7190871238708496,12
12.0,-0.11516990512609482,1.7416564226150513,13
13.0,0.11680031567811966,0.9986221790313721,14
11.0,-0.21710120141506195,1.3468047380447388,15
18.0,-0.1584535390138626,6.159326076507568,16
49.0,-0.02433660998940468,2.5991287231445312,17
18.0,-0.016491122543811798,9.629115104675293,18
27.0,0.1658116728067398,1.2685333490371704,19
16.0,0.09101071953773499,0.8594657778739929,20
11.0,-0.11546719819307327,3.541494846343994,21
21.0,0.09801877290010452,1.6491456031799316,22
10.0,0.05944998189806938,1.2436825037002563,23
49.0,0.08347591012716293,1.292287826538086,24
23.0,-0.10531073063611984,1.7366694211959839,25
25.0,0.042967263609170914,1.4112411737442017,26
14.0,-0.1030559092760086,0.5424199104309082,27
13.0,0.09419719129800797,0.5872083902359009,28
11.0,0.0714622437953949,0.25870734453201294,29
28.0,-0.11955034732818604,3.0150628089904785,30
23.0,0.12160929292440414,1.2548524141311646,31
15.0,0.022263940423727036,1.052060842514038,32
23.0,0.13196411728858948,0.8963538408279419,33
15.0,-0.05559730529785156,0.8238027691841125,34
15.0,0.03702358528971672,0.7466095089912415,35
27.0,0.049668923020362854,1.0461552143096924,36
29.0,0.007565909530967474,2.3898000717163086,37
12.0,0.010593235492706299,0.8240801692008972,38
57.0,0.06253640353679657,0.4960484504699707,39
14.0,-0.11316702514886856,0.6910349726676941,40
36.0,-0.05576689541339874,1.8883126974105835,41
31.0,-0.06678037345409393,0.7510959506034851,42
11.0,0.12745854258537292,0.9076582193374634,43
26.0,0.08652591705322266,0.9330106973648071,44
39.0,0.0007638564566150308,1.2670847177505493,45
14.0,-0.1532670110464096,2.0817651748657227,46
15.0,-0.18648196756839752,2.0914254188537598,47
58.0,0.02371405065059662,0.874768853187561,48
16.0,-0.1611773818731308,3.048701763153076,49
78.0,-0.08101316541433334,0.7750633358955383,50
108.0,0.00018569054373074323,0.6708148121833801,51
20.0,-0.04658918455243111,1.9875085353851318,52
10.0,0.15146510303020477,0.538233757019043,53
51.0,0.07100915163755417,0.3324044644832611,54
14.0,-0.21520325541496277,1.1971681118011475,55
65.0,-0.06360554695129395,0.9158459305763245,56
102.0,-0.007419256493449211,1.1813948154449463,57
22.0,0.28063786029815674,0.49648362398147583,58
99.0,0.059428099542856216,0.7366203665733337,59
13.0,-0.26603060960769653,1.712687373161316,60
28.0,0.055799055844545364,1.30489182472229,61
28.0,-0.03550821915268898,1.184736967086792,62
40.0,-0.1208341121673584,1.2666234970092773,63
50.0,-0.010878819972276688,0.788481593132019,64
102.0,-0.07855198532342911,0.6961331963539124,65
78.0,-0.002004120498895645,1.04771888256073,66
151.0,-0.01021483726799488,0.19554193317890167,67
63.0,-0.017089037224650383,0.19626514613628387,68
87.0,-0.036519408226013184,0.24972231686115265,69
34.0,-0.08846504986286163,2.332099437713623,70
24.0,0.09107737988233566,1.4093512296676636,71
37.0,0.028210144490003586,1.1828194856643677,72
35.0,0.03814493492245674,0.9228385090827942,73
26.0,-0.1467811018228531,0.7876605987548828,74
23.0,-0.19650426506996155,0.4523639678955078,75
35.0,-0.12570612132549286,1.0840811729431152,76
140.0,-0.07352928072214127,0.5347028970718384,77
62.0,0.041730135679244995,0.7461478114128113,78
28.0,-0.11057548969984055,1.640944242477417,79
13.0,-0.28680145740509033,1.413116216659546,80
46.0,-0.09727904945611954,0.8366326093673706,81
105.0,0.006618705112487078,0.1780693084001541,82
135.0,-0.07727474719285965,0.38812872767448425,83
111.0,-0.04614066705107689,0.49178263545036316,84
180.0,-0.027585316449403763,0.2371954768896103,85
124.0,-0.032976407557725906,0.387373149394989,86
127.0,-0.0004946341505274177,0.08610803633928299,87
87.0,-0.05440782010555267,0.5575460195541382,88
184.0,-0.02407199889421463,0.1114669069647789,89
198.0,-0.02366279438138008,0.09785211086273193,90
106.0,-0.04604828730225563,0.36168915033340454,91
200.0,-0.015499483793973923,0.28062817454338074,92
129.0,0.022404268383979797,0.34456419944763184,93
200.0,-0.041753362864255905,0.17765121161937714,94
200.0,-0.07258912920951843,0.3294238746166229,95
200.0,0.0015863787848502398,0.3673955202102661,96
59.0,-0.15577536821365356,0.9091365337371826,97
123.0,-0.011310129426419735,0.36533528566360474,98
76.0,-0.05190511420369148,0.20350292325019836,99
142.0,-0.06325507164001465,0.34562650322914124,100
138.0,-0.0152218546718359,0.39639076590538025,101
108.0,-0.050525687634944916,0.520297110080719,102
120.0,-0.014246219769120216,0.38258811831474304,103
22.0,-0.13176153600215912,3.684641122817993,104
23.0,0.07973098009824753,1.6745266914367676,105
18.0,-0.13841216266155243,1.9822711944580078,106
29.0,-0.22570228576660156,1.7687430381774902,107
19.0,-0.03758680075407028,2.131235122680664,108
24.0,0.1342272162437439,1.9011210203170776,109
22.0,0.19661901891231537,2.4401755332946777,110
21.0,-0.056327104568481445,1.658852219581604,111
18.0,-0.35678890347480774,2.365894317626953,112
19.0,0.030785605311393738,1.9986851215362549,113
22.0,0.04046151787042618,1.9621682167053223,114
14.0,-0.10542023926973343,2.52258038520813,115
15.0,-0.19289381802082062,1.866510272026062,116
19.0,-0.07956085354089737,1.8327311277389526,117
12.0,-0.4135580062866211,2.4335200786590576,118
11.0,-0.030774343758821487,0.5609723329544067,119
13.0,0.035763032734394073,0.9815815687179565,120
13.0,0.005579260643571615,1.1195242404937744,121
14.0,-0.06337376683950424,1.74441397190094,122
16.0,-0.0284061711281538,1.287758708000183,123
17.0,-0.23556436598300934,1.4078991413116455,124
17.0,-0.17262782156467438,0.38084855675697327,125
16.0,-0.05455147475004196,0.7156497836112976,126
15.0,-0.2710554897785187,0.6604633927345276,127
27.0,-0.10968874394893646,1.159364938735962,128
23.0,-0.10418415069580078,0.6072282791137695,129
27.0,-0.08909005671739578,0.46529078483581543,130
14.0,-0.42565593123435974,0.6341150403022766,131
43.0,-0.03900247439742088,0.6616441607475281,132
30.0,-0.13474325835704803,0.7914584279060364,133
139.0,-0.14631320536136627,0.7484360933303833,134
29.0,-0.05782928317785263,1.3499945402145386,135
134.0,-0.056201860308647156,0.6385849118232727,136
121.0,-0.011597949080169201,0.551180362701416,137
121.0,-0.006616365630179644,0.3687339425086975,138
200.0,-0.0016885180957615376,0.5285871028900146,139
200.0,0.01933862827718258,0.49519073963165283,140
117.0,0.025889186188578606,0.6685908436775208,141
200.0,0.013803060166537762,0.5013015270233154,142
118.0,0.03277798369526863,0.3231470286846161,143
110.0,0.02277827262878418,0.25937753915786743,144
199.0,-0.03243868425488472,0.11418214440345764,145
32.0,0.0009617768228054047,3.5205748081207275,146
20.0,0.2582176625728607,1.6396430730819702,147
92.0,0.00787483062595129,0.1949927657842636,148
29.0,-0.16344903409481049,3.3004753589630127,149
20.0,0.10707885026931763,2.134247303009033,150
15.0,-0.17798778414726257,2.2475526332855225,151
15.0,-0.14601756632328033,2.5585546493530273,152
16.0,-0.33845874667167664,2.1405718326568604,153
24.0,-0.2621995508670807,1.6703158617019653,154
13.0,0.139850914478302,2.7419891357421875,155
20.0,-0.286944717168808,1.7754749059677124,156
21.0,-0.17014659941196442,1.8928807973861694,157
18.0,-0.4594283103942871,2.2065176963806152,158
25.0,0.07775726169347763,1.7216529846191406,159
25.0,-0.0004685783351305872,1.811466097831726,160
26.0,-0.07042773813009262,1.722166657447815,161
18.0,0.11300940066576004,2.355417490005493,162
19.0,0.20305494964122772,2.207050085067749,163
21.0,0.08928683400154114,1.6309477090835571,164
21.0,0.10732945054769516,1.776181697845459,165
20.0,0.08598322421312332,1.7819526195526123,166
18.0,0.11096955835819244,1.8649431467056274,167
21.0,0.12201087921857834,1.678673505783081,168
24.0,0.019334256649017334,1.5406007766723633,169
18.0,0.04419267177581787,2.0192480087280273,170
18.0,-0.35479557514190674,1.848196268081665,171
21.0,0.06767965108156204,1.5673719644546509,172
27.0,0.048630241304636,1.358237385749817,173
13.0,0.1493222862482071,2.587765693664551,174
13.0,-0.1220095157623291,2.1480531692504883,175
17.0,0.19160790741443634,1.5259854793548584,176
19.0,-0.01122245006263256,1.432834267616272,177
17.0,-0.0614984929561615,1.6416468620300293,178
16.0,0.12375448644161224,1.671883463859558,179
16.0,0.08760067075490952,1.731959342956543,180
20.0,0.06143452972173691,1.328629493713379,181
15.0,0.0514119453728199,1.7316960096359253,182
15.0,-0.465212881565094,1.5180261135101318,183
19.0,-0.009349101223051548,1.2548789978027344,184
16.0,0.0371192991733551,1.4884580373764038,185
17.0,0.04294077679514885,1.283705711364746,186
17.0,-0.11588123440742493,1.42438542842865,187
14.0,-0.046003036201000214,1.7050822973251343,188
18.0,0.07881927490234375,1.1381348371505737,189
20.0,0.05295684188604355,1.2035560607910156,190
20.0,0.06974686682224274,1.0754977464675903,191
22.0,0.04955059662461281,1.0104645490646362,192
20.0,0.04513952136039734,0.9284613728523254,193
27.0,0.06987472623586655,0.7489845752716064,194
19.0,0.05672299116849899,0.8745599985122681,195
23.0,0.012632045894861221,0.6619858741760254,196
19.0,0.08306790143251419,0.5852164626121521,197
24.0,-0.15606975555419922,0.5512746572494507,198
25.0,-0.008989603258669376,0.5733245015144348,199
22.0,-0.046394262462854385,0.6129249930381775,200
29.0,0.005445077549666166,0.3126966655254364,201
32.0,0.020901143550872803,0.16898809373378754,202
33.0,0.0539248064160347,0.2498863935470581,203
32.0,-0.022294219583272934,0.5785260796546936,204
37.0,0.03526824340224266,0.2021041065454483,205
33.0,-0.020538868382573128,0.40824177861213684,206
97.0,-0.09483183920383453,0.7134478092193604,207
38.0,0.061337921768426895,0.920579731464386,208
111.0,-0.06470271199941635,0.3860238194465637,209
40.0,0.029538234695792198,1.1659181118011475,210
29.0,-0.37939706444740295,1.291352391242981,211
32.0,0.03548508882522583,1.0397565364837646,212
40.0,0.03945370391011238,0.8570400476455688,213
33.0,0.02049228921532631,0.8838012218475342,214
42.0,0.023618122562766075,0.7098052501678467,215
106.0,-0.03726230189204216,0.6060228943824768,216
117.0,0.071341373026371,0.2713908851146698,217
117.0,-0.08965490758419037,0.364866703748703,218
118.0,-0.05872584879398346,0.4178122878074646,219
200.0,-0.02451316826045513,0.42843201756477356,220
200.0,-0.042136091738939285,0.4635346233844757,221
200.0,0.008176042698323727,0.42871609330177307,222
151.0,0.00855812057852745,0.4203112721443176,223
148.0,0.026913482695817947,0.5899422764778137,224
33.0,-0.048606131225824356,1.609865665435791,225
111.0,0.0280837994068861,0.3687347173690796,226
141.0,-0.019896384328603745,0.2982586920261383,227
103.0,0.01496926974505186,0.5740676522254944,228
58.0,0.0004995541530661285,1.1529173851013184,229
59.0,-0.06141338497400284,1.0899021625518799,230
78.0,0.03473882004618645,0.3658626675605774,231
54.0,-0.07987667620182037,1.1329048871994019,232
66.0,0.016411136835813522,0.7592692375183105,233
61.0,0.002788575366139412,0.5661239624023438,234
55.0,-0.021031592041254044,0.41448068618774414,235
82.0,-0.016050269827246666,0.15551184117794037,236
68.0,-0.16944904625415802,0.6102570295333862,237
116.0,0.020207425579428673,0.577196478843689,238
137.0,-0.021703872829675674,0.682037889957428,239
200.0,0.009493929333984852,0.4484395980834961,240
200.0,0.023500967770814896,0.5558210611343384,241
120.0,-0.0016839206218719482,0.5028929114341736,242
116.0,0.006039291620254517,0.7781342267990112,243
128.0,-0.013841862790286541,0.40437906980514526,244
30.0,0.061143286526203156,2.245932102203369,245
29.0,0.07970838248729706,1.4847197532653809,246
24.0,0.03370336815714836,1.729762077331543,247
38.0,0.039085377007722855,1.3021458387374878,248
27.0,-0.09008951485157013,1.584365725517273,249
25.0,0.03452019393444061,1.662104606628418,250
16.0,0.040816254913806915,2.1942737102508545,251
27.0,-0.0062103536911308765,1.2955496311187744,252
17.0,-0.3921051621437073,1.918212652206421,253
25.0,-0.1099080815911293,1.3688440322875977,254
17.0,-0.20716571807861328,1.7599519491195679,255
15.0,0.014386606402695179,1.8060944080352783,256
18.0,0.03820337355136871,1.3439362049102783,257
18.0,0.05200354754924774,1.298297643661499,258
18.0,0.05704142153263092,1.2682714462280273,259
18.0,0.043020494282245636,1.2042254209518433,260
13.0,-0.002292733872309327,1.3885078430175781,261
18.0,-0.10351388901472092,1.1175013780593872,262
16.0,0.02520008012652397,1.2497649192810059,263
24.0,0.10005620121955872,0.9423577785491943,264
22.0,0.0005124238668940961,1.045102596282959,265
16.0,-0.43298012018203735,1.250591516494751,266
29.0,0.15496517717838287,0.7197147011756897,267
25.0,0.02396712824702263,0.8432828783988953,268
27.0,-0.04531298577785492,0.8296579718589783,269
32.0,0.06497931480407715,0.593549370765686,270
28.0,0.03618904948234558,0.5753760933876038,271
43.0,0.06439494341611862,0.3444869816303253,272
35.0,-0.03405074402689934,0.6553113460540771,273
34.0,0.02783866412937641,0.3712359368801117,274
48.0,0.018136590719223022,0.26566246151924133,275
52.0,0.010908638127148151,0.23558005690574646,276
34.0,0.017459429800510406,0.32277819514274597,277
47.0,0.008845859207212925,0.29398858547210693,278
47.0,-0.09002931416034698,0.3470979630947113,279
64.0,0.03885677456855774,0.17960606515407562,280
72.0,-0.00215338496491313,0.26457464694976807,281
101.0,-0.032796621322631836,0.5474420189857483,282
142.0,0.057146307080984116,0.12398028373718262,283
198.0,-0.05464121326804161,0.47431960701942444,284
200.0,-0.0029543363489210606,0.5001184344291687,285
200.0,0.0036154913250356913,0.35193514823913574,286
152.0,-0.007570174988359213,0.20036949217319489,287
200.0,0.0020136034581810236,0.4522154927253723,288
200.0,-0.021602289751172066,0.373005747795105,289
120.0,0.004556996747851372,0.14547160267829895,290
115.0,0.008248458616435528,0.07245007902383804,291
114.0,0.007396819069981575,0.10600382089614868,292
128.0,-0.033319175243377686,0.20940788090229034,293
24.0,-0.10253982990980148,4.056668281555176,294
24.0,0.19286853075027466,0.7121522426605225,295
20.0,-0.05594431608915329,1.511155605316162,296
25.0,-0.006276942323893309,1.1292327642440796,297
39.0,0.03760023042559624,0.8104657530784607,298
100.0,-0.05639946088194847,0.2665490508079529,299
================================================
FILE: history/ddpg.csv
================================================
Episode_reward,Loss,episode
-1509.9431573465693,9.224396642297506,0
-1865.7656558462838,0.2480768134398386,1
-1285.631963262328,0.9022780474415049,2
-1305.4174017643334,2.977207661359571,3
-1382.4000812392546,4.911441958639771,4
-1062.575140308041,7.437465053331107,5
-1463.8546658484654,10.082108230926096,6
-1223.369614812585,15.84260846812278,7
-952.641334606606,21.111351540870963,8
-1188.393523377037,21.4787728536129,9
-1336.629206109055,20.15197231501341,10
-1526.5977045860482,27.844334089905022,11
-1284.4889842946702,30.666733276881278,12
-1602.4901520485619,33.38175935223698,13
-1506.6540408292062,34.42411740764975,14
-1388.615317798267,29.513498384170234,15
-1283.3422825947046,44.503813500329855,16
-1631.0748155793688,43.637542406357824,17
-1218.7571524464163,38.54312503609806,18
-1411.6802759192415,50.128491373732686,19
-1200.9221829226055,49.92026059575379,20
-1230.2951272159905,54.2671976512298,21
-1115.6587490790553,53.428198878541586,22
-1114.8459866674827,53.73677407834679,23
-1547.5155195785896,63.951403808966276,24
-1063.3868299522057,65.53570614930243,25
-898.8103642220121,69.60633171230555,26
-1772.604491489826,56.947230311110616,27
-1184.3980703948423,56.36146053176373,28
-759.2921595575442,56.576132576465604,29
-659.4782017407694,56.29647995978594,30
-861.2114618572491,58.280869005098936,31
-868.5843914093462,60.638545464910564,32
-1055.8006086159235,61.7349532661587,33
-968.1073123743865,47.02249816864729,34
-981.43887368482,54.137102286964655,35
-761.0594396606434,34.231038222238425,36
-1577.5114653357641,46.65215977922082,37
-1180.6829526873669,41.02836825296283,38
-624.3891328249243,45.36802312284708,39
-627.0876307311714,42.32168261505663,40
-1297.0037830418707,41.261543742641805,41
-1322.6100932334898,35.52373902335763,42
-849.5575525740325,31.8621961453557,43
-1081.1936754523663,35.82060529649257,44
-626.4815883735374,34.470040532425045,45
-744.5436034922028,27.803453820869326,46
-1463.0895354655895,37.31040966331959,47
-858.4114601314827,23.049129573255776,48
-1196.8466105893212,25.310880394652486,49
-1221.4214918479825,34.453879231885075,50
-635.5322105411383,29.389354399591685,51
-630.4519218527109,28.666258409991862,52
-1185.2536593137788,28.31704713396728,53
-764.4381632336194,22.21421483851969,54
-1313.2115576328438,24.51629433467984,55
-858.4827687078472,32.937019220255316,56
-741.389829745495,33.518929943814875,57
-637.7492933687442,22.058465831577777,58
-1565.3534979594217,23.153870964720845,59
-1054.3859100346278,25.502650653645397,60
-1770.917859770205,28.80989343907684,61
-810.2497811602519,28.819833066910505,62
-1057.2006563891528,26.13691723205149,63
-1050.428922766867,24.413474108949302,64
-874.1548399039127,28.675858917608856,65
-1238.262404563758,32.64708211489022,66
-1298.1190547629403,40.3973752079159,67
-964.2918273021526,28.06319517850876,68
-1235.3605769688743,29.70577700898051,69
-1172.5004264568718,30.53858146555722,70
-1168.3332468058768,34.81271961562336,71
-1055.7145165992451,25.386454262509943,72
-1003.6498049096768,40.83339177712798,73
-1024.3221521298358,35.24092298142612,74
-1042.4980651047051,41.469298264682294,75
-1039.9132596264599,37.89584819301963,76
-1017.8303287403377,34.35798047445714,77
-987.178003404745,42.34256130099297,78
-922.4054072981648,42.71066652163863,79
-1086.5982370765053,37.53961860656738,80
-1001.2568111933108,44.20261692024767,81
-1039.9148148626332,39.62542889393866,82
-1014.7862915770123,38.702078479006886,83
-901.3799970078131,38.10230468176305,84
-960.5569569363169,34.37458169274032,85
-885.014240277523,42.73001478083432,86
-878.3993387976839,35.681683946698904,87
-870.4344527375974,42.747159790098664,88
-1286.4680655815541,31.279732630178334,89
-743.9724402397385,39.56026891417801,90
-1440.5815302829,38.7252170599252,91
-887.5124442350698,35.39897967994213,92
-853.2025954302526,32.64205513432622,93
-1107.243661383217,34.91410423785448,94
-629.0496861884772,34.88018217563629,95
-595.7645291736038,33.26384668022394,96
-760.4614553011925,27.939529517665505,97
-1012.3462343371923,28.840708961933853,98
-878.0639777420679,30.402641356289386,99
-880.5633393828522,24.96170378655195,100
-1056.214777710658,23.492589546144007,101
-893.3955082801795,25.264478589743376,102
-763.1320657960539,24.1388235180825,103
-505.50097175783213,23.13241209179163,104
-553.7723736130598,26.654008081033826,105
-742.5199257151457,26.6922037550807,106
-961.3578975972842,23.324770770967007,107
-795.0068184016638,26.934202596992254,108
-753.3143419930043,26.45864893272519,109
-749.7826401513026,23.12310247182846,110
-1181.0581205410153,22.38455956786871,111
-748.4255588221755,22.340058853551746,112
-734.3846204042682,22.7055980373919,113
-878.3373223390909,25.161819908469916,114
-376.70457557936993,19.546717322915793,115
-508.5545705869219,20.56222064897418,116
-503.425602490802,17.00599652223289,117
-375.91923117552994,24.69850500717759,118
-249.37784986808146,18.53221425265074,119
-1230.7925881337471,18.968866907656192,120
-1047.3458950014876,19.263563225716354,121
-520.5705180139792,19.088095309063792,122
-127.41132204350131,16.680690984129907,123
-494.0516561225265,14.114880212694407,124
-241.7952363379058,15.163003908470273,125
-622.1498929576501,13.860611055344343,126
-126.28689389993978,10.743532688766718,127
-481.7462220794856,12.154294685646892,128
-252.0518702878864,8.22040078625083,129
-1058.259944433226,10.099088204875589,130
-1175.9008885759447,10.603743339329958,131
-1380.0705786753408,10.230509576946497,132
-778.3397305876816,15.391569518595935,133
-133.46953696397125,14.409788343980908,134
-900.7765687642241,15.92464128397405,135
-865.3962979056647,12.270800278410315,136
-895.7349032156494,9.966138138473035,137
-637.3153610072421,14.353773091882466,138
-643.3822494854851,12.138147334828973,139
-529.7190572819877,11.232233720272779,140
-267.4607062952199,11.33184901200235,141
-381.90410426841044,8.22482607923448,142
-131.40242751324504,7.3245576003938915,143
-372.0020306586164,9.024972496032715,144
-117.72238854186023,9.184057947322726,145
-127.93529934947,9.165707747861743,146
-649.3362683996553,12.506398611739279,147
-124.71662549383197,9.226185390949249,148
-125.02271326676247,10.296387278512121,149
-2.9754935149676016,12.134869307279587,150
-123.54315765522725,10.784451853185892,151
-121.59970310844948,12.01533182270825,152
-127.83470800816228,9.180831761807204,153
-128.07100431495167,9.951997726485134,154
-244.87853058154593,11.53757042095065,155
-387.62244377800465,12.070866116993129,156
-380.34783270446457,13.931512423604726,157
-0.2049996884126917,17.255584732964635,158
-0.1618008890243681,21.664329013191164,159
-259.02778073541367,25.850017339736223,160
-401.527916216881,20.795998985096812,161
-119.79479164027957,22.34058747135103,162
-1.099818418403321,20.18182430770248,163
-369.6545190068,16.527924857279285,164
-334.81240334087715,15.65822378879413,165
-342.41405521558937,13.056955074761063,166
-0.5857461587394992,11.401023681247606,167
-260.2752873027791,9.589962152335794,168
-261.66037029388485,7.553395981565118,169
-129.56440024749517,5.8495552339963615,170
-258.08730583463785,5.231778765879572,171
-120.20548753691581,4.1807065053051335,172
-260.95698845711877,3.291417717039585,173
-389.37460725626335,2.9867228827252985,174
-1187.125597823911,2.858089001998305,175
-114.04855440980137,2.766956553393975,176
-250.30580205457662,2.595750063546002,177
-0.3014525534964593,2.9701660694740712,178
-1493.5148444234912,2.094745456716046,179
-388.5575184518914,3.2538931934162973,180
-118.72923345772811,7.536445793844759,181
-1130.9116601613175,5.056547564156354,182
-118.80008688384058,7.5332822824828325,183
-133.28311695911438,7.767288487832993,184
-1128.0453572089707,7.902463755961508,185
-4.918048537370808,9.419822578150779,186
-507.91622762502357,13.655524237100035,187
-116.65599103879406,11.857215057630093,188
-119.57400403798566,12.931360413730145,189
-126.60727857129913,10.384384954106062,190
-130.69187463526168,11.276750937402248,191
-532.6094861649789,9.913348460327834,192
-119.40447754106185,10.843797551989555,193
-118.01457640625543,15.639693554751576,194
-125.38472296989859,14.797974425330757,195
-0.7717357914055901,9.847735727513209,196
-117.80949351393008,13.156731079723686,197
-117.81753855241199,8.287331903707235,198
-244.8041840890352,8.84490654880181,199
================================================
FILE: history/ddqn.csv
================================================
Episode_reward,Loss,episode
11.0,inf,0
12.0,0.4681931138038635,5
15.0,1.190276861190796,10
12.0,1.9253158569335938,15
14.0,2.1662869453430176,20
14.0,0.3513646721839905,25
13.0,1.5280985832214355,30
9.0,7.641080379486084,35
13.0,5.177434921264648,40
37.0,0.32416990399360657,45
30.0,3.0215706825256348,50
36.0,2.573920726776123,55
47.0,2.4598228931427,60
47.0,0.20579954981803894,65
59.0,0.9766868948936462,70
114.0,0.021512869745492935,75
78.0,0.020475244149565697,80
141.0,0.018932819366455078,85
200.0,0.31822460889816284,90
198.0,0.03797835111618042,95
191.0,0.014194854535162449,100
182.0,0.013047108426690102,105
176.0,2.1590566635131836,110
155.0,0.4268774092197418,115
157.0,0.024365421384572983,120
150.0,0.010818028822541237,125
143.0,0.6679432392120361,130
137.0,0.012195337563753128,135
139.0,0.016057107597589493,140
133.0,0.006115872412919998,145
130.0,0.011953135952353477,150
139.0,0.00645385030657053,155
147.0,0.014706989750266075,160
150.0,0.03395920991897583,165
132.0,0.006384558044373989,170
147.0,0.0018864702433347702,175
168.0,0.006037265993654728,180
147.0,0.003880517091602087,185
145.0,0.0685364305973053,190
139.0,0.039683014154434204,195
147.0,0.026838181540369987,200
148.0,0.0033504890743643045,205
153.0,0.027849620208144188,210
148.0,0.0022808443754911423,215
142.0,0.004483409691601992,220
153.0,0.012921614572405815,225
178.0,0.021095992997288704,230
145.0,0.016173984855413437,235
180.0,0.005053142085671425,240
189.0,2.571658134460449,245
200.0,0.005489872302860022,250
200.0,0.003300785319879651,255
200.0,0.010993970558047295,260
200.0,0.02414834313094616,265
200.0,0.0550992377102375,270
200.0,0.028599431738257408,275
200.0,0.04435380548238754,280
200.0,0.07636132836341858,285
200.0,0.02288103848695755,290
161.0,0.026148678734898567,295
182.0,0.046217601746320724,300
200.0,0.09286589920520782,305
200.0,0.027517562732100487,310
200.0,0.03821772336959839,315
200.0,0.013089239597320557,320
200.0,5.8109049797058105,325
11.0,0.007258770987391472,330
200.0,0.09761332720518112,335
200.0,3.223811388015747,340
200.0,0.03628034517168999,345
200.0,0.05087439715862274,350
200.0,0.029234319925308228,355
200.0,0.013562529347836971,360
200.0,5.093896865844727,365
200.0,0.015044460073113441,370
185.0,0.012767013162374496,375
160.0,4.742649555206299,380
200.0,1.0178560018539429,385
200.0,0.08182927221059799,390
200.0,0.01739146187901497,395
200.0,0.012645881623029709,400
9.0,0.06882349401712418,405
39.0,0.09852810204029083,410
9.0,4.144737720489502,415
200.0,0.14002403616905212,420
200.0,1.0666762590408325,425
200.0,3.7281572818756104,430
200.0,0.03138595074415207,435
200.0,1.6102722883224487,440
200.0,0.039595987647771835,445
200.0,0.02767912670969963,450
183.0,0.012993409298360348,455
179.0,1.8382511138916016,460
200.0,0.01074942946434021,465
119.0,0.05010325461626053,470
125.0,0.04084702581167221,475
177.0,0.01885252445936203,480
167.0,0.5582287311553955,485
158.0,0.33888524770736694,490
200.0,0.03887341916561127,495
191.0,0.5189582705497742,500
200.0,4.817852973937988,505
200.0,0.011416086927056313,510
148.0,0.029537221416831017,515
140.0,0.018659140914678574,520
128.0,6.135469913482666,525
140.0,0.017004529014229774,530
141.0,0.07908019423484802,535
154.0,0.014127008616924286,540
136.0,0.11874083429574966,545
143.0,0.05152087286114693,550
200.0,0.010304479859769344,555
200.0,0.7502464056015015,560
157.0,0.06722593307495117,565
200.0,0.018013518303632736,570
200.0,5.614536762237549,575
200.0,0.035216063261032104,580
200.0,0.01069585606455803,585
200.0,0.04107843339443207,590
156.0,6.011622428894043,595
================================================
FILE: history/dueling.csv
================================================
Episode_reward,Loss,episode
30.0,inf,0
12.0,0.47862666845321655,5
10.0,0.5809901356697083,10
8.0,1.5168644189834595,15
10.0,6.394465446472168,20
11.0,5.444173336029053,25
11.0,6.27878999710083,30
11.0,6.766901969909668,35
10.0,0.267914354801178,40
27.0,0.7681541442871094,45
12.0,1.126452922821045,50
21.0,2.1125664710998535,55
13.0,1.1672120094299316,60
12.0,0.19179925322532654,65
28.0,1.5655790567398071,70
37.0,3.6122264862060547,75
25.0,0.4315088093280792,80
44.0,3.3004751205444336,85
54.0,3.665844440460205,90
43.0,0.26683512330055237,95
36.0,5.318463325500488,100
103.0,3.0524964332580566,105
50.0,1.7247264385223389,110
22.0,9.666183471679688,115
67.0,1.6773548126220703,120
141.0,0.22905239462852478,125
83.0,0.1352100670337677,130
98.0,0.08889307826757431,135
176.0,0.026787875220179558,140
150.0,0.06743741035461426,145
200.0,0.04945575073361397,150
200.0,0.026455968618392944,155
200.0,4.038379192352295,160
200.0,0.051251500844955444,165
200.0,0.018056631088256836,170
200.0,6.713091850280762,175
200.0,0.023181792348623276,180
200.0,4.3332624435424805,185
166.0,0.02410086989402771,190
200.0,0.11829394102096558,195
200.0,0.02812301553785801,200
96.0,2.3146820068359375,205
87.0,0.07561161369085312,210
121.0,0.11904805898666382,215
182.0,3.8535306453704834,220
159.0,0.8626901507377625,225
160.0,0.06850502640008926,230
194.0,0.04625943303108215,235
200.0,0.06545911729335785,240
200.0,0.02540893666446209,245
145.0,0.04876522347331047,250
200.0,0.0599154457449913,255
35.0,0.05168953537940979,260
200.0,0.04558023437857628,265
200.0,0.029182441532611847,270
200.0,2.2100436687469482,275
200.0,0.051673658192157745,280
200.0,4.435661792755127,285
200.0,0.043758101761341095,290
200.0,0.03457247465848923,295
200.0,0.019870392978191376,300
200.0,0.019909145310521126,305
9.0,0.1150365024805069,310
200.0,0.07036980986595154,315
71.0,0.05771002918481827,320
200.0,0.09849052131175995,325
200.0,0.045551616698503494,330
137.0,0.06604130566120148,335
11.0,0.09229154139757156,340
200.0,3.8334453105926514,345
200.0,0.02100004069507122,350
171.0,0.02557738684117794,355
200.0,5.395693302154541,360
200.0,0.021021392196416855,365
200.0,4.131258010864258,370
200.0,0.03307604417204857,375
172.0,0.059052661061286926,380
200.0,0.04051003232598305,385
200.0,0.018864136189222336,390
200.0,0.05375618487596512,395
39.0,0.0699274092912674,400
200.0,1.667770266532898,405
200.0,0.09974359720945358,410
155.0,4.035706520080566,415
157.0,0.07855170220136642,420
200.0,0.028804700821638107,425
162.0,1.9383234977722168,430
155.0,0.02189595438539982,435
200.0,0.014903474599123001,440
200.0,0.07733364403247833,445
187.0,0.022851835936307907,450
178.0,0.018380625173449516,455
131.0,0.04312446713447571,460
11.0,0.017067477107048035,465
200.0,0.07996176183223724,470
200.0,0.6626131534576416,475
200.0,0.019013769924640656,480
126.0,0.011018311604857445,485
185.0,0.0681515485048294,490
170.0,0.022165827453136444,495
200.0,0.025683466345071793,500
141.0,1.391233205795288,505
127.0,0.07900568842887878,510
200.0,0.13426055014133453,515
200.0,0.04910080507397652,520
200.0,5.457238674163818,525
116.0,0.046407174319028854,530
125.0,0.05579424649477005,535
176.0,0.07973217219114304,540
199.0,0.037510018795728683,545
142.0,0.04629438742995262,550
12.0,0.15213751792907715,555
10.0,0.025547455996274948,560
35.0,0.16393166780471802,565
196.0,0.10191859304904938,570
187.0,0.060145795345306396,575
188.0,0.028432875871658325,580
181.0,1.3723012208938599,585
200.0,0.035072945058345795,590
140.0,4.833233833312988,595
================================================
FILE: history/ndqn.csv
================================================
Episode_reward,Loss,episode
12.0,inf,0
39.0,0.5968765616416931,5
10.0,0.16437163949012756,10
8.0,0.8609060645103455,15
10.0,4.647477149963379,20
15.0,0.37436389923095703,25
18.0,1.042937159538269,30
32.0,0.25837475061416626,35
16.0,1.1858779191970825,40
29.0,0.13475389778614044,45
36.0,0.8968307375907898,50
75.0,0.09517502039670944,55
66.0,5.1964287757873535,60
169.0,0.8814995288848877,65
200.0,0.04609198123216629,70
200.0,0.07762521505355835,75
98.0,1.7214298248291016,80
66.0,0.02651367150247097,85
48.0,2.0342352390289307,90
89.0,0.04153415188193321,95
65.0,0.013355078175663948,100
90.0,0.026795051991939545,105
72.0,0.021573634818196297,110
106.0,0.02679624781012535,115
110.0,0.010724630206823349,120
200.0,0.9347834587097168,125
200.0,2.6067371368408203,130
200.0,0.03422386199235916,135
173.0,0.023121735081076622,140
74.0,0.03360900655388832,145
107.0,0.0349336713552475,150
105.0,3.056359052658081,155
89.0,1.5287914276123047,160
176.0,0.04428096488118172,165
156.0,0.020566236227750778,170
200.0,0.009313585236668587,175
200.0,0.011389960534870625,180
200.0,0.03430327773094177,185
200.0,0.03263617306947708,190
200.0,0.01683700643479824,195
200.0,0.022440128028392792,200
200.0,0.12438426911830902,205
199.0,0.02578575909137726,210
200.0,0.011934514157474041,215
200.0,0.016942474991083145,220
97.0,0.01568945124745369,225
114.0,0.12535057961940765,230
94.0,0.03555770218372345,235
200.0,0.04525809735059738,240
200.0,0.017689408734440804,245
200.0,0.0067247869446873665,250
200.0,0.07176659256219864,255
200.0,0.08657388389110565,260
141.0,0.033294644206762314,265
121.0,0.028403853997588158,270
200.0,2.290412664413452,275
200.0,0.02531827799975872,280
200.0,5.504024505615234,285
178.0,0.08186638355255127,290
200.0,0.08797675371170044,295
200.0,0.020431581884622574,300
132.0,5.576883316040039,305
200.0,4.848743438720703,310
200.0,0.016923055052757263,315
200.0,0.02683131955564022,320
124.0,0.015678368508815765,325
177.0,0.03775625675916672,330
137.0,0.05068105831742287,335
140.0,0.40797001123428345,340
166.0,0.02414533495903015,345
155.0,0.01930900663137436,350
164.0,0.011196689680218697,355
176.0,0.004606300499290228,360
167.0,0.0069297803565859795,365
152.0,0.003465760499238968,370
133.0,0.004011339973658323,375
178.0,0.014116533100605011,380
167.0,0.035929497331380844,385
188.0,0.056661978363990784,390
200.0,0.024751631543040276,395
200.0,4.973880290985107,400
168.0,0.09016193449497223,405
131.0,0.05364896357059479,410
143.0,0.03196289390325546,415
175.0,0.010464230552315712,420
137.0,0.043501902371644974,425
155.0,0.037118226289749146,430
124.0,0.013473874889314175,435
145.0,0.03192755952477455,440
200.0,0.02426156774163246,445
122.0,0.026452424004673958,450
162.0,3.928562641143799,455
131.0,0.029229773208498955,460
181.0,0.017442982643842697,465
186.0,0.007039761170744896,470
154.0,0.005024969577789307,475
200.0,0.016365719959139824,480
200.0,5.1473846435546875,485
12.0,0.027667846530675888,490
200.0,1.7594248056411743,495
200.0,0.08335447311401367,500
200.0,0.03457290679216385,505
200.0,0.011456770822405815,510
194.0,0.014004027470946312,515
174.0,0.031052935868501663,520
185.0,0.016657808795571327,525
200.0,0.018321434035897255,530
178.0,0.025208085775375366,535
200.0,0.009419861249625683,540
149.0,0.007532942574471235,545
149.0,0.01075670588761568,550
149.0,0.01898787170648575,555
127.0,0.013626886531710625,560
160.0,1.0664030313491821,565
200.0,0.006966365966945887,570
188.0,0.01090502180159092,575
110.0,0.009709188714623451,580
200.0,0.007233826443552971,585
200.0,5.274221897125244,590
200.0,0.058526113629341125,595
================================================
FILE: history/pg.csv
================================================
Batch_reward,Episode_reward,Loss,episode
126.0,25.2,-0.323914110660553,5
170.0,34.0,-0.3671955466270447,10
95.0,19.0,-0.31113868951797485,15
79.0,15.8,-0.31862735748291016,20
83.0,16.6,-0.31243768334388733,25
74.0,14.8,-0.2790282964706421,30
76.0,15.2,-0.30600470304489136,35
69.0,13.8,-0.2797362208366394,40
124.0,24.8,-0.31787288188934326,45
94.0,18.8,-0.32114318013191223,50
142.0,28.4,-0.3324996829032898,55
110.0,22.0,-0.3379059135913849,60
72.0,14.4,-0.31070083379745483,65
160.0,32.0,-0.36996564269065857,70
72.0,14.4,-0.3219853341579437,75
80.0,16.0,-0.3177907168865204,80
91.0,18.2,-0.31226593255996704,85
115.0,23.0,-0.3390122056007385,90
113.0,22.6,-0.32164397835731506,95
178.0,35.6,-0.35060709714889526,100
80.0,16.0,-0.3213381767272949,105
116.0,23.2,-0.3130705952644348,110
99.0,19.8,-0.3158109486103058,115
102.0,20.4,-0.32293465733528137,120
93.0,18.6,-0.30985644459724426,125
117.0,23.4,-0.33105579018592834,130
115.0,23.0,-0.3251442313194275,135
113.0,22.6,-0.356834352016449,140
126.0,25.2,-0.32076460123062134,145
100.0,20.0,-0.3276306986808777,150
89.0,17.8,-0.32530131936073303,155
82.0,16.4,-0.3186661899089813,160
128.0,25.6,-0.33558765053749084,165
101.0,20.2,-0.31853118538856506,170
108.0,21.6,-0.33060187101364136,175
120.0,24.0,-0.32635703682899475,180
154.0,30.8,-0.3299770951271057,185
116.0,23.2,-0.3230454623699188,190
197.0,39.4,-0.3595934212207794,195
93.0,18.6,-0.3127687871456146,200
133.0,26.6,-0.3340115547180176,205
125.0,25.0,-0.32840797305107117,210
99.0,19.8,-0.32053759694099426,215
139.0,27.8,-0.3281051516532898,220
141.0,28.2,-0.32602617144584656,225
145.0,29.0,-0.33977046608924866,230
96.0,19.2,-0.3226241171360016,235
114.0,22.8,-0.33663344383239746,240
111.0,22.2,-0.3363283574581146,245
162.0,32.4,-0.3411697745323181,250
128.0,25.6,-0.3296198844909668,255
91.0,18.2,-0.32588136196136475,260
111.0,22.2,-0.3307957649230957,265
107.0,21.4,-0.3256392478942871,270
129.0,25.8,-0.33310621976852417,275
66.0,13.2,-0.3237151503562927,280
121.0,24.2,-0.3433718979358673,285
75.0,15.0,-0.3230298161506653,290
117.0,23.4,-0.3261656165122986,295
176.0,35.2,-0.3627626895904541,300
121.0,24.2,-0.3326563835144043,305
96.0,19.2,-0.32595303654670715,310
98.0,19.6,-0.3163517415523529,315
83.0,16.6,-0.3248583674430847,320
123.0,24.6,-0.3384328782558441,325
84.0,16.8,-0.3206236958503723,330
122.0,24.4,-0.3618758022785187,335
71.0,14.2,-0.32045409083366394,340
102.0,20.4,-0.3334352970123291,345
78.0,15.6,-0.3175198435783386,350
154.0,30.8,-0.370138555765152,355
107.0,21.4,-0.3431130051612854,360
102.0,20.4,-0.3242567479610443,365
91.0,18.2,-0.32735154032707214,370
128.0,25.6,-0.33163779973983765,375
89.0,17.8,-0.31738248467445374,380
127.0,25.4,-0.38126614689826965,385
78.0,15.6,-0.32667869329452515,390
98.0,19.6,-0.3316572606563568,395
147.0,29.4,-0.3489876985549927,400
109.0,21.8,-0.328703373670578,405
115.0,23.0,-0.3258844017982483,410
77.0,15.4,-0.3242562413215637,415
109.0,21.8,-0.3315679430961609,420
125.0,25.0,-0.35489338636398315,425
81.0,16.2,-0.32471272349357605,430
78.0,15.6,-0.32005593180656433,435
97.0,19.4,-0.31360965967178345,440
125.0,25.0,-0.3440498113632202,445
112.0,22.4,-0.33888572454452515,450
128.0,25.6,-0.32566192746162415,455
81.0,16.2,-0.325516015291214,460
193.0,38.6,-0.3992687165737152,465
159.0,31.8,-0.36642223596572876,470
141.0,28.2,-0.3294885456562042,475
115.0,23.0,-0.32550784945487976,480
86.0,17.2,-0.32193079590797424,485
117.0,23.4,-0.3378137946128845,490
93.0,18.6,-0.3283882141113281,495
91.0,18.2,-0.31916195154190063,500
155.0,31.0,-0.35981062054634094,505
122.0,24.4,-0.33529549837112427,510
122.0,24.4,-0.3334076702594757,515
127.0,25.4,-0.3241950273513794,520
69.0,13.8,-0.31813812255859375,525
100.0,20.0,-0.3260694742202759,530
96.0,19.2,-0.3393671214580536,535
108.0,21.6,-0.3339618146419525,540
103.0,20.6,-0.32321697473526,545
108.0,21.6,-0.34803831577301025,550
209.0,41.8,-0.37500619888305664,555
90.0,18.0,-0.31353095173835754,560
91.0,18.2,-0.32434719800949097,565
101.0,20.2,-0.3281025290489197,570
169.0,33.8,-0.3593110740184784,575
95.0,19.0,-0.331133633852005,580
122.0,24.4,-0.3337944746017456,585
135.0,27.0,-0.3440612852573395,590
84.0,16.8,-0.3325881063938141,595
101.0,20.2,-0.3345527946949005,600
75.0,15.0,-0.3226890563964844,605
111.0,22.2,-0.32936322689056396,610
156.0,31.2,-0.34380999207496643,615
89.0,17.8,-0.3252561688423157,620
133.0,26.6,-0.3402594327926636,625
106.0,21.2,-0.3349014222621918,630
129.0,25.8,-0.33368977904319763,635
108.0,21.6,-0.33249637484550476,640
93.0,18.6,-0.3272244334220886,645
116.0,23.2,-0.327767550945282,650
94.0,18.8,-0.3070034980773926,655
104.0,20.8,-0.32560011744499207,660
89.0,17.8,-0.3445221483707428,665
145.0,29.0,-0.3271236717700958,670
110.0,22.0,-0.33397266268730164,675
120.0,24.0,-0.32614800333976746,680
123.0,24.6,-0.3399856388568878,685
170.0,34.0,-0.36179420351982117,690
192.0,38.4,-0.36600160598754883,695
128.0,25.6,-0.32878589630126953,700
117.0,23.4,-0.341595321893692,705
134.0,26.8,-0.317445307970047,710
95.0,19.0,-0.333281010389328,715
146.0,29.2,-0.3421921133995056,720
126.0,25.2,-0.3426232933998108,725
107.0,21.4,-0.33393988013267517,730
104.0,20.8,-0.32600662112236023,735
146.0,29.2,-0.33033308386802673,740
89.0,17.8,-0.32442706823349,745
166.0,33.2,-0.36430254578590393,750
166.0,33.2,-0.33611834049224854,755
111.0,22.2,-0.32691603899002075,760
165.0,33.0,-0.3286210894584656,765
131.0,26.2,-0.3302786946296692,770
128.0,25.6,-0.34573444724082947,775
141.0,28.2,-0.34006255865097046,780
141.0,28.2,-0.32640042901039124,785
144.0,28.8,-0.32620134949684143,790
191.0,38.2,-0.357293039560318,795
125.0,25.0,-0.32633158564567566,800
143.0,28.6,-0.3263686001300812,805
124.0,24.8,-0.323049932718277,810
177.0,35.4,-0.3612987697124481,815
95.0,19.0,-0.3241707980632782,820
162.0,32.4,-0.3730015158653259,825
99.0,19.8,-0.3270459771156311,830
119.0,23.8,-0.3332597017288208,835
114.0,22.8,-0.32190534472465515,840
109.0,21.8,-0.33435577154159546,845
97.0,19.4,-0.3109934329986572,850
85.0,17.0,-0.3222292363643646,855
110.0,22.0,-0.32931065559387207,860
79.0,15.8,-0.3263776898384094,865
126.0,25.2,-0.3366556465625763,870
109.0,21.8,-0.3179785907268524,875
127.0,25.4,-0.33851853013038635,880
108.0,21.6,-0.33515265583992004,885
62.0,12.4,-0.32123520970344543,890
130.0,26.0,-0.3218475580215454,895
135.0,27.0,-0.33280518651008606,900
104.0,20.8,-0.34523093700408936,905
145.0,29.0,-0.32937273383140564,910
150.0,30.0,-0.3692808151245117,915
97.0,19.4,-0.33158236742019653,920
160.0,32.0,-0.33346542716026306,925
108.0,21.6,-0.3280292749404907,930
214.0,42.8,-0.3434847891330719,935
124.0,24.8,-0.34609928727149963,940
153.0,30.6,-0.3434329926967621,945
105.0,21.0,-0.3302847146987915,950
118.0,23.6,-0.3245857357978821,955
129.0,25.8,-0.3407069146633148,960
147.0,29.4,-0.33763107657432556,965
183.0,36.6,-0.3729757070541382,970
129.0,25.8,-0.3388650119304657,975
178.0,35.6,-0.355676531791687,980
142.0,28.4,-0.3570093512535095,985
94.0,18.8,-0.32580772042274475,990
216.0,43.2,-0.34840127825737,995
139.0,27.8,-0.3191074728965759,1000
97.0,19.4,-0.321834534406662,1005
165.0,33.0,-0.3293624520301819,1010
128.0,25.6,-0.31909996271133423,1015
90.0,18.0,-0.3221004605293274,1020
120.0,24.0,-0.3225758969783783,1025
110.0,22.0,-0.3229179382324219,1030
183.0,36.6,-0.3358568251132965,1035
107.0,21.4,-0.3292447030544281,1040
140.0,28.0,-0.32179105281829834,1045
136.0,27.2,-0.32573065161705017,1050
246.0,49.2,-0.345488578081131,1055
117.0,23.4,-0.34006670117378235,1060
257.0,51.4,-0.33879947662353516,1065
169.0,33.8,-0.350941926240921,1070
185.0,37.0,-0.3586171865463257,1075
141.0,28.2,-0.3215785622596741,1080
152.0,30.4,-0.3365849554538727,1085
157.0,31.4,-0.3653704524040222,1090
159.0,31.8,-0.33440902829170227,1095
120.0,24.0,-0.34093326330184937,1100
201.0,40.2,-0.33844923973083496,1105
219.0,43.8,-0.35109400749206543,1110
263.0,52.6,-0.3550150692462921,1115
201.0,40.2,-0.3496033847332001,1120
313.0,62.6,-0.33947136998176575,1125
302.0,60.4,-0.3442603647708893,1130
308.0,61.6,-0.33771267533302307,1135
251.0,50.2,-0.3532044291496277,1140
148.0,29.6,-0.30538347363471985,1145
233.0,46.6,-0.3530670702457428,1150
319.0,63.8,-0.3307492136955261,1155
350.0,70.0,-0.34717267751693726,1160
384.0,76.8,-0.32274261116981506,1165
422.0,84.4,-0.32839006185531616,1170
228.0,45.6,-0.30992454290390015,1175
367.0,73.4,-0.33390071988105774,1180
398.0,79.6,-0.3453659117221832,1185
565.0,113.0,-0.301306813955307,1190
490.0,98.0,-0.32132676243782043,1195
512.0,102.4,-0.31597256660461426,1200
365.0,73.0,-0.3505546748638153,1205
433.0,86.6,-0.32052144408226013,1210
498.0,99.6,-0.33124780654907227,1215
336.0,67.2,-0.3218598961830139,1220
311.0,62.2,-0.3413241505622864,1225
321.0,64.2,-0.3447491526603699,1230
187.0,37.4,-0.31791070103645325,1235
214.0,42.8,-0.29995036125183105,1240
209.0,41.8,-0.33326685428619385,1245
185.0,37.0,-0.28255948424339294,1250
271.0,54.2,-0.3443583846092224,1255
236.0,47.2,-0.3281277120113373,1260
186.0,37.2,-0.3085566759109497,1265
169.0,33.8,-0.29311126470565796,1270
174.0,34.8,-0.28989309072494507,1275
128.0,25.6,-0.2745927572250366,1280
156.0,31.2,-0.2774413526058197,1285
141.0,28.2,-0.2932639718055725,1290
181.0,36.2,-0.2723075747489929,1295
190.0,38.0,-0.28218960762023926,1300
198.0,39.6,-0.27305474877357483,1305
161.0,32.2,-0.2835294008255005,1310
137.0,27.4,-0.28676924109458923,1315
185.0,37.0,-0.30793672800064087,1320
138.0,27.6,-0.2708069086074829,1325
139.0,27.8,-0.2733404040336609,1330
169.0,33.8,-0.25757548213005066,1335
194.0,38.8,-0.27253004908561707,1340
188.0,37.6,-0.2716241776943207,1345
254.0,50.8,-0.2872135043144226,1350
275.0,55.0,-0.30489450693130493,1355
314.0,62.8,-0.30115067958831787,1360
259.0,51.8,-0.28657886385917664,1365
274.0,54.8,-0.2947578728199005,1370
315.0,63.0,-0.284370481967926,1375
420.0,84.0,-0.28122013807296753,1380
304.0,60.8,-0.29020482301712036,1385
347.0,69.4,-0.2792486846446991,1390
436.0,87.2,-0.2617129385471344,1395
298.0,59.6,-0.27968278527259827,1400
290.0,58.0,-0.2966199517250061,1405
293.0,58.6,-0.2687513530254364,1410
301.0,60.2,-0.2640783488750458,1415
297.0,59.4,-0.2812418043613434,1420
295.0,59.0,-0.2812604308128357,1425
308.0,61.6,-0.2759893238544464,1430
346.0,69.2,-0.30547887086868286,1435
275.0,55.0,-0.26983094215393066,1440
272.0,54.4,-0.24922586977481842,1445
419.0,83.8,-0.2561200261116028,1450
316.0,63.2,-0.2530679702758789,1455
317.0,63.4,-0.2537248432636261,1460
306.0,61.2,-0.2625858783721924,1465
329.0,65.8,-0.266976922750473,1470
326.0,65.2,-0.2646888196468353,1475
428.0,85.6,-0.2437351644039154,1480
316.0,63.2,-0.260174036026001,1485
367.0,73.4,-0.27776938676834106,1490
373.0,74.6,-0.26097598671913147,1495
318.0,63.6,-0.27720341086387634,1500
346.0,69.2,-0.2672405540943146,1505
325.0,65.0,-0.2874023914337158,1510
415.0,83.0,-0.25887230038642883,1515
503.0,100.6,-0.26642149686813354,1520
418.0,83.6,-0.26785600185394287,1525
322.0,64.4,-0.2640377879142761,1530
570.0,114.0,-0.24567648768424988,1535
343.0,68.6,-0.26842209696769714,1540
323.0,64.6,-0.27676114439964294,1545
296.0,59.2,-0.2819293141365051,1550
619.0,123.8,-0.26687514781951904,1555
416.0,83.2,-0.284103661775589,1560
498.0,99.6,-0.29981642961502075,1565
268.0,53.6,-0.26020547747612,1570
428.0,85.6,-0.2733575105667114,1575
411.0,82.2,-0.29171478748321533,1580
363.0,72.6,-0.2915985882282257,1585
376.0,75.2,-0.2707919180393219,1590
452.0,90.4,-0.2971310317516327,1595
367.0,73.4,-0.2947559058666229,1600
380.0,76.0,-0.2887576222419739,1605
519.0,103.8,-0.2703001797199249,1610
407.0,81.4,-0.2852098345756531,1615
668.0,133.6,-0.2705146074295044,1620
436.0,87.2,-0.30569276213645935,1625
418.0,83.6,-0.2917002737522125,1630
527.0,105.4,-0.2697019577026367,1635
541.0,108.2,-0.2923319935798645,1640
450.0,90.0,-0.2872810661792755,1645
486.0,97.2,-0.26877787709236145,1650
586.0,117.2,-0.2843734323978424,1655
504.0,100.8,-0.29393646121025085,1660
495.0,99.0,-0.28225526213645935,1665
426.0,85.2,-0.31057173013687134,1670
444.0,88.8,-0.31152743101119995,1675
456.0,91.2,-0.28087612986564636,1680
471.0,94.2,-0.2913546562194824,1685
496.0,99.2,-0.2839125692844391,1690
467.0,93.4,-0.2844887375831604,1695
463.0,92.6,-0.27567774057388306,1700
518.0,103.6,-0.27867573499679565,1705
537.0,107.4,-0.28661832213401794,1710
638.0,127.6,-0.2699316143989563,1715
670.0,134.0,-0.2734288275241852,1720
769.0,153.8,-0.26910433173179626,1725
645.0,129.0,-0.26937440037727356,1730
456.0,91.2,-0.2961300015449524,1735
591.0,118.2,-0.27045905590057373,1740
672.0,134.4,-0.26792922616004944,1745
643.0,128.6,-0.28915852308273315,1750
841.0,168.2,-0.25474753975868225,1755
744.0,148.8,-0.26696857810020447,1760
754.0,150.8,-0.2589016258716583,1765
745.0,149.0,-0.27511686086654663,1770
678.0,135.6,-0.27377811074256897,1775
542.0,108.4,-0.28012797236442566,1780
482.0,96.4,-0.2984004616737366,1785
564.0,112.8,-0.28570491075515747,1790
526.0,105.2,-0.28553274273872375,1795
429.0,85.8,-0.31285083293914795,1800
642.0,128.4,-0.28988713026046753,1805
789.0,157.8,-0.25857824087142944,1810
618.0,123.6,-0.30434682965278625,1815
693.0,138.6,-0.27829989790916443,1820
821.0,164.2,-0.26921334862709045,1825
881.0,176.2,-0.24802248179912567,1830
922.0,184.4,-0.25607961416244507,1835
763.0,152.6,-0.2877015173435211,1840
731.0,146.2,-0.27064964175224304,1845
818.0,163.6,-0.26569467782974243,1850
765.0,153.0,-0.27027228474617004,1855
844.0,168.8,-0.2618628740310669,1860
589.0,117.8,-0.29191330075263977,1865
787.0,157.4,-0.2632910907268524,1870
867.0,173.4,-0.25515371561050415,1875
915.0,183.0,-0.24169787764549255,1880
752.0,150.4,-0.2643362581729889,1885
749.0,149.8,-0.27746638655662537,1890
859.0,171.8,-0.2660120725631714,1895
775.0,155.0,-0.27933937311172485,1900
694.0,138.8,-0.2731143534183502,1905
851.0,170.2,-0.26047879457473755,1910
611.0,122.2,-0.3043682277202606,1915
648.0,129.6,-0.2933899164199829,1920
529.0,105.8,-0.2947007119655609,1925
360.0,72.0,-0.3085305988788605,1930
495.0,99.0,-0.32446032762527466,1935
767.0,153.4,-0.26316165924072266,1940
482.0,96.4,-0.29827943444252014,1945
458.0,91.6,-0.3013209402561188,1950
569.0,113.8,-0.2856493592262268,1955
540.0,108.0,-0.2954277992248535,1960
657.0,131.4,-0.2848345935344696,1965
567.0,113.4,-0.2670009136199951,1970
661.0,132.2,-0.2961806654930115,1975
374.0,74.8,-0.31242892146110535,1980
514.0,102.8,-0.29416441917419434,1985
439.0,87.8,-0.3018152713775635,1990
589.0,117.8,-0.2866000533103943,1995
673.0,134.6,-0.27572011947631836,2000
491.0,98.2,-0.28802257776260376,2005
502.0,100.4,-0.2913190424442291,2010
684.0,136.8,-0.2605753540992737,2015
585.0,117.0,-0.2784154713153839,2020
490.0,98.0,-0.3181760609149933,2025
481.0,96.2,-0.3038674592971802,2030
559.0,111.8,-0.2845531105995178,2035
344.0,68.8,-0.3110370337963104,2040
468.0,93.6,-0.32684192061424255,2045
550.0,110.0,-0.2799729108810425,2050
498.0,99.6,-0.2940945327281952,2055
478.0,95.6,-0.3053005337715149,2060
638.0,127.6,-0.2735568881034851,2065
501.0,100.2,-0.32170242071151733,2070
731.0,146.2,-0.2598632574081421,2075
577.0,115.4,-0.27256399393081665,2080
495.0,99.0,-0.2753492295742035,2085
655.0,131.0,-0.2919852137565613,2090
656.0,131.2,-0.27932503819465637,2095
519.0,103.8,-0.28536275029182434,2100
425.0,85.0,-0.32349893450737,2105
303.0,60.6,-0.32709527015686035,2110
464.0,92.8,-0.3068963289260864,2115
354.0,70.8,-0.2968566119670868,2120
459.0,91.8,-0.29264581203460693,2125
454.0,90.8,-0.29909512400627136,2130
480.0,96.0,-0.30375662446022034,2135
511.0,102.2,-0.3016867935657501,2140
438.0,87.6,-0.28403419256210327,2145
325.0,65.0,-0.3250955045223236,2150
445.0,89.0,-0.3128427267074585,2155
476.0,95.2,-0.2751147747039795,2160
349.0,69.8,-0.3309052586555481,2165
323.0,64.6,-0.29476287961006165,2170
306.0,61.2,-0.29730474948883057,2175
275.0,55.0,-0.28807350993156433,2180
262.0,52.4,-0.2849494516849518,2185
247.0,49.4,-0.2895693778991699,2190
390.0,78.0,-0.28957656025886536,2195
398.0,79.6,-0.30953487753868103,2200
315.0,63.0,-0.3229285478591919,2205
244.0,48.8,-0.29578179121017456,2210
232.0,46.4,-0.2965617775917053,2215
325.0,65.0,-0.30256932973861694,2220
217.0,43.4,-0.2804889678955078,2225
286.0,57.2,-0.3051661550998688,2230
366.0,73.2,-0.30820080637931824,2235
250.0,50.0,-0.2960183918476105,2240
387.0,77.4,-0.3190012574195862,2245
349.0,69.8,-0.3200511932373047,2250
313.0,62.6,-0.3188655376434326,2255
320.0,64.0,-0.31194472312927246,2260
372.0,74.4,-0.29605546593666077,2265
392.0,78.4,-0.29973769187927246,2270
381.0,76.2,-0.3098653554916382,2275
307.0,61.4,-0.309661328792572,2280
295.0,59.0,-0.3135487735271454,2285
310.0,62.0,-0.32026296854019165,2290
360.0,72.0,-0.3253321349620819,2295
255.0,51.0,-0.3225225806236267,2300
260.0,52.0,-0.28479400277137756,2305
304.0,60.8,-0.30789539217948914,2310
469.0,93.8,-0.3014775216579437,2315
300.0,60.0,-0.3001944422721863,2320
303.0,60.6,-0.280273973941803,2325
325.0,65.0,-0.30136871337890625,2330
323.0,64.6,-0.3079989552497864,2335
392.0,78.4,-0.32181212306022644,2340
325.0,65.0,-0.31631535291671753,2345
515.0,103.0,-0.2901548147201538,2350
370.0,74.0,-0.3226360082626343,2355
268.0,53.6,-0.30735111236572266,2360
408.0,81.6,-0.31560268998146057,2365
338.0,67.6,-0.3099435269832611,2370
282.0,56.4,-0.2960219085216522,2375
276.0,55.2,-0.3091493546962738,2380
400.0,80.0,-0.3270515203475952,2385
383.0,76.6,-0.32039105892181396,2390
298.0,59.6,-0.3318256437778473,2395
403.0,80.6,-0.30560219287872314,2400
389.0,77.8,-0.3165701627731323,2405
314.0,62.8,-0.3352294862270355,2410
327.0,65.4,-0.3150443136692047,2415
440.0,88.0,-0.3004245162010193,2420
437.0,87.4,-0.31933194398880005,2425
377.0,75.4,-0.3215472400188446,2430
449.0,89.8,-0.30864641070365906,2435
283.0,56.6,-0.3230055272579193,2440
363.0,72.6,-0.321520060300827,2445
340.0,68.0,-0.33344605565071106,2450
398.0,79.6,-0.3284223675727844,2455
278.0,55.6,-0.3242235481739044,2460
387.0,77.4,-0.3207733631134033,2465
401.0,80.2,-0.3539959192276001,2470
294.0,58.8,-0.3313872814178467,2475
381.0,76.2,-0.3344331979751587,2480
214.0,42.8,-0.3258454501628876,2485
361.0,72.2,-0.30855175852775574,2490
202.0,40.4,-0.3441172242164612,2495
344.0,68.8,-0.3241690397262573,2500
265.0,53.0,-0.3344844877719879,2505
280.0,56.0,-0.3179953098297119,2510
295.0,59.0,-0.33485040068626404,2515
442.0,88.4,-0.33316734433174133,2520
379.0,75.8,-0.35698387026786804,2525
318.0,63.6,-0.33826303482055664,2530
251.0,50.2,-0.3398612439632416,2535
300.0,60.0,-0.32650598883628845,2540
255.0,51.0,-0.33304497599601746,2545
226.0,45.2,-0.33001911640167236,2550
409.0,81.8,-0.3345124125480652,2555
375.0,75.0,-0.3362683951854706,2560
362.0,72.4,-0.3233446180820465,2565
352.0,70.4,-0.33965399861335754,2570
317.0,63.4,-0.35508760809898376,2575
283.0,56.6,-0.34275469183921814,2580
372.0,74.4,-0.3211212158203125,2585
405.0,81.0,-0.3200519382953644,2590
412.0,82.4,-0.3162257969379425,2595
588.0,117.6,-0.2985230088233948,2600
363.0,72.6,-0.3323720693588257,2605
335.0,67.0,-0.3439211845397949,2610
398.0,79.6,-0.3534368872642517,2615
267.0,53.4,-0.3095570504665375,2620
323.0,64.6,-0.32619574666023254,2625
472.0,94.4,-0.3172033131122589,2630
348.0,69.6,-0.3429124355316162,2635
264.0,52.8,-0.2987551689147949,2640
368.0,73.6,-0.35467034578323364,2645
316.0,63.2,-0.3708944618701935,2650
331.0,66.2,-0.3331873118877411,2655
325.0,65.0,-0.35394689440727234,2660
320.0,64.0,-0.3461282551288605,2665
287.0,57.4,-0.3383663594722748,2670
299.0,59.8,-0.34119999408721924,2675
358.0,71.6,-0.3390156924724579,2680
315.0,63.0,-0.3429752290248871,2685
170.0,34.0,-0.3075783848762512,2690
333.0,66.6,-0.33995160460472107,2695
321.0,64.2,-0.35553258657455444,2700
292.0,58.4,-0.32117390632629395,2705
384.0,76.8,-0.3236222267150879,2710
382.0,76.4,-0.3598511815071106,2715
299.0,59.8,-0.3402957320213318,2720
440.0,88.0,-0.33124813437461853,2725
345.0,69.0,-0.3368532955646515,2730
401.0,80.2,-0.3405100107192993,2735
373.0,74.6,-0.35578832030296326,2740
304.0,60.8,-0.3320891857147217,2745
409.0,81.8,-0.35458457469940186,2750
479.0,95.8,-0.3229633867740631,2755
332.0,66.4,-0.37328967452049255,2760
345.0,69.0,-0.3245355188846588,2765
416.0,83.2,-0.3375442326068878,2770
273.0,54.6,-0.3345900774002075,2775
523.0,104.6,-0.3130764961242676,2780
349.0,69.8,-0.34857919812202454,2785
246.0,49.2,-0.3524782657623291,2790
261.0,52.2,-0.3142869770526886,2795
281.0,56.2,-0.3461417257785797,2800
410.0,82.0,-0.32556286454200745,2805
445.0,89.0,-0.314216673374176,2810
427.0,85.4,-0.3143163025379181,2815
493.0,98.6,-0.32997140288352966,2820
288.0,57.6,-0.3426615595817566,2825
252.0,50.4,-0.32277217507362366,2830
297.0,59.4,-0.3320043683052063,2835
339.0,67.8,-0.3409106731414795,2840
372.0,74.4,-0.34068194031715393,2845
281.0,56.2,-0.34562399983406067,2850
240.0,48.0,-0.3473271429538727,2855
348.0,69.6,-0.3337392210960388,2860
318.0,63.6,-0.3524182140827179,2865
214.0,42.8,-0.32239580154418945,2870
341.0,68.2,-0.3237490952014923,2875
212.0,42.4,-0.34856706857681274,2880
308.0,61.6,-0.35450854897499084,2885
253.0,50.6,-0.33882418274879456,2890
258.0,51.6,-0.3370402753353119,2895
222.0,44.4,-0.3146362602710724,2900
356.0,71.2,-0.3271253705024719,2905
280.0,56.0,-0.3200274407863617,2910
359.0,71.8,-0.3188944458961487,2915
379.0,75.8,-0.3070104420185089,2920
340.0,68.0,-0.3329097330570221,2925
234.0,46.8,-0.3134278953075409,2930
399.0,79.8,-0.3333054184913635,2935
382.0,76.4,-0.32842057943344116,2940
338.0,67.6,-0.32705143094062805,2945
396.0,79.2,-0.31444960832595825,2950
294.0,58.8,-0.34350016713142395,2955
279.0,55.8,-0.3429255187511444,2960
297.0,59.4,-0.3333609700202942,2965
312.0,62.4,-0.3460474908351898,2970
356.0,71.2,-0.36129605770111084,2975
360.0,72.0,-0.33534759283065796,2980
417.0,83.4,-0.31144455075263977,2985
307.0,61.4,-0.3263359069824219,2990
446.0,89.2,-0.33677947521209717,2995
308.0,61.6,-0.31382012367248535,3000
409.0,81.8,-0.3109150230884552,3005
313.0,62.6,-0.33344486355781555,3010
360.0,72.0,-0.3305971026420593,3015
380.0,76.0,-0.33848661184310913,3020
428.0,85.6,-0.33520564436912537,3025
417.0,83.4,-0.3054162859916687,3030
361.0,72.2,-0.3295208215713501,3035
339.0,67.8,-0.32029515504837036,3040
324.0,64.8,-0.3317856788635254,3045
345.0,69.0,-0.32017967104911804,3050
355.0,71.0,-0.35359203815460205,3055
446.0,89.2,-0.31876492500305176,3060
512.0,102.4,-0.3052927553653717,3065
287.0,57.4,-0.30824708938598633,3070
393.0,78.6,-0.30933746695518494,3075
488.0,97.6,-0.2817961871623993,3080
546.0,109.2,-0.3001251816749573,3085
360.0,72.0,-0.324937105178833,3090
339.0,67.8,-0.31537944078445435,3095
426.0,85.2,-0.30676764249801636,3100
359.0,71.8,-0.32886287569999695,3105
324.0,64.8,-0.3065482974052429,3110
291.0,58.2,-0.3147521913051605,3115
409.0,81.8,-0.301559716463089,3120
384.0,76.8,-0.32866623997688293,3125
496.0,99.2,-0.31302568316459656,3130
354.0,70.8,-0.3215748965740204,3135
331.0,66.2,-0.3109250068664551,3140
492.0,98.4,-0.30740219354629517,3145
357.0,71.4,-0.30839717388153076,3150
486.0,97.2,-0.3035200834274292,3155
349.0,69.8,-0.34934717416763306,3160
351.0,70.2,-0.32749202847480774,3165
249.0,49.8,-0.33485740423202515,3170
260.0,52.0,-0.31605178117752075,3175
474.0,94.8,-0.29330599308013916,3180
307.0,61.4,-0.323680579662323,3185
247.0,49.4,-0.3127794861793518,3190
237.0,47.4,-0.32012975215911865,3195
209.0,41.8,-0.3009372353553772,3200
264.0,52.8,-0.31981420516967773,3205
266.0,53.2,-0.3103926479816437,3210
204.0,40.8,-0.3112174868583679,3215
300.0,60.0,-0.3256223201751709,3220
263.0,52.6,-0.32424187660217285,3225
199.0,39.8,-0.32241830229759216,3230
179.0,35.8,-0.29255419969558716,3235
163.0,32.6,-0.29931506514549255,3240
168.0,33.6,-0.28654876351356506,3245
203.0,40.6,-0.2834084928035736,3250
226.0,45.2,-0.3224877417087555,3255
261.0,52.2,-0.28535372018814087,3260
227.0,45.4,-0.316785991191864,3265
127.0,25.4,-0.29044753313064575,3270
199.0,39.8,-0.3178306818008423,3275
179.0,35.8,-0.28865891695022583,3280
210.0,42.0,-0.31542903184890747,3285
223.0,44.6,-0.3022362291812897,3290
298.0,59.6,-0.3037247657775879,3295
265.0,53.0,-0.30964574217796326,3300
264.0,52.8,-0.30173414945602417,3305
256.0,51.2,-0.3340996503829956,3310
313.0,62.6,-0.33198970556259155,3315
357.0,71.4,-0.31368961930274963,3320
388.0,77.6,-0.3070986270904541,3325
369.0,73.8,-0.32399335503578186,3330
292.0,58.4,-0.3216218650341034,3335
304.0,60.8,-0.3278127610683441,3340
376.0,75.2,-0.3330898880958557,3345
502.0,100.4,-0.30079925060272217,3350
446.0,89.2,-0.3284432590007782,3355
325.0,65.0,-0.32955601811408997,3360
226.0,45.2,-0.3306828439235687,3365
553.0,110.6,-0.29716169834136963,3370
441.0,88.2,-0.30524060130119324,3375
405.0,81.0,-0.3227323591709137,3380
287.0,57.4,-0.3213828504085541,3385
561.0,112.2,-0.30946630239486694,3390
495.0,99.0,-0.29987454414367676,3395
322.0,64.4,-0.3116146922111511,3400
375.0,75.0,-0.3172515332698822,3405
335.0,67.0,-0.31124642491340637,3410
347.0,69.4,-0.3313350975513458,3415
447.0,89.4,-0.3128863573074341,3420
443.0,88.6,-0.323086678981781,3425
372.0,74.4,-0.33466094732284546,3430
390.0,78.0,-0.32339927554130554,3435
336.0,67.2,-0.341411828994751,3440
371.0,74.2,-0.31383463740348816,3445
224.0,44.8,-0.3281266987323761,3450
512.0,102.4,-0.3126053512096405,3455
410.0,82.0,-0.28869909048080444,3460
401.0,80.2,-0.3258347809314728,3465
398.0,79.6,-0.30318811535835266,3470
367.0,73.4,-0.3302140533924103,3475
470.0,94.0,-0.29585710167884827,3480
406.0,81.2,-0.3037850260734558,3485
333.0,66.6,-0.309684693813324,3490
515.0,103.0,-0.3057820796966553,3495
511.0,102.2,-0.2935052514076233,3500
265.0,53.0,-0.3512534499168396,3505
313.0,62.6,-0.30854520201683044,3510
589.0,117.8,-0.2876599431037903,3515
418.0,83.6,-0.30609408020973206,3520
337.0,67.4,-0.33163246512413025,3525
352.0,70.4,-0.28263986110687256,3530
225.0,45.0,-0.32982054352760315,3535
443.0,88.6,-0.3075873553752899,3540
321.0,64.2,-0.312394380569458,3545
319.0,63.8,-0.30340978503227234,3550
559.0,111.8,-0.29932039976119995,3555
367.0,73.4,-0.32565683126449585,3560
340.0,68.0,-0.3189055323600769,3565
383.0,76.6,-0.3060702681541443,3570
524.0,104.8,-0.2893108129501343,3575
422.0,84.4,-0.31200963258743286,3580
406.0,81.2,-0.3110651969909668,3585
580.0,116.0,-0.3003738820552826,3590
521.0,104.2,-0.28827184438705444,3595
594.0,118.8,-0.2962532043457031,3600
532.0,106.4,-0.2853490114212036,3605
310.0,62.0,-0.32882094383239746,3610
571.0,114.2,-0.29556986689567566,3615
391.0,78.2,-0.30969303846359253,3620
358.0,71.6,-0.35239845514297485,3625
522.0,104.4,-0.2969819903373718,3630
469.0,93.8,-0.278849333524704,3635
408.0,81.6,-0.29693683981895447,3640
615.0,123.0,-0.27214157581329346,3645
516.0,103.2,-0.2786034643650055,3650
476.0,95.2,-0.28871846199035645,3655
524.0,104.8,-0.2695292532444,3660
604.0,120.8,-0.2654711604118347,3665
579.0,115.8,-0.2629389762878418,3670
640.0,128.0,-0.24999983608722687,3675
633.0,126.6,-0.2714177370071411,3680
513.0,102.6,-0.288544237613678,3685
518.0,103.6,-0.2821209132671356,3690
408.0,81.6,-0.2815868556499481,3695
472.0,94.4,-0.29285475611686707,3700
433.0,86.6,-0.28891414403915405,3705
448.0,89.6,-0.283041775226593,3710
417.0,83.4,-0.2867187559604645,3715
427.0,85.4,-0.29195308685302734,3720
453.0,90.6,-0.30142688751220703,3725
456.0,91.2,-0.30443498492240906,3730
443.0,88.6,-0.29843389987945557,3735
491.0,98.2,-0.2915704548358917,3740
444.0,88.8,-0.28764721751213074,3745
406.0,81.2,-0.29444530606269836,3750
564.0,112.8,-0.24998706579208374,3755
535.0,107.0,-0.28250670433044434,3760
426.0,85.2,-0.2872706353664398,3765
451.0,90.2,-0.26859310269355774,3770
559.0,111.8,-0.26078301668167114,3775
483.0,96.6,-0.2515645921230316,3780
500.0,100.0,-0.24650022387504578,3785
585.0,117.0,-0.24152213335037231,3790
463.0,92.6,-0.26292717456817627,3795
483.0,96.6,-0.2518559694290161,3800
499.0,99.8,-0.2348634898662567,3805
550.0,110.0,-0.26576220989227295,3810
700.0,140.0,-0.240694060921669,3815
560.0,112.0,-0.23301252722740173,3820
579.0,115.8,-0.25920626521110535,3825
560.0,112.0,-0.2541171908378601,3830
598.0,119.6,-0.24800601601600647,3835
401.0,80.2,-0.2769554555416107,3840
933.0,186.6,-0.2157062292098999,3845
666.0,133.2,-0.2362421602010727,3850
664.0,132.8,-0.2353663593530655,3855
459.0,91.8,-0.28326261043548584,3860
590.0,118.0,-0.2582065761089325,3865
770.0,154.0,-0.23853853344917297,3870
741.0,148.2,-0.24289435148239136,3875
801.0,160.2,-0.24660153687000275,3880
885.0,177.0,-0.2432679533958435,3885
995.0,199.0,-0.22121961414813995,3890
893.0,178.6,-0.22451677918434143,3895
1000.0,200.0,-0.21893183887004852,3900
866.0,173.2,-0.2338653802871704,3905
877.0,175.4,-0.23866155743598938,3910
866.0,173.2,-0.22646315395832062,3915
820.0,164.0,-0.23564893007278442,3920
906.0,181.2,-0.2351033091545105,3925
1000.0,200.0,-0.23822283744812012,3930
1000.0,200.0,-0.2256428599357605,3935
848.0,169.6,-0.22264470160007477,3940
1000.0,200.0,-0.2249828279018402,3945
850.0,170.0,-0.2305542677640915,3950
832.0,166.4,-0.23433412611484528,3955
877.0,175.4,-0.2427292764186859,3960
862.0,172.4,-0.22772324085235596,3965
933.0,186.6,-0.23778697848320007,3970
949.0,189.8,-0.21746407449245453,3975
889.0,177.8,-0.22645731270313263,3980
955.0,191.0,-0.2358730137348175,3985
937.0,187.4,-0.22857512533664703,3990
882.0,176.4,-0.2432265728712082,3995
892.0,178.4,-0.2350594699382782,4000
758.0,151.6,-0.23316599428653717,4005
922.0,184.4,-0.22859112918376923,4010
776.0,155.2,-0.23555603623390198,4015
744.0,148.8,-0.23500294983386993,4020
689.0,137.8,-0.25206661224365234,4025
723.0,144.6,-0.2562565505504608,4030
597.0,119.4,-0.2625977396965027,4035
833.0,166.6,-0.23476195335388184,4040
686.0,137.2,-0.25012633204460144,4045
693.0,138.6,-0.2593766748905182,4050
626.0,125.2,-0.25423967838287354,4055
699.0,139.8,-0.2497188150882721,4060
629.0,125.8,-0.23464636504650116,4065
545.0,109.0,-0.22980281710624695,4070
612.0,122.4,-0.2374291568994522,4075
483.0,96.6,-0.24381035566329956,4080
597.0,119.4,-0.24583974480628967,4085
866.0,173.2,-0.21999792754650116,4090
566.0,113.2,-0.236464262008667,4095
520.0,104.0,-0.23599712550640106,4100
710.0,142.0,-0.2367580533027649,4105
636.0,127.2,-0.24715624749660492,4110
651.0,130.2,-0.23304535448551178,4115
890.0,178.0,-0.24069303274154663,4120
627.0,125.4,-0.23890958726406097,4125
911.0,182.2,-0.23527511954307556,4130
987.0,197.4,-0.22301892936229706,4135
964.0,192.8,-0.2309679090976715,4140
906.0,181.2,-0.2259397804737091,4145
885.0,177.0,-0.23679006099700928,4150
1000.0,200.0,-0.21609149873256683,4155
971.0,194.2,-0.22595475614070892,4160
957.0,191.4,-0.24088460206985474,4165
745.0,149.0,-0.236735999584198,4170
830.0,166.0,-0.23554329574108124,4175
969.0,193.8,-0.24249093234539032,4180
940.0,188.0,-0.2346430867910385,4185
960.0,192.0,-0.2440900355577469,4190
921.0,184.2,-0.2521948516368866,4195
937.0,187.4,-0.23159390687942505,4200
895.0,179.0,-0.25293585658073425,4205
871.0,174.2,-0.2548487186431885,4210
985.0,197.0,-0.2381192296743393,4215
856.0,171.2,-0.2526217997074127,4220
886.0,177.2,-0.24826432764530182,4225
915.0,183.0,-0.2533659338951111,4230
955.0,191.0,-0.232761412858963,4235
1000.0,200.0,-0.2348642200231552,4240
1000.0,200.0,-0.23721769452095032,4245
905.0,181.0,-0.24588888883590698,4250
999.0,199.8,-0.23869076371192932,4255
928.0,185.6,-0.2406618446111679,4260
869.0,173.8,-0.26329174637794495,4265
916.0,183.2,-0.24832451343536377,4270
849.0,169.8,-0.26125943660736084,4275
884.0,176.8,-0.25208091735839844,4280
674.0,134.8,-0.27581986784935,4285
875.0,175.0,-0.2634401023387909,4290
817.0,163.4,-0.2539278566837311,4295
762.0,152.4,-0.26039013266563416,4300
818.0,163.6,-0.25604119896888733,4305
947.0,189.4,-0.24396267533302307,4310
704.0,140.8,-0.2625954747200012,4315
835.0,167.0,-0.2363101691007614,4320
904.0,180.8,-0.2511977255344391,4325
1000.0,200.0,-0.2310427725315094,4330
885.0,177.0,-0.24242180585861206,4335
852.0,170.4,-0.24381664395332336,4340
744.0,148.8,-0.2467358559370041,4345
1000.0,200.0,-0.22906582057476044,4350
905.0,181.0,-0.2419399917125702,4355
1000.0,200.0,-0.21317607164382935,4360
1000.0,200.0,-0.22479897737503052,4365
925.0,185.0,-0.23383106291294098,4370
999.0,199.8,-0.21604128181934357,4375
977.0,195.4,-0.22164760529994965,4380
957.0,191.4,-0.20744213461875916,4385
877.0,175.4,-0.23589551448822021,4390
1000.0,200.0,-0.21450161933898926,4395
984.0,196.8,-0.21658353507518768,4400
943.0,188.6,-0.2249835729598999,4405
964.0,192.8,-0.2135917693376541,4410
1000.0,200.0,-0.21524985134601593,4415
999.0,199.8,-0.20534180104732513,4420
957.0,191.4,-0.2311737984418869,4425
931.0,186.2,-0.23489539325237274,4430
952.0,190.4,-0.22387759387493134,4435
1000.0,200.0,-0.20473726093769073,4440
1000.0,200.0,-0.20156295597553253,4445
957.0,191.4,-0.21567095816135406,4450
1000.0,200.0,-0.21938931941986084,4455
1000.0,200.0,-0.20336979627609253,4460
1000.0,200.0,-0.20344780385494232,4465
999.0,199.8,-0.21003496646881104,4470
1000.0,200.0,-0.21750986576080322,4475
968.0,193.6,-0.22262828052043915,4480
1000.0,200.0,-0.21291951835155487,4485
1000.0,200.0,-0.21631832420825958,4490
1000.0,200.0,-0.21128293871879578,4495
951.0,190.2,-0.22381260991096497,4500
1000.0,200.0,-0.2149495780467987,4505
1000.0,200.0,-0.1978849470615387,4510
1000.0,200.0,-0.1983804553747177,4515
1000.0,200.0,-0.21148160099983215,4520
1000.0,200.0,-0.22023101150989532,4525
1000.0,200.0,-0.2018955647945404,4530
1000.0,200.0,-0.21775908768177032,4535
1000.0,200.0,-0.21060989797115326,4540
1000.0,200.0,-0.22002004086971283,4545
951.0,190.2,-0.22761140763759613,4550
1000.0,200.0,-0.20279309153556824,4555
992.0,198.4,-0.21495231986045837,4560
849.0,169.8,-0.24093472957611084,4565
1000.0,200.0,-0.21322326362133026,4570
978.0,195.6,-0.22428254783153534,4575
1000.0,200.0,-0.21417734026908875,4580
1000.0,200.0,-0.20100240409374237,4585
1000.0,200.0,-0.20599228143692017,4590
1000.0,200.0,-0.23221741616725922,4595
940.0,188.0,-0.22649583220481873,4600
974.0,194.8,-0.22526900470256805,4605
940.0,188.0,-0.23177145421504974,4610
1000.0,200.0,-0.2212996780872345,4615
1000.0,200.0,-0.22226651012897491,4620
1000.0,200.0,-0.20896723866462708,4625
1000.0,200.0,-0.23121048510074615,4630
1000.0,200.0,-0.222879558801651,4635
1000.0,200.0,-0.2117454558610916,4640
1000.0,200.0,-0.23424331843852997,4645
1000.0,200.0,-0.23412813246250153,4650
971.0,194.2,-0.2223711609840393,4655
1000.0,200.0,-0.22414419054985046,4660
1000.0,200.0,-0.2104741632938385,4665
1000.0,200.0,-0.23048822581768036,4670
1000.0,200.0,-0.2183581441640854,4675
1000.0,200.0,-0.23302426934242249,4680
1000.0,200.0,-0.22289453446865082,4685
1000.0,200.0,-0.21986311674118042,4690
996.0,199.2,-0.23436616361141205,4695
1000.0,200.0,-0.2223847359418869,4700
1000.0,200.0,-0.22365456819534302,4705
1000.0,200.0,-0.23042644560337067,4710
1000.0,200.0,-0.22056877613067627,4715
1000.0,200.0,-0.22903500497341156,4720
797.0,159.4,-0.2448205053806305,4725
984.0,196.8,-0.23969906568527222,4730
918.0,183.6,-0.24919341504573822,4735
1000.0,200.0,-0.24023643136024475,4740
979.0,195.8,-0.24294376373291016,4745
905.0,181.0,-0.2351226955652237,4750
993.0,198.6,-0.21702656149864197,4755
1000.0,200.0,-0.22933296859264374,4760
999.0,199.8,-0.23117783665657043,4765
1000.0,200.0,-0.21749353408813477,4770
981.0,196.2,-0.23020605742931366,4775
1000.0,200.0,-0.22406738996505737,4780
1000.0,200.0,-0.20574606955051422,4785
955.0,191.0,-0.23178116977214813,4790
1000.0,200.0,-0.20908139646053314,4795
1000.0,200.0,-0.2139613926410675,4800
1000.0,200.0,-0.22396966814994812,4805
1000.0,200.0,-0.210700124502182,4810
1000.0,200.0,-0.22813169658184052,4815
1000.0,200.0,-0.21640326082706451,4820
1000.0,200.0,-0.23403076827526093,4825
1000.0,200.0,-0.2196320742368698,4830
1000.0,200.0,-0.22289463877677917,4835
1000.0,200.0,-0.23845568299293518,4840
1000.0,200.0,-0.21450844407081604,4845
1000.0,200.0,-0.22363585233688354,4850
1000.0,200.0,-0.20612011849880219,4855
1000.0,200.0,-0.20918257534503937,4860
1000.0,200.0,-0.23008960485458374,4865
1000.0,200.0,-0.2219572514295578,4870
910.0,182.0,-0.2174006849527359,4875
1000.0,200.0,-0.21942415833473206,4880
1000.0,200.0,-0.2067996859550476,4885
1000.0,200.0,-0.21193228662014008,4890
1000.0,200.0,-0.2176903337240219,4895
1000.0,200.0,-0.20393991470336914,4900
1000.0,200.0,-0.21651402115821838,4905
966.0,193.2,-0.216569185256958,4910
1000.0,200.0,-0.22076433897018433,4915
979.0,195.8,-0.2166718691587448,4920
1000.0,200.0,-0.2253253012895584,4925
1000.0,200.0,-0.239028200507164,4930
1000.0,200.0,-0.2164914757013321,4935
1000.0,200.0,-0.20864802598953247,4940
1000.0,200.0,-0.2230301946401596,4945
980.0,196.0,-0.23694653809070587,4950
907.0,181.4,-0.23473422229290009,4955
973.0,194.6,-0.22844122350215912,4960
1000.0,200.0,-0.22364303469657898,4965
881.0,176.2,-0.23789668083190918,4970
1000.0,200.0,-0.21272939443588257,4975
974.0,194.8,-0.2289992868900299,4980
862.0,172.4,-0.23540543019771576,4985
914.0,182.8,-0.23260311782360077,4990
737.0,147.4,-0.2543960213661194,4995
================================================
FILE: history/ppo1.csv
================================================
Episode_reward,episode
-1037.2442064560664,0
-1847.6011742012683,1
-1572.0879549063445,2
-1909.3375551414695,3
-1625.804678424486,4
-1458.3791532216028,5
-1676.7294710872982,6
-1880.5057794936977,7
-1714.7693216217642,8
-1823.5511048861317,9
-1646.443571113762,10
-1791.0004302144125,11
-1665.68509294836,12
-1539.889248662342,13
-1767.7738309574574,14
-1572.8167680134982,15
-1782.487852652908,16
-1627.7773814954378,17
-1798.02694320105,18
-1043.1123802930633,19
-1824.8765786915278,20
-1738.5748540276559,21
-1615.4604821214007,22
-1615.866218962353,23
-1756.9861376765596,24
-1776.4853929124988,25
-1571.456568416333,26
-1731.0225055701799,27
-1587.0271869127716,28
-1617.4324253980737,29
-1675.8339629364875,30
-1589.88151645331,31
-1520.736910439898,32
-1703.4725034879411,33
-1592.095144261398,34
-1488.159085305198,35
-1655.3300268675887,36
-1416.3679355346933,37
-1433.1090706415507,38
-1531.0053852655142,39
-1567.0351726159126,40
-1588.564338627357,41
-1182.073475448932,42
-1475.0963413565785,43
-1454.7558679603126,44
-1229.3876682396108,45
-1468.3415804526874,46
-1343.0820826356976,47
-1513.5150837470433,48
-1060.864898279517,49
-1466.2970468041235,50
-1466.4791867410315,51
-1414.8144066165612,52
-1404.5800318908703,53
-1361.4837234088805,54
-1356.8567702395721,55
-1348.8952982791732,56
-1413.3690608311895,57
-1375.0557294954053,58
-1321.5190279442513,59
-1372.1536567426783,60
-1140.597696688407,61
-1402.6687097798392,62
-1574.6781229328142,63
-1576.7077879607637,64
-1561.1441634778998,65
-1200.2874926818092,66
-1222.3030216899904,67
-1080.4847657134544,68
-1439.779979755708,69
-1406.331773423733,70
-1052.8787347372734,71
-1557.7074680390858,72
-1559.689294638684,73
-1561.5088918414517,74
-1113.9055807595837,75
-1315.7917468029939,76
-1351.3446651057125,77
-1454.1523051798201,78
-1309.8238089783406,79
-1268.8879586322907,80
-1194.4745312273606,81
-1389.2316932804683,82
-1270.7457267870166,83
-1333.9654163853515,84
-1338.753110585217,85
-1398.082088065302,86
-1138.7920123870028,87
-1346.608755810008,88
-1281.9234448097484,89
-1324.6354138259035,90
-1236.892077361457,91
-1472.552788664081,92
-1217.9659027640732,93
-1170.3323537084118,94
-1179.7302522054513,95
-1199.4833544268613,96
-1097.9397919020873,97
-1172.5576964875197,98
-1524.7203645772004,99
-1516.3794741179613,100
-1171.0919390972815,101
-1292.1748244736343,102
-1374.3293422127795,103
-1100.207684511091,104
-1352.3333838116143,105
-1090.0873226110825,106
-1313.6740663009105,107
-1327.38168174021,108
-1083.1294573788716,109
-983.6142335865583,110
-881.3843361284513,111
-777.6955586752294,112
-1523.2942760021479,113
-643.1245561336376,114
-967.2842059822092,115
-1363.9080690602211,116
-1536.377511573882,117
-1060.1031102279994,118
-1127.3393896240152,119
-1355.9448272702484,120
-1383.99389366388,121
-1508.803686267915,122
-1349.5714176609602,123
-1348.4093299734961,124
-979.3129210110255,125
-1109.0927160725516,126
-1494.3358662370995,127
-1232.529477955824,128
-1198.2221484953939,129
-1204.3422832071053,130
-1223.259894601989,131
-1048.264869873165,132
-1214.9576570813924,133
-1233.7189467355558,134
-781.7075925291913,135
-1353.194273574951,136
-685.7806108704868,137
-1287.1007558246267,138
-1230.9323991845604,139
-1186.4067360770057,140
-786.4065882045364,141
-1001.600194957268,142
-915.6642388726257,143
-1092.0082532491876,144
-675.889350695875,145
-873.1397630522355,146
-1184.5470871826124,147
-1041.735120338675,148
-645.3836706510756,149
-594.7457941205153,150
-924.9620007988684,151
-525.3225826681372,152
-515.1276436894625,153
-1383.728730884463,154
-525.0847542187588,155
-730.2522633554491,156
-588.412968141994,157
-644.2747986192634,158
-965.5322075506963,159
-1413.4680491967438,160
-1160.905794333069,161
-653.7941212692359,162
-784.4573284539567,163
-1060.4051048850272,164
-917.8574134621971,165
-654.9807166763895,166
-754.147898396826,167
-521.2704617968814,168
-1328.2391181587886,169
-1229.9026965641324,170
-520.406713854922,171
-540.8686159112228,172
-652.6905320159613,173
-1083.3322644848085,174
-518.2184614659982,175
-520.5349799468748,176
-643.2233794775584,177
-1417.944931635821,178
-390.4077956799553,179
-652.8597513827308,180
-1101.2712486358153,181
-516.4330096473971,182
-638.7717540256594,183
-659.9938717646102,184
-511.3881267579837,185
-390.85780135927683,186
-392.2780789936845,187
-812.4970002494532,188
-1415.7206853874468,189
-783.2809505461622,190
-434.011046630424,191
-1140.6721625852579,192
-911.2346112488109,193
-1038.8824870828973,194
-953.7735338040277,195
-832.0679492311945,196
-391.5803152777578,197
-594.7326684632785,198
-134.36126154017978,199
-263.1388449951507,200
-263.35297556848843,201
-2.066724454856968,202
-263.7467460787438,203
-547.8040819121052,204
-396.19772514454803,205
-1.5163648546321538,206
-1072.4688682420128,207
-537.4239657853051,208
-406.2957933943553,209
-812.1427491763986,210
-524.538393880087,211
-130.09623397331515,212
-546.0428094436602,213
-1094.4604125441156,214
-1049.990387896455,215
-215.16348544943915,216
-663.9809345973752,217
-132.85756077729036,218
-265.1615309177778,219
-839.9940494275294,220
-522.7645163607622,221
-382.3034934689336,222
-264.7068957473173,223
-262.36920139541087,224
-696.4631737266078,225
-683.2932880126467,226
-693.0030169321484,227
-255.9343966393008,228
-382.15861079855944,229
-133.72960816436733,230
-135.63361762344363,231
-1070.3947800142726,232
-261.77685016344583,233
-786.6645231796923,234
-524.2769697482614,235
-904.6443397757624,236
-395.87852836714035,237
-395.7256764528719,238
-130.2041242713439,239
-258.95821903796764,240
-392.74850252299376,241
-932.9703316790616,242
-130.53268719654994,243
-133.11015787559413,244
-262.90512124240774,245
-397.52978082155977,246
-806.9252845000568,247
-404.94792012056337,248
-1.344683358323401,249
-265.03263166243835,250
-393.69235487465875,251
-397.18762513507096,252
-261.93852467722917,253
-0.6932713815189028,254
-0.2772104986206239,255
-134.84329433614684,256
-0.3018889248722081,257
-262.4557578330868,258
-410.7676257379418,259
-0.20614191128882048,260
-135.90980010963827,261
-1.5306243617420219,262
-0.2775277070912824,263
-262.20553184217437,264
-262.75668594102484,265
-265.478691889229,266
-649.5840602275849,267
-2.3359025016926287,268
-134.37728184841674,269
-403.79532548294,270
-131.44685457390344,271
-2.540079220521762,272
-0.6245710844806219,273
-552.3203313210981,274
-420.7688029635937,275
-673.1561228210983,276
-566.6329676342501,277
-826.7218302286809,278
-133.19607412292007,279
-261.41161243420396,280
-130.1503432567707,281
-745.9951207605527,282
-593.1009909805875,283
-422.63194819788635,284
-264.352460975422,285
-132.6281002698094,286
-272.5944476952579,287
-276.52496884585025,288
-2.8595270844298355,289
-134.3542481081264,290
-863.0600084193688,291
-0.5838006191185878,292
-260.12384955626436,293
-1092.3520075256804,294
-134.47579544793888,295
-263.08226456567047,296
-263.14208863751736,297
-403.6384862962421,298
-676.3129769613263,299
-259.5850824851148,300
-0.5599792911842503,301
-841.3971214232955,302
-400.68013796627827,303
-261.9411324355016,304
-134.2541162358826,305
-400.7488164167632,306
-933.7718406562569,307
-395.58837818645895,308
-781.4991873840422,309
-542.4794573052475,310
-261.768990759702,311
-411.4022209681378,312
-908.6098705476467,313
-572.054132096904,314
-406.3605476140625,315
-0.9560144286465458,316
-419.8098582593684,317
-135.8733465246141,318
-677.8877177512418,319
-1.061000351544841,320
-420.8935070674562,321
-410.50094553986,322
-527.1414088279827,323
-405.8131292952111,324
-133.82488800602135,325
-410.9355644212376,326
-1138.5231045218584,327
-265.60551146761236,328
-550.6279081029494,329
-541.8809285146793,330
-262.00608491340074,331
-138.26408244291625,332
-539.0046046868695,333
-263.20194280779947,334
-558.728749138611,335
-542.6315237626562,336
-396.69494447112845,337
-442.68983986420074,338
-551.2123324409539,339
-731.7927987021761,340
-1009.4307379021903,341
-130.3130437347376,342
-703.3807523268074,343
-829.610762955033,344
-268.59947452925485,345
-396.18740801744417,346
-735.5741244350246,347
-139.65813774324894,348
-264.5108789752626,349
-135.02709500526709,350
-1076.030566209553,351
-537.6501749525565,352
-726.1607610041605,353
-132.67641477529017,354
-129.46648686544765,355
-0.6651282195807512,356
-833.3597025127152,357
-398.07084913357073,358
-536.8416493729624,359
-276.5888057271474,360
-129.91619176523687,361
-552.5026032887117,362
-132.61990212646703,363
-1045.702934322954,364
-136.72606810089414,365
-1355.3987029443895,366
-819.6105195876539,367
-136.38162152875142,368
-129.88447417757365,369
-556.502164519348,370
-867.8536695272958,371
-3.1904222595638547,372
-1214.2744927178414,373
-399.04245115980603,374
-267.0060835194837,375
-597.6943316498787,376
-400.03826166480053,377
-136.40065631097923,378
-403.3178441063344,379
-129.41396951948283,380
-600.7776614286838,381
-414.1901618076007,382
-546.7017304910571,383
-267.75816212558755,384
-612.9366155119945,385
-1058.307874636493,386
-133.0263198111637,387
-398.33869436455507,388
-626.9282848155667,389
-1.456315515841496,390
-1070.830576551045,391
-2.915776200583376,392
-263.8466150169842,393
-796.4792493574953,394
-265.7777444052896,395
-3.8084763401136645,396
-606.3397790085836,397
-406.5259438648021,398
-264.4538015852596,399
-421.1294585179861,400
-268.94297207424785,401
-264.328347182195,402
-425.5756864303751,403
-407.931805660131,404
-1030.610987231829,405
-538.4649919067501,406
-3.6747535767855886,407
-678.8135259365788,408
-686.1830668911863,409
-273.6093020323261,410
-132.41383735644447,411
-264.83687975071825,412
-818.0039043899321,413
-609.9351631453789,414
-128.80103209940646,415
-268.2065650529604,416
-135.85894383504206,417
-271.5610220919052,418
-693.9040568212417,419
-1.8509163368128447,420
-410.9801257663261,421
-853.0493149225435,422
-133.21021612525206,423
-0.8437241243893877,424
-130.65573438789744,425
-542.845174654451,426
-135.6855244155601,427
-417.52309785513864,428
-0.7581486423250411,429
-266.26996934460556,430
-263.94899093386437,431
-736.3778654066004,432
-133.66564996545347,433
-267.49172424982123,434
-133.87407892344285,435
-132.2231734146608,436
-555.2525216747292,437
-1.8491548042185535,438
-565.7249895439152,439
-404.9796413234927,440
-267.8873702005579,441
-1.5675334840621846,442
-129.49254983614827,443
-1300.3818970915056,444
-266.80829310210686,445
-132.846667239015,446
-410.35682063110113,447
-760.7076317443073,448
-1.0388158036790809,449
-562.2060873809967,450
-407.10572427607997,451
-136.4642818575228,452
-3.06135888344035,453
-1221.50293375514,454
-136.77496013669295,455
-737.2495485798808,456
-137.50006864952155,457
-130.33342296721955,458
-4.692447559268221,459
-3.0063944854129865,460
-135.00453947709414,461
-397.64065865936266,462
-3.561275635032984,463
-780.3775885951651,464
-532.2664055760271,465
-131.92450427568124,466
-1.6046697510118604,467
-133.00177858870154,468
-269.9109942657057,469
-409.1694204055191,470
-603.3617792436297,471
-1.3767554201233199,472
-413.17201205566533,473
-532.8892182144033,474
-263.2455839262573,475
-277.02963483896184,476
-560.1389932556149,477
-571.890657804561,478
-403.8028746209018,479
-804.407380454556,480
-1.0695254030414343,481
-416.2029772547103,482
-133.83507168589958,483
-940.8086921348546,484
-408.28469089165543,485
-274.042746604528,486
-589.1994466203586,487
-788.5111259162453,488
-567.6472927943756,489
-2.8289954734150604,490
-685.3729429161508,491
-268.206649921637,492
gitextract_0r3h2dqf/
├── .gitignore
├── A3C_sparse.py
├── AC_continue.py
├── AC_sparse.py
├── DDPG.py
├── DQN.py
├── DRL.py
├── DoubleDQN.py
├── DuelingDQN.py
├── NatureDQN.py
├── PPO_TF.py
├── PolicyNetwork.py
├── README.md
├── game/
│ ├── CartPole.py
│ └── Pendulum.py
├── history/
│ ├── a3c_sparse.csv
│ ├── ac_continue.csv
│ ├── ac_sparse.csv
│ ├── ddpg.csv
│ ├── ddqn.csv
│ ├── dueling.csv
│ ├── ndqn.csv
│ ├── pg.csv
│ ├── ppo1.csv
│ └── ppo2.csv
└── model/
├── actor_a3cs.h5
├── actor_acs.h5
├── critic_a3cs.h5
├── critic_acs.h5
├── ddpg_actor.h5
├── ddpg_critic.h5
├── ddqn.h5
├── dueling.h5
├── ndqn.h5
└── pg.h5
SYMBOL INDEX (100 symbols across 13 files)
FILE: A3C_sparse.py
class A3C (line 23) | class A3C:
method __init__ (line 26) | def __init__(self):
method _build_actor (line 39) | def _build_actor(self):
method _build_critic (line 51) | def _build_critic(self):
method _build_model (line 63) | def _build_model(self):
method _build_optimizer (line 73) | def _build_optimizer(self):
method train (line 104) | def train(self, episode, n_thread, update_iter):
method load (line 126) | def load(self):
method save (line 133) | def save(self):
class Agent (line 140) | class Agent(threading.Thread):
method __init__ (line 143) | def __init__(self, index, actor, critic, optimizer, gamma, episode, up...
method run (line 156) | def run(self):
method discount_reward (line 203) | def discount_reward(self, rewards, next_state, done):
method train_episode (line 228) | def train_episode(self, states, actions, rewards, next_observation, do...
function save_history (line 247) | def save_history(history, name):
function play (line 256) | def play(model):
FILE: AC_continue.py
class AC (line 14) | class AC(DRL):
method __init__ (line 18) | def __init__(self):
method _build_actor (line 33) | def _build_actor(self):
method _build_critic (line 52) | def _build_critic(self):
method _actor_loss (line 64) | def _actor_loss(self, y_true, y_pred):
method discount_reward (line 90) | def discount_reward(self, next_states, reward):
method choice_action (line 103) | def choice_action(self, x):
method train (line 120) | def train(self, episode):
method play (line 184) | def play(self):
FILE: AC_sparse.py
class AC (line 14) | class AC(DRL):
method __init__ (line 17) | def __init__(self):
method load (line 25) | def load(self):
method _build_actor (line 30) | def _build_actor(self):
method _build_critic (line 42) | def _build_critic(self):
method _actor_loss (line 54) | def _actor_loss(self, y_true, y_pred):
method discount_reward (line 73) | def discount_reward(self, next_states, reward, done):
method train (line 89) | def train(self, episode):
FILE: DDPG.py
class DDPG (line 18) | class DDPG(DRL):
method __init__ (line 21) | def __init__(self):
method load (line 61) | def load(self):
method _build_actor (line 66) | def _build_actor(self):
method _build_critic (line 80) | def _build_critic(self):
method actor_optimizer (line 96) | def actor_optimizer(self):
method critic_gradient (line 114) | def critic_gradient(self):
method OU (line 128) | def OU(self, x, mu=0, theta=0.15, sigma=0.2):
method get_action (line 143) | def get_action(self, X):
method remember (line 157) | def remember(self, state, action, reward, next_state, done):
method update_epsilon (line 170) | def update_epsilon(self):
method process_batch (line 176) | def process_batch(self, batch):
method update_model (line 207) | def update_model(self, X1, X2, y):
method update_target_model (line 231) | def update_target_model(self):
method train (line 249) | def train(self, episode, batch):
method play (line 300) | def play(self):
FILE: DQN.py
class DQN (line 15) | class DQN(DRL):
method __init__ (line 18) | def __init__(self):
method load (line 34) | def load(self):
method build_model (line 38) | def build_model(self):
method egreedy_action (line 52) | def egreedy_action(self, state):
method remember (line 66) | def remember(self, state, action, reward, next_state, done):
method update_epsilon (line 78) | def update_epsilon(self):
method process_batch (line 84) | def process_batch(self, batch):
method train (line 111) | def train(self, episode, batch):
FILE: DRL.py
class DRL (line 9) | class DRL:
method __init__ (line 10) | def __init__(self):
method play (line 19) | def play(self, m='pg'):
method plot (line 52) | def plot(self, history):
method save_history (line 69) | def save_history(self, history, name):
FILE: DoubleDQN.py
class DDQN (line 9) | class DDQN(DQN):
method __init__ (line 12) | def __init__(self):
method load (line 19) | def load(self):
method update_target_model (line 23) | def update_target_model(self):
method process_batch (line 28) | def process_batch(self, batch):
method train (line 55) | def train(self, episode, batch):
FILE: DuelingDQN.py
class DuelingDQN (line 13) | class DuelingDQN(NDQN):
method __init__ (line 16) | def __init__(self):
method load (line 19) | def load(self):
method build_model (line 23) | def build_model(self):
method train (line 43) | def train(self, episode, batch):
FILE: NatureDQN.py
class NDQN (line 9) | class NDQN(DQN):
method __init__ (line 12) | def __init__(self):
method load (line 19) | def load(self):
method update_target_model (line 23) | def update_target_model(self):
method process_batch (line 28) | def process_batch(self, batch):
method train (line 54) | def train(self, episode, batch):
FILE: PPO_TF.py
class PPO (line 8) | class PPO:
method __init__ (line 9) | def __init__(self, ep, batch, t='ppo2'):
method _build_critic (line 33) | def _build_critic(self):
method _build_actor (line 42) | def _build_actor(self, name, trainable):
method build_model (line 57) | def build_model(self):
method choose_action (line 107) | def choose_action(self, state):
method get_value (line 121) | def get_value(self, state):
method discount_reward (line 134) | def discount_reward(self, states, rewards, next_observation):
method update (line 161) | def update(self, states, action, dr):
method train (line 204) | def train(self):
method save_history (line 249) | def save_history(self, history, name):
FILE: PolicyNetwork.py
class PolicyNetwork (line 14) | class PolicyNetwork(DRL):
method __init__ (line 17) | def __init__(self):
method load (line 23) | def load(self):
method _build_model (line 27) | def _build_model(self):
method loss (line 39) | def loss(self, y_true, y_pred):
method discount_reward (line 57) | def discount_reward(self, rewards):
method train (line 75) | def train(self, episode, batch):
FILE: game/CartPole.py
function try_gym (line 6) | def try_gym():
FILE: game/Pendulum.py
function try_gym (line 5) | def try_gym():
Condensed preview — 35 files, each showing path, character count, and a content snippet. Download the .json file or copy it to the clipboard to get the full structured content (243K chars).
[
{
"path": ".gitignore",
"chars": 1203,
"preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
},
{
"path": "A3C_sparse.py",
"chars": 8560,
"preview": "# -*- coding: utf-8 -*-\nimport os\nimport gym\nimport time\nimport threading\n\nimport numpy as np\nimport pandas as pd\nimport"
},
{
"path": "AC_continue.py",
"chars": 6160,
"preview": "# -*- coding: utf-8 -*-\nimport os\nimport gym\nimport numpy as np\n\nfrom keras.layers import Input, Dense, concatenate, Lam"
},
{
"path": "AC_sparse.py",
"chars": 4698,
"preview": "# -*- coding: utf-8 -*-\nimport os\n\nimport numpy as np\n\nfrom keras.layers import Input, Dense\nfrom keras.models import Mo"
},
{
"path": "DDPG.py",
"chars": 10276,
"preview": "# -*- coding: utf-8 -*-\nimport os\nimport random\nimport gym\nfrom collections import deque\n\nimport numpy as np\nimport tens"
},
{
"path": "DQN.py",
"chars": 4544,
"preview": "# -*- coding: utf-8 -*-\nimport os\nimport random\nimport numpy as np\n\nfrom collections import deque\n\nfrom keras.layers imp"
},
{
"path": "DRL.py",
"chars": 1924,
"preview": "# -*- coding: utf-8 -*-\nimport os\nimport gym\nimport numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\n\n\ncl"
},
{
"path": "DoubleDQN.py",
"chars": 3287,
"preview": "# -*- coding: utf-8 -*-\nimport os\nimport random\nimport numpy as np\n\nfrom DQN import DQN\n\n\nclass DDQN(DQN):\n \"\"\"Nature"
},
{
"path": "DuelingDQN.py",
"chars": 2908,
"preview": "# -*- coding: utf-8 -*-\nimport os\nimport numpy as np\n\nfrom keras.layers import Input, Dense, Add, Subtract, Lambda\nfrom "
},
{
"path": "NatureDQN.py",
"chars": 3207,
"preview": "# -*- coding: utf-8 -*-\nimport os\nimport random\nimport numpy as np\n\nfrom DQN import DQN\n\n\nclass NDQN(DQN):\n \"\"\"Nature"
},
{
"path": "PPO_TF.py",
"chars": 8164,
"preview": "import os\nimport gym\nimport numpy as np\nimport pandas as pd\nimport tensorflow as tf\n\n\nclass PPO:\n def __init__(self, "
},
{
"path": "PolicyNetwork.py",
"chars": 4266,
"preview": "# -*- coding: utf-8 -*-\nimport os\n\nimport numpy as np\n\nfrom keras.layers import Input, Dense\nfrom keras.models import Mo"
},
{
"path": "README.md",
"chars": 1433,
"preview": "# Deep-Reinforcement-Learning-Practice\nPractice of Deep Reinforcement Learning with Keras and gym.\n\nContinuous updating."
},
{
"path": "game/CartPole.py",
"chars": 804,
"preview": "# -*- coding: utf-8 -*-\nimport gym\nimport numpy as np\n\n\ndef try_gym():\n # creat CartPole env.\n env = gym.make('Car"
},
{
"path": "game/Pendulum.py",
"chars": 811,
"preview": "# -*- coding: utf-8 -*-\nimport gym\n\n\ndef try_gym():\n # creat Pendulum env.\n env = gym.make('Pendulum-v0')\n # re"
},
{
"path": "history/a3c_sparse.csv",
"chars": 20605,
"preview": "episode,Episode_reward\n0,11.0\n1,17.0\n2,17.0\n3,13.0\n4,14.0\n5,22.0\n6,34.0\n7,14.0\n8,20.0\n9,25.0\n10,14.0\n11,12.0\n12,14.0\n13,"
},
{
"path": "history/ac_continue.csv",
"chars": 32613,
"preview": "Episode_reward,actor_loss,critic_loss,episode\n-128.90520572493745,-0.11816113442182541,0.218042254447937,0\n-173.51670990"
},
{
"path": "history/ac_sparse.csv",
"chars": 14607,
"preview": "Episode_reward,actor_loss,critic_loss,episode\n46.0,0.15708111226558685,10.703339576721191,0\n11.0,-0.28331494331359863,19"
},
{
"path": "history/ddpg.csv",
"chars": 8297,
"preview": "Episode_reward,Loss,episode\n-1509.9431573465693,9.224396642297506,0\n-1865.7656558462838,0.2480768134398386,1\n-1285.63196"
},
{
"path": "history/ddqn.csv",
"chars": 3569,
"preview": "Episode_reward,Loss,episode\n11.0,inf,0\n12.0,0.4681931138038635,5\n15.0,1.190276861190796,10\n12.0,1.9253158569335938,15\n14"
},
{
"path": "history/dueling.csv",
"chars": 3515,
"preview": "Episode_reward,Loss,episode\n30.0,inf,0\n12.0,0.47862666845321655,5\n10.0,0.5809901356697083,10\n8.0,1.5168644189834595,15\n1"
},
{
"path": "history/ndqn.csv",
"chars": 3582,
"preview": "Episode_reward,Loss,episode\n12.0,inf,0\n39.0,0.5968765616416931,5\n10.0,0.16437163949012756,10\n8.0,0.8609060645103455,15\n1"
},
{
"path": "history/pg.csv",
"chars": 36605,
"preview": "Batch_reward,Episode_reward,Loss,episode\n126.0,25.2,-0.323914110660553,5\n170.0,34.0,-0.3671955466270447,10\n95.0,19.0,-0."
},
{
"path": "history/ppo1.csv",
"chars": 23331,
"preview": "Episode_reward,episode\n-1037.2442064560664,0\n-1847.6011742012683,1\n-1572.0879549063445,2\n-1909.3375551414695,3\n-1625.804"
},
{
"path": "history/ppo2.csv",
"chars": 23400,
"preview": "Episode_reward,episode\n-1673.3837361587691,0\n-1611.2107172299563,1\n-1659.4160521895024,2\n-1617.9072409202724,3\n-1861.967"
}
]
// ... and 10 more files (download for full content)
About this extraction
This page contains the full source code of the xiaochus/Deep-Reinforcement-Learning-Practice GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 35 files (226.9 KB), approximately 101.1k tokens, and a symbol index with 100 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.