Repository: matroid/dlwithtf
Branch: master
Commit: 2eb79b9bb845
Files: 22
Total size: 112.4 KB
Directory structure:
gitextract_33683wml/
├── README.md
├── ch3/
│ ├── linear_regression_tf.py
│ ├── linear_regression_tf_simple.py
│ └── logistic_regression_tf.py
├── ch4/
│ ├── fcnet_classification_tf.py
│ ├── fcnet_regression_tf.py
│ ├── tox21_fcnet.py
│ └── tox21_fcnet_dropout.py
├── ch5/
│ ├── fcnet_func.py
│ ├── hidden_grid_search.py
│ ├── simple_grid_search.py
│ └── tox21_rf.py
├── ch6/
│ └── convolutional.py
├── ch7/
│ ├── ptb_word_lm.py
│ ├── reader.py
│ └── setup.sh
├── ch8/
│ ├── a3c.py
│ ├── environment.py
│ ├── tensorgraph.py
│ └── tictactoe.py
└── ch9/
├── cifar10.py
└── cifar10_multi_gpu_train.py
================================================
FILE CONTENTS
================================================
================================================
FILE: README.md
================================================
# TensorFlow for Deep Learning Companion Code
Referenced throughout the book.

## TensorFlow Versions
The TensorFlow library has been evolving rapidly over the last couple of years, and some of the code in this repo and the associated book no longer works with the latest versions of TensorFlow. We recommend using TensorFlow 1.6 for working through all exercises in this book. We are looking into creating a full `requirements.txt` file for all needed dependencies and hope to have that available for you soon.
We also welcome any PRs that modify code to work with more recent TensorFlow versions. We are looking into these upgrades on our end as well.
================================================
FILE: ch3/linear_regression_tf.py
================================================
import numpy as np
np.random.seed(456)
import tensorflow as tf
tf.set_random_seed(456)
from matplotlib import rc
rc('text', usetex=True)
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error
def pearson_r2_score(y, y_pred):
  """Return the squared Pearson correlation between *y* and *y_pred*."""
  corr, _ = pearsonr(y, y_pred)
  return corr ** 2
def rms_score(y_true, y_pred):
  """Return the root-mean-squared error between *y_true* and *y_pred*."""
  mse = mean_squared_error(y_true, y_pred)
  return np.sqrt(mse)
# Generate synthetic data: y = 5x + 2 + Gaussian noise, N scalar points.
N = 100
w_true = 5
b_true = 2
noise_scale = .1
x_np = np.random.rand(N, 1)
noise = np.random.normal(scale=noise_scale, size=(N, 1))
# Convert shape of y_np to (N,)
y_np = np.reshape(w_true * x_np + b_true + noise, (-1))
# Save image of the data distribution
plt.scatter(x_np, y_np)
plt.xlabel("x")
plt.ylabel("y")
plt.xlim(0, 1)
plt.title("Toy Linear Regression Data, "
          r"$y = 5x + 2 + N(0, 1)$")
plt.savefig("lr_data.png")
# Generate tensorflow graph
with tf.name_scope("placeholders"):
  x = tf.placeholder(tf.float32, (N, 1))
  y = tf.placeholder(tf.float32, (N,))
with tf.name_scope("weights"):
  # One scalar weight and one scalar bias, randomly initialized.
  W = tf.Variable(tf.random_normal((1, 1)))
  b = tf.Variable(tf.random_normal((1,)))
with tf.name_scope("prediction"):
  y_pred = tf.matmul(x, W) + b
with tf.name_scope("loss"):
  # Squared error; squeeze drops y_pred's trailing unit dimension so the
  # subtraction is (N,) - (N,) rather than broadcasting to (N, N).
  l = tf.reduce_sum((y - tf.squeeze(y_pred))**2)
with tf.name_scope("optim"):
  train_op = tf.train.AdamOptimizer(.001).minimize(l)
with tf.name_scope("summaries"):
  tf.summary.scalar("loss", l)
  merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter('/tmp/lr-train', tf.get_default_graph())
n_steps = 8000
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  # Train model: full-batch gradient steps (every step feeds all N points).
  for i in range(n_steps):
    feed_dict = {x: x_np, y: y_np}
    _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)
    print("step %d, loss: %f" % (i, loss))
    train_writer.add_summary(summary, i)
  # Get weights
  w_final, b_final = sess.run([W, b])
  # Make Predictions
  y_pred_np = sess.run(y_pred, feed_dict={x: x_np})
y_pred_np = np.reshape(y_pred_np, -1)
r2 = pearson_r2_score(y_np, y_pred_np)
print("Pearson R^2: %f" % r2)
rms = rms_score(y_np, y_pred_np)
print("RMS: %f" % rms)
# Clear figure
plt.clf()
plt.xlabel("Y-true")
plt.ylabel("Y-pred")
# NOTE(review): the R^2 in the title is hard-coded, not the computed r2.
plt.title("Predicted versus True values "
          r"(Pearson $R^2$: $0.994$)")
plt.scatter(y_np, y_pred_np)
plt.savefig("lr_pred.png")
# Now draw with learned regression line
plt.clf()
plt.xlabel("x")
plt.ylabel("y")
# NOTE(review): the RMS in the title is hard-coded, not the computed rms.
plt.title("True Model versus Learned Model "
          r"(RMS: $1.027620$)")
plt.xlim(0, 1)
plt.scatter(x_np, y_np)
# Endpoints of the learned line y = w_final * x + b_final over x in [0, 1].
x_left = 0
y_left = w_final[0]*x_left + b_final
x_right = 1
y_right = w_final[0]*x_right + b_final
plt.plot([x_left, x_right], [y_left, y_right], color='k')
plt.savefig("lr_learned.png")
================================================
FILE: ch3/linear_regression_tf_simple.py
================================================
import tensorflow as tf
# Minimal example: build (but never train) a d-dimensional linear
# regression graph, then initialize its variables in a session.
d = 10   # feature dimensionality
N = 100  # number of datapoints
x = tf.placeholder(tf.float32, (N, d))
y = tf.placeholder(tf.float32, (N,))
W = tf.Variable(tf.random_normal((d, 1)))
b = tf.Variable(tf.random_normal((1,)))
# NOTE(review): y has shape (N,) while the matmul result is (N, 1), so the
# subtraction broadcasts to (N, N) before reduce_sum -- presumably
# unintended, but this script never actually evaluates the loss.
l = tf.reduce_sum((y - (tf.matmul(x, W) + b))**2)
with tf.Session() as sess:
  tf.global_variables_initializer().run(session=sess)
================================================
FILE: ch3/logistic_regression_tf.py
================================================
import numpy as np
np.random.seed(456)
import tensorflow as tf
tf.set_random_seed(456)
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from scipy.special import logit
# Generate synthetic data: two 2-D Gaussian blobs, one per class.
N = 100
# Zeros form a Gaussian centered at (-1, -1)
x_zeros = np.random.multivariate_normal(
    mean=np.array((-1, -1)), cov=.1*np.eye(2), size=(N//2,))
y_zeros = np.zeros((N//2,))
# Ones form a Gaussian centered at (1, 1)
x_ones = np.random.multivariate_normal(
    mean=np.array((1, 1)), cov=.1*np.eye(2), size=(N//2,))
y_ones = np.ones((N//2,))
x_np = np.vstack([x_zeros, x_ones])
y_np = np.concatenate([y_zeros, y_ones])
# Save image of the data distribution
plt.xlabel(r"$x_1$")
plt.ylabel(r"$x_2$")
plt.title("Toy Logistic Regression Data")
# Plot Zeros (blue) and ones (red)
plt.scatter(x_zeros[:, 0], x_zeros[:, 1], color="blue")
plt.scatter(x_ones[:, 0], x_ones[:, 1], color="red")
plt.savefig("logistic_data.png")
# Generate tensorflow graph
with tf.name_scope("placeholders"):
  x = tf.placeholder(tf.float32, (N, 2))
  y = tf.placeholder(tf.float32, (N,))
with tf.name_scope("weights"):
  W = tf.Variable(tf.random_normal((2, 1)))
  b = tf.Variable(tf.random_normal((1,)))
with tf.name_scope("prediction"):
  y_logit = tf.squeeze(tf.matmul(x, W) + b)
  # the sigmoid gives the class probability of 1
  y_one_prob = tf.sigmoid(y_logit)
  # Rounding P(y=1) will give the correct prediction.
  y_pred = tf.round(y_one_prob)
with tf.name_scope("loss"):
  # Compute the cross-entropy term for each datapoint
  entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit, labels=y)
  # Sum all contributions
  l = tf.reduce_sum(entropy)
with tf.name_scope("optim"):
  train_op = tf.train.AdamOptimizer(.01).minimize(l)
with tf.name_scope("summaries"):
  tf.summary.scalar("loss", l)
  merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter('/tmp/logistic-train', tf.get_default_graph())
n_steps = 1000
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  # Train model: full-batch steps (every step feeds all N points).
  for i in range(n_steps):
    feed_dict = {x: x_np, y: y_np}
    _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)
    print("loss: %f" % loss)
    train_writer.add_summary(summary, i)
  # Get weights
  w_final, b_final = sess.run([W, b])
  # Make Predictions
  y_pred_np = sess.run(y_pred, feed_dict={x: x_np})
score = accuracy_score(y_np, y_pred_np)
print("Classification Accuracy: %f" % score)
plt.clf()
# Save image of the data distribution with the learned decision boundary.
plt.xlabel(r"$x_1$")
plt.ylabel(r"$x_2$")
# NOTE(review): the accuracy in the title is hard-coded, not `score`.
plt.title("Learned Model (Classification Accuracy: 1.00)")
plt.xlim(-2, 2)
plt.ylim(-2, 2)
# Plot Zeros (blue) and ones (red)
plt.scatter(x_zeros[:, 0], x_zeros[:, 1], color="blue")
plt.scatter(x_ones[:, 0], x_ones[:, 1], color="red")
# Decision boundary: points where w.x + b = logit(.5) = 0, solved for x_2
# as a function of x_1 at the plot's left and right edges.
x_left = -2
y_left = (1./w_final[1]) * (-b_final + logit(.5) - w_final[0]*x_left)
x_right = 2
y_right = (1./w_final[1]) * (-b_final + logit(.5) - w_final[0]*x_right)
plt.plot([x_left, x_right], [y_left, y_right], color='k')
plt.savefig("logistic_pred.png")
================================================
FILE: ch4/fcnet_classification_tf.py
================================================
import numpy as np
np.random.seed(456)
import tensorflow as tf
tf.set_random_seed(456)
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
# Generate synthetic data: two 2-D Gaussian blobs, one per class.
N = 100
w_true = 5
b_true = 2
noise_scale = .1
# Zeros form a Gaussian centered at (-1, -1).
# BUG FIX: use integer division N // 2 -- on Python 3, N / 2 yields a
# float and numpy raises TypeError for float sizes/shapes. (The sibling
# script ch3/logistic_regression_tf.py already uses N // 2.)
x_zeros = np.random.multivariate_normal(
    mean=np.array((-1, -1)), cov=.1*np.eye(2), size=(N//2,))
y_zeros = np.zeros((N//2,))
# Ones form a Gaussian centered at (1, 1)
x_ones = np.random.multivariate_normal(
    mean=np.array((1, 1)), cov=.1*np.eye(2), size=(N//2,))
y_ones = np.ones((N//2,))
x_np = np.vstack([x_zeros, x_ones])
y_np = np.concatenate([y_zeros, y_ones])
# Save image of the data distribution
plt.xlabel("Dimension 1")
plt.ylabel("Dimension 2")
plt.title("FCNet Classification Data")
# Plot Zeros (blue) and ones (red)
plt.scatter(x_zeros[:, 0], x_zeros[:, 1], color="blue")
plt.scatter(x_ones[:, 0], x_ones[:, 1], color="red")
plt.savefig("fcnet_classification_data.png")
# Generate tensorflow graph: one hidden ReLU layer, sigmoid output.
d = 2            # input dimensionality
n_hidden = 15    # hidden-layer width
with tf.name_scope("placeholders"):
  x = tf.placeholder(tf.float32, (N, d))
  y = tf.placeholder(tf.float32, (N,))
with tf.name_scope("layer-1"):
  W = tf.Variable(tf.random_normal((d, n_hidden)))
  b = tf.Variable(tf.random_normal((n_hidden,)))
  x_1 = tf.nn.relu(tf.matmul(x, W) + b)
with tf.name_scope("output"):
  # These W and b shadow the Python names above; both sets of underlying
  # tf.Variables remain alive in the graph.
  W = tf.Variable(tf.random_normal((n_hidden, 1)))
  b = tf.Variable(tf.random_normal((1,)))
  y_logit = tf.squeeze(tf.matmul(x_1, W) + b)
  # the sigmoid gives the class probability of 1
  y_one_prob = tf.sigmoid(y_logit)
  # Rounding P(y=1) will give the correct prediction.
  y_pred = tf.round(y_one_prob)
with tf.name_scope("loss"):
  # Compute the cross-entropy term for each datapoint
  entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit, labels=y)
  # Sum all contributions
  l = tf.reduce_sum(entropy)
with tf.name_scope("optim"):
  train_op = tf.train.AdamOptimizer(.001).minimize(l)
with tf.name_scope("summaries"):
  tf.summary.scalar("loss", l)
  merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter('/tmp/fcnet-classification-train',
                                     tf.get_default_graph())
n_steps = 200
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  # Train model: full-batch steps (every step feeds all N points).
  for i in range(n_steps):
    feed_dict = {x: x_np, y: y_np}
    _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)
    print("step %d, loss: %f" % (i, loss))
    train_writer.add_summary(summary, i)
  # Make Predictions
  y_pred_np = sess.run(y_pred, feed_dict={x: x_np})
score = accuracy_score(y_np, y_pred_np)
print("Classification Accuracy: %f" % score)
================================================
FILE: ch4/fcnet_regression_tf.py
================================================
import numpy as np
np.random.seed(456)
import tensorflow as tf
tf.set_random_seed(456)
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
def pearson_r2_score(y, y_pred):
  """Return Pearson R^2, i.e. the square of the Pearson correlation."""
  return pow(pearsonr(y, y_pred)[0], 2)
# Generate synthetic data: y = 5x + 2 + Gaussian noise.
d = 1
N = 50
w_true = 5
b_true = 2
noise_scale = .1
x_np = np.random.rand(N, d)
noise = np.random.normal(scale=noise_scale, size=(N, d))
# Flatten the targets to shape (N,)
y_np = np.reshape(w_true * x_np + b_true + noise, (-1))
# Save image of the data distribution
plt.scatter(x_np, y_np)
plt.xlabel("X")
plt.ylabel("y")
plt.title("Raw Linear Regression Data")
plt.savefig("fcnet_regression_data.png")
# Generate tensorflow graph: one hidden ReLU layer, linear output.
n_hidden = 15
with tf.name_scope("placeholders"):
  x = tf.placeholder(tf.float32, (N, d))
  y = tf.placeholder(tf.float32, (N,))
with tf.name_scope("layer-1"):
  W = tf.Variable(tf.random_normal((d, n_hidden)))
  b = tf.Variable(tf.random_normal((n_hidden,)))
  x_1 = tf.nn.relu(tf.matmul(x, W) + b)
with tf.name_scope("output"):
  W = tf.Variable(tf.random_normal((n_hidden, 1)))
  b = tf.Variable(tf.random_normal((1,)))
  # Transpose turns the (N, 1) matmul output into shape (1, N).
  y_pred = tf.transpose(tf.matmul(x_1, W) + b)
with tf.name_scope("loss"):
  # NOTE(review): y is (N,) and y_pred is (1, N); the difference broadcasts
  # to (1, N), so the sum still covers N squared errors (the printed
  # loss-vec-size below makes this shape visible).
  lvec = (y - y_pred)**2
  l = tf.reduce_sum(lvec)
with tf.name_scope("optim"):
  train_op = tf.train.AdamOptimizer(.001).minimize(l)
with tf.name_scope("summaries"):
  tf.summary.scalar("loss", l)
  merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter('/tmp/fcnet-regression-train', tf.get_default_graph())
n_steps = 1000
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  # Train model: full-batch steps (every step feeds all N points).
  for i in range(n_steps):
    feed_dict = {x: x_np, y: y_np}
    _, summary, loss, lossvec = sess.run([train_op, merged, l, lvec], feed_dict=feed_dict)
    print("step %d, loss: %f, loss-vec-size: %s" % (i, loss, lossvec.shape))
    train_writer.add_summary(summary, i)
  # Make Predictions
  y_pred_np = sess.run(y_pred, feed_dict={x: x_np})
y_pred_np = np.reshape(y_pred_np, -1)
r2 = pearson_r2_score(y_np, y_pred_np)
print("Pearson R^2: %f" % r2)
# Clear figure
plt.clf()
plt.xlabel("Y-true")
plt.ylabel("Y-pred")
plt.title("Predicted versus true values")
plt.scatter(y_np, y_pred_np)
plt.savefig("fcnet_regression_pred.png")
# Now draw with learned regression line
plt.clf()
plt.xlabel("X")
plt.ylabel("Y")
plt.title("Predicted versus true values")
plt.xlim(0, 1)
plt.scatter(x_np, y_np)
plt.scatter(x_np, y_pred_np)
plt.savefig("fcnet_regression_learned.png")
================================================
FILE: ch4/tox21_fcnet.py
================================================
import numpy as np
np.random.seed(456)
import tensorflow as tf
tf.set_random_seed(456)
import matplotlib.pyplot as plt
import deepchem as dc
from sklearn.metrics import accuracy_score
# Load the Tox21 dataset via DeepChem; X are featurized molecules
# (presumably 1024-bit fingerprints, matching d below -- verify), y are
# per-task labels, w are per-example weights.
_, (train, valid, test), _ = dc.molnet.load_tox21()
train_X, train_y, train_w = train.X, train.y, train.w
valid_X, valid_y, valid_w = valid.X, valid.y, valid.w
test_X, test_y, test_w = test.X, test.y, test.w
# Remove extra tasks: keep only the first task's labels and weights.
train_y = train_y[:, 0]
valid_y = valid_y[:, 0]
test_y = test_y[:, 0]
train_w = train_w[:, 0]
valid_w = valid_w[:, 0]
test_w = test_w[:, 0]
# Generate tensorflow graph: one hidden ReLU layer, sigmoid output.
d = 1024
n_hidden = 50
learning_rate = .001
n_epochs = 10
batch_size = 100
with tf.name_scope("placeholders"):
  # Batch dimension is None so training batches and the full validation
  # set can both be fed through the same graph.
  x = tf.placeholder(tf.float32, (None, d))
  y = tf.placeholder(tf.float32, (None,))
with tf.name_scope("hidden-layer"):
  W = tf.Variable(tf.random_normal((d, n_hidden)))
  b = tf.Variable(tf.random_normal((n_hidden,)))
  x_hidden = tf.nn.relu(tf.matmul(x, W) + b)
with tf.name_scope("output"):
  W = tf.Variable(tf.random_normal((n_hidden, 1)))
  b = tf.Variable(tf.random_normal((1,)))
  y_logit = tf.matmul(x_hidden, W) + b
  # the sigmoid gives the class probability of 1
  y_one_prob = tf.sigmoid(y_logit)
  # Rounding P(y=1) will give the correct prediction.
  y_pred = tf.round(y_one_prob)
with tf.name_scope("loss"):
  # Compute the cross-entropy term for each datapoint; expand y to (?, 1)
  # to match y_logit's shape.
  y_expand = tf.expand_dims(y, 1)
  entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit, labels=y_expand)
  # Sum all contributions
  l = tf.reduce_sum(entropy)
with tf.name_scope("optim"):
  train_op = tf.train.AdamOptimizer(learning_rate).minimize(l)
with tf.name_scope("summaries"):
  tf.summary.scalar("loss", l)
  merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter('/tmp/fcnet-tox21',
                                     tf.get_default_graph())
N = train_X.shape[0]
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  step = 0
  # Minibatch training: slice the training set sequentially each epoch.
  for epoch in range(n_epochs):
    pos = 0
    while pos < N:
      batch_X = train_X[pos:pos+batch_size]
      batch_y = train_y[pos:pos+batch_size]
      feed_dict = {x: batch_X, y: batch_y}
      _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)
      print("epoch %d, step %d, loss: %f" % (epoch, step, loss))
      train_writer.add_summary(summary, step)
      step += 1
      pos += batch_size
  # Make Predictions
  valid_y_pred = sess.run(y_pred, feed_dict={x: valid_X})
score = accuracy_score(valid_y, valid_y_pred)
print("Unweighted Classification Accuracy: %f" % score)
weighted_score = accuracy_score(valid_y, valid_y_pred, sample_weight=valid_w)
print("Weighted Classification Accuracy: %f" % weighted_score)
================================================
FILE: ch4/tox21_fcnet_dropout.py
================================================
import numpy as np
np.random.seed(456)
import tensorflow as tf
tf.set_random_seed(456)
import matplotlib.pyplot as plt
import deepchem as dc
from sklearn.metrics import accuracy_score
# Load the Tox21 dataset via DeepChem; X are featurized molecules, y are
# per-task labels, w are per-example weights.
_, (train, valid, test), _ = dc.molnet.load_tox21()
train_X, train_y, train_w = train.X, train.y, train.w
valid_X, valid_y, valid_w = valid.X, valid.y, valid.w
test_X, test_y, test_w = test.X, test.y, test.w
# Remove extra tasks: keep only the first task's labels and weights.
train_y = train_y[:, 0]
valid_y = valid_y[:, 0]
test_y = test_y[:, 0]
train_w = train_w[:, 0]
valid_w = valid_w[:, 0]
test_w = test_w[:, 0]
# Generate tensorflow graph: one hidden ReLU layer with dropout.
d = 1024
n_hidden = 50
learning_rate = .001
n_epochs = 10
batch_size = 100
dropout_prob = 1.0  # NOTE(review): fed as keep_prob below, so 1.0 means
                    # every unit is kept -- dropout is effectively off.
with tf.name_scope("placeholders"):
  x = tf.placeholder(tf.float32, (None, d))
  y = tf.placeholder(tf.float32, (None,))
  # Keep probability for tf.nn.dropout; fed per-run so training and
  # evaluation can use different values.
  keep_prob = tf.placeholder(tf.float32)
with tf.name_scope("hidden-layer"):
  W = tf.Variable(tf.random_normal((d, n_hidden)))
  b = tf.Variable(tf.random_normal((n_hidden,)))
  x_hidden = tf.nn.relu(tf.matmul(x, W) + b)
  # Apply dropout
  x_hidden = tf.nn.dropout(x_hidden, keep_prob)
with tf.name_scope("output"):
  W = tf.Variable(tf.random_normal((n_hidden, 1)))
  b = tf.Variable(tf.random_normal((1,)))
  y_logit = tf.matmul(x_hidden, W) + b
  # the sigmoid gives the class probability of 1
  y_one_prob = tf.sigmoid(y_logit)
  # Rounding P(y=1) will give the correct prediction.
  y_pred = tf.round(y_one_prob)
with tf.name_scope("loss"):
  # Compute the cross-entropy term for each datapoint; expand y to (?, 1)
  # to match y_logit's shape.
  y_expand = tf.expand_dims(y, 1)
  entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit, labels=y_expand)
  # Sum all contributions
  l = tf.reduce_sum(entropy)
with tf.name_scope("optim"):
  train_op = tf.train.AdamOptimizer(learning_rate).minimize(l)
with tf.name_scope("summaries"):
  tf.summary.scalar("loss", l)
  merged = tf.summary.merge_all()
train_writer = tf.summary.FileWriter('/tmp/fcnet-tox21-dropout',
                                     tf.get_default_graph())
N = train_X.shape[0]
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  step = 0
  # Minibatch training: slice the training set sequentially each epoch.
  for epoch in range(n_epochs):
    pos = 0
    while pos < N:
      batch_X = train_X[pos:pos+batch_size]
      batch_y = train_y[pos:pos+batch_size]
      feed_dict = {x: batch_X, y: batch_y, keep_prob: dropout_prob}
      _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)
      print("epoch %d, step %d, loss: %f" % (epoch, step, loss))
      train_writer.add_summary(summary, step)
      step += 1
      pos += batch_size
  # Make Predictions (set keep_prob to 1.0 for predictions)
  train_y_pred = sess.run(y_pred, feed_dict={x: train_X, keep_prob: 1.0})
  valid_y_pred = sess.run(y_pred, feed_dict={x: valid_X, keep_prob: 1.0})
  test_y_pred = sess.run(y_pred, feed_dict={x: test_X, keep_prob: 1.0})
# Report weighted accuracy on each split.
train_weighted_score = accuracy_score(train_y, train_y_pred, sample_weight=train_w)
print("Train Weighted Classification Accuracy: %f" % train_weighted_score)
valid_weighted_score = accuracy_score(valid_y, valid_y_pred, sample_weight=valid_w)
print("Valid Weighted Classification Accuracy: %f" % valid_weighted_score)
test_weighted_score = accuracy_score(test_y, test_y_pred, sample_weight=test_w)
print("Test Weighted Classification Accuracy: %f" % test_weighted_score)
================================================
FILE: ch5/fcnet_func.py
================================================
import numpy as np
np.random.seed(456)
import tensorflow as tf
tf.set_random_seed(456)
import matplotlib.pyplot as plt
import deepchem as dc
from sklearn.metrics import accuracy_score
def eval_tox21_hyperparams(n_hidden=50, n_layers=1, learning_rate=.001,
                           dropout_prob=0.5, n_epochs=45, batch_size=100,
                           weight_positives=True):
  """Train a fully connected net on the first Tox21 task and score it.

  Builds a fresh TF graph, trains with minibatch Adam, and returns the
  weighted classification accuracy on the validation set.

  Args:
    n_hidden: width of each hidden layer.
    n_layers: number of stacked hidden layers.
    learning_rate: Adam learning rate.
    dropout_prob: keep probability fed to tf.nn.dropout during training
      (1.0 disables dropout).
    n_epochs: number of passes over the training set.
    batch_size: minibatch size.
    weight_positives: if True, scale each example's loss by the dataset's
      example weights.

  Returns:
    Weighted validation accuracy as a float.
  """
  print("---------------------------------------------")
  print("Model hyperparameters")
  print("n_hidden = %d" % n_hidden)
  print("n_layers = %d" % n_layers)
  print("learning_rate = %f" % learning_rate)
  print("n_epochs = %d" % n_epochs)
  print("batch_size = %d" % batch_size)
  print("weight_positives = %s" % str(weight_positives))
  print("dropout_prob = %f" % dropout_prob)
  print("---------------------------------------------")
  d = 1024
  graph = tf.Graph()
  with graph.as_default():
    _, (train, valid, test), _ = dc.molnet.load_tox21()
    train_X, train_y, train_w = train.X, train.y, train.w
    valid_X, valid_y, valid_w = valid.X, valid.y, valid.w
    test_X, test_y, test_w = test.X, test.y, test.w
    # Remove extra tasks: keep only the first task's labels and weights.
    train_y = train_y[:, 0]
    valid_y = valid_y[:, 0]
    test_y = test_y[:, 0]
    train_w = train_w[:, 0]
    valid_w = valid_w[:, 0]
    test_w = test_w[:, 0]
    # Generate tensorflow graph
    with tf.name_scope("placeholders"):
      x = tf.placeholder(tf.float32, (None, d))
      y = tf.placeholder(tf.float32, (None,))
      w = tf.placeholder(tf.float32, (None,))
      keep_prob = tf.placeholder(tf.float32)
    # BUG FIX: the original built every hidden layer from the raw input x
    # with a (d, n_hidden) weight matrix, so n_layers > 1 never actually
    # stacked layers (earlier layers were dead subgraphs). Track the
    # previous layer's output and width so the layers chain; behavior for
    # n_layers == 1 is unchanged.
    prev_layer, prev_size = x, d
    for layer in range(n_layers):
      with tf.name_scope("layer-%d" % layer):
        W = tf.Variable(tf.random_normal((prev_size, n_hidden)))
        b = tf.Variable(tf.random_normal((n_hidden,)))
        x_hidden = tf.nn.relu(tf.matmul(prev_layer, W) + b)
        # Apply dropout
        x_hidden = tf.nn.dropout(x_hidden, keep_prob)
        prev_layer, prev_size = x_hidden, n_hidden
    with tf.name_scope("output"):
      W = tf.Variable(tf.random_normal((n_hidden, 1)))
      b = tf.Variable(tf.random_normal((1,)))
      y_logit = tf.matmul(x_hidden, W) + b
      # the sigmoid gives the class probability of 1
      y_one_prob = tf.sigmoid(y_logit)
      # Rounding P(y=1) will give the correct prediction.
      y_pred = tf.round(y_one_prob)
    with tf.name_scope("loss"):
      # Compute the cross-entropy term for each datapoint; expand y to
      # (?, 1) to match y_logit's shape.
      y_expand = tf.expand_dims(y, 1)
      entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit,
                                                        labels=y_expand)
      # Multiply by weights
      if weight_positives:
        w_expand = tf.expand_dims(w, 1)
        entropy = w_expand * entropy
      # Sum all contributions
      l = tf.reduce_sum(entropy)
    with tf.name_scope("optim"):
      train_op = tf.train.AdamOptimizer(learning_rate).minimize(l)
    with tf.name_scope("summaries"):
      tf.summary.scalar("loss", l)
      merged = tf.summary.merge_all()
    # Encode the hyperparameters into the TensorBoard log directory name.
    hyperparam_str = "d-%d-hidden-%d-lr-%f-n_epochs-%d-batch_size-%d-weight_pos-%s" % (
        d, n_hidden, learning_rate, n_epochs, batch_size, str(weight_positives))
    train_writer = tf.summary.FileWriter('/tmp/fcnet-func-' + hyperparam_str,
                                         tf.get_default_graph())
    N = train_X.shape[0]
    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      step = 0
      # Minibatch training: slice the training set sequentially each epoch.
      for epoch in range(n_epochs):
        pos = 0
        while pos < N:
          batch_X = train_X[pos:pos+batch_size]
          batch_y = train_y[pos:pos+batch_size]
          batch_w = train_w[pos:pos+batch_size]
          feed_dict = {x: batch_X, y: batch_y, w: batch_w, keep_prob: dropout_prob}
          _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)
          print("epoch %d, step %d, loss: %f" % (epoch, step, loss))
          train_writer.add_summary(summary, step)
          step += 1
          pos += batch_size
      # Make Predictions (set keep_prob to 1.0 for predictions)
      valid_y_pred = sess.run(y_pred, feed_dict={x: valid_X, keep_prob: 1.0})
      weighted_score = accuracy_score(valid_y, valid_y_pred, sample_weight=valid_w)
      print("Valid Weighted Classification Accuracy: %f" % weighted_score)
  return weighted_score
if __name__ == "__main__":
  # Run a single evaluation with the default hyperparameters.
  score = eval_tox21_hyperparams()
================================================
FILE: ch5/hidden_grid_search.py
================================================
import numpy as np
from fcnet_func import eval_tox21_hyperparams
# Grid search over hidden size, epochs, dropout, and depth; each
# combination is evaluated n_reps times and the scores averaged.
scores = {}
n_reps = 3
hidden_sizes = [30, 60]
epochs = [15, 30, 45]
dropouts = [.5]
num_layers = [1, 2]
for rep in range(n_reps):
  for n_epochs in epochs:
    for hidden_size in hidden_sizes:
      for dropout in dropouts:
        for n_layers in num_layers:
          score = eval_tox21_hyperparams(n_hidden=hidden_size, n_epochs=n_epochs,
                                         dropout_prob=dropout, n_layers=n_layers)
          # setdefault replaces the explicit membership check.
          scores.setdefault(
              (hidden_size, n_epochs, dropout, n_layers), []).append(score)
print("All Scores")
print(scores)
avg_scores = {}
# BUG FIX: dict.iteritems() is Python 2 only (AttributeError on Python 3);
# items() works on both.
for params, param_scores in scores.items():
  avg_scores[params] = np.mean(np.array(param_scores))
print("Scores Averaged over %d repetitions" % n_reps)
print(avg_scores)
================================================
FILE: ch5/simple_grid_search.py
================================================
import numpy as np
from fcnet_func import eval_tox21_hyperparams
# Small grid search over dropout and depth at fixed width/epochs; each
# combination is evaluated n_reps times and the scores averaged.
scores = {}
n_reps = 3
hidden_sizes = [50]
epochs = [10]
dropouts = [.5, 1.0]
num_layers = [1, 2]
for rep in range(n_reps):
  for n_epochs in epochs:
    for hidden_size in hidden_sizes:
      for dropout in dropouts:
        for n_layers in num_layers:
          score = eval_tox21_hyperparams(n_hidden=hidden_size, n_epochs=n_epochs,
                                         dropout_prob=dropout, n_layers=n_layers)
          # setdefault replaces the explicit membership check.
          scores.setdefault(
              (hidden_size, n_epochs, dropout, n_layers), []).append(score)
print("All Scores")
print(scores)
avg_scores = {}
# BUG FIX: dict.iteritems() is Python 2 only (AttributeError on Python 3);
# items() works on both.
for params, param_scores in scores.items():
  avg_scores[params] = np.mean(np.array(param_scores))
print("Scores Averaged over %d repetitions" % n_reps)
print(avg_scores)
================================================
FILE: ch5/tox21_rf.py
================================================
import numpy as np
np.random.seed(456)
import matplotlib.pyplot as plt
import deepchem as dc
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
# Load the Tox21 dataset via DeepChem; X are featurized molecules, y are
# per-task labels, w are per-example weights.
_, (train, valid, test), _ = dc.molnet.load_tox21()
train_X, train_y, train_w = train.X, train.y, train.w
valid_X, valid_y, valid_w = valid.X, valid.y, valid.w
test_X, test_y, test_w = test.X, test.y, test.w
# Remove extra tasks: keep only the first task's labels and weights.
train_y = train_y[:, 0]
valid_y = valid_y[:, 0]
test_y = test_y[:, 0]
train_w = train_w[:, 0]
valid_w = valid_w[:, 0]
test_w = test_w[:, 0]
# Fit a scikit-learn random forest baseline (no TensorFlow involved);
# class_weight="balanced" compensates for the class imbalance.
sklearn_model = RandomForestClassifier(
    class_weight="balanced", n_estimators=50)
print("About to fit model on train set.")
sklearn_model.fit(train_X, train_y)
train_y_pred = sklearn_model.predict(train_X)
valid_y_pred = sklearn_model.predict(valid_X)
test_y_pred = sklearn_model.predict(test_X)
# Report weighted accuracy on each split.
weighted_score = accuracy_score(train_y, train_y_pred, sample_weight=train_w)
print("Weighted train Classification Accuracy: %f" % weighted_score)
weighted_score = accuracy_score(valid_y, valid_y_pred, sample_weight=valid_w)
print("Weighted valid Classification Accuracy: %f" % weighted_score)
weighted_score = accuracy_score(test_y, test_y_pred, sample_weight=test_w)
print("Weighted test Classification Accuracy: %f" % weighted_score)
================================================
FILE: ch6/convolutional.py
================================================
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Simple, end-to-end, LeNet-5-like convolutional MNIST model example.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import gzip
import os
import sys
import time
import numpy
from six.moves import urllib
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
# MNIST data source plus dataset and training hyperparameters.
SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'
WORK_DIRECTORY = 'data'  # local download directory
IMAGE_SIZE = 28
NUM_CHANNELS = 1
PIXEL_DEPTH = 255
NUM_LABELS = 10
VALIDATION_SIZE = 5000  # Size of the validation set.
SEED = 66478  # Set to None for random seed.
BATCH_SIZE = 64
NUM_EPOCHS = 10
EVAL_BATCH_SIZE = 64
EVAL_FREQUENCY = 100  # Number of steps between evaluations.
def download(filename):
  """Fetch *filename* from the MNIST site unless a local copy exists.

  Creates WORK_DIRECTORY on first use and returns the local file path.
  """
  if not os.path.exists(WORK_DIRECTORY):
    os.makedirs(WORK_DIRECTORY)
  filepath = os.path.join(WORK_DIRECTORY, filename)
  if os.path.exists(filepath):
    return filepath
  filepath, _ = urllib.request.urlretrieve(SOURCE_URL + filename,
                                           filepath)
  print('Successfully downloaded', filename,
        os.stat(filepath).st_size, 'bytes.')
  return filepath
def extract_data(filename, num_images):
  """Decode *num_images* MNIST images from a gzipped IDX file.

  Returns a float32 4D tensor [image index, y, x, channels] with pixel
  values rescaled from [0, 255] into [-0.5, 0.5].
  """
  print('Extracting', filename)
  with gzip.open(filename) as bytestream:
    bytestream.read(16)  # skip the 16-byte IDX header
    raw = bytestream.read(
        IMAGE_SIZE * IMAGE_SIZE * num_images * NUM_CHANNELS)
  pixels = numpy.frombuffer(raw, dtype=numpy.uint8).astype(numpy.float32)
  # Center to zero mean with unit range.
  pixels = (pixels - (255/2.0))/255
  return pixels.reshape(num_images, IMAGE_SIZE, IMAGE_SIZE,
                        NUM_CHANNELS)
def extract_labels(filename, num_images):
  """Read *num_images* MNIST labels from a gzipped IDX file.

  Returns a 1-D numpy vector of int64 label IDs.
  """
  print('Extracting', filename)
  with gzip.open(filename) as bytestream:
    bytestream.read(8)  # discard the 8-byte IDX header
    raw = bytestream.read(num_images)
  return numpy.frombuffer(raw, dtype=numpy.uint8).astype(numpy.int64)
def error_rate(predictions, labels):
  """Percentage of rows of *predictions* whose argmax differs from *labels*."""
  correct = numpy.sum(numpy.argmax(predictions, 1) == labels)
  accuracy_pct = 100.0 * correct / predictions.shape[0]
  return 100.0 - accuracy_pct
# We will replicate the model structure for the training subgraph, as
# well as the evaluation subgraphs, while sharing the trainable
# parameters.
def model(data, train=False):
  """The Model definition.

  Two conv/relu/maxpool stages followed by a ReLU fully connected layer
  and a final linear layer. The parameter tensors (conv1_weights,
  conv1_biases, conv2_weights, conv2_biases, fc1_weights, fc1_biases,
  fc2_weights, fc2_biases) are module-level variables defined elsewhere
  in this file (not visible in this excerpt) -- that sharing is what lets
  the train and eval subgraphs use the same weights.

  Args:
    data: 4D input tensor [image index, y, x, channels].
    train: if True, apply 50% dropout after the first FC layer.

  Returns:
    The final layer's pre-softmax activations (logits).
  """
  # 2D convolution, with 'SAME' padding (i.e. the output feature map
  # has the same size as the input). Note that {strides} is a 4D array
  # whose shape matches the data layout: [image index, y, x, depth].
  conv = tf.nn.conv2d(data,
                      conv1_weights,
                      strides=[1, 1, 1, 1],
                      padding='SAME')
  # Bias and rectified linear non-linearity.
  relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))
  # Max pooling. The kernel size spec {ksize} also follows the layout
  # of the data. Here we have a pooling window of 2, and a stride of
  # 2.
  pool = tf.nn.max_pool(relu,
                        ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1],
                        padding='SAME')
  # Second conv/relu/pool stage.
  conv = tf.nn.conv2d(pool,
                      conv2_weights,
                      strides=[1, 1, 1, 1],
                      padding='SAME')
  relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases))
  pool = tf.nn.max_pool(relu,
                        ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1],
                        padding='SAME')
  # Reshape the feature map cuboid into a 2D matrix to feed it to the
  # fully connected layers.
  pool_shape = pool.get_shape().as_list()
  reshape = tf.reshape(
      pool,
      [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])
  # Fully connected layer. Note that the '+' operation automatically
  # broadcasts the biases.
  hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
  # Add a 50% dropout during training only. Dropout also scales
  # activations such that no rescaling is needed at evaluation time.
  if train:
    hidden = tf.nn.dropout(hidden, 0.5, seed=SEED)
  return tf.matmul(hidden, fc2_weights) + fc2_biases
# Get the data (downloads the four MNIST archives if not cached).
train_data_filename = download('train-images-idx3-ubyte.gz')
train_labels_filename = download('train-labels-idx1-ubyte.gz')
test_data_filename = download('t10k-images-idx3-ubyte.gz')
test_labels_filename = download('t10k-labels-idx1-ubyte.gz')

# Extract it into numpy arrays.
train_data = extract_data(train_data_filename, 60000)
train_labels = extract_labels(train_labels_filename, 60000)
test_data = extract_data(test_data_filename, 10000)
test_labels = extract_labels(test_labels_filename, 10000)

# Generate a validation set from the head of the training data; the
# remainder is what training actually iterates over.
validation_data = train_data[:VALIDATION_SIZE, ...]
validation_labels = train_labels[:VALIDATION_SIZE]
train_data = train_data[VALIDATION_SIZE:, ...]
train_labels = train_labels[VALIDATION_SIZE:]
num_epochs = NUM_EPOCHS
train_size = train_labels.shape[0]
# This is where training samples and labels are fed to the graph.
# These placeholder nodes will be fed a batch of training data at each
# training step using the {feed_dict} argument to the Run() call below.
train_data_node = tf.placeholder(
    tf.float32,
    shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
train_labels_node = tf.placeholder(tf.int64, shape=(BATCH_SIZE,))
# Separate fixed-size placeholder for evaluation batches.
eval_data = tf.placeholder(
    tf.float32,
    shape=(EVAL_BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))

# The variables below hold all the trainable weights. They are passed
# an initial value which will be assigned when we call:
# {tf.global_variables_initializer().run()}
conv1_weights = tf.Variable(
    # 5x5 filter, depth 32.
    tf.truncated_normal([5, 5, NUM_CHANNELS, 32],
                        stddev=0.1,
                        seed=SEED, dtype=tf.float32))
conv1_biases = tf.Variable(tf.zeros([32], dtype=tf.float32))
# Second conv layer: 5x5 filters, depth 32 -> 64.
conv2_weights = tf.Variable(tf.truncated_normal(
    [5, 5, 32, 64], stddev=0.1,
    seed=SEED, dtype=tf.float32))
conv2_biases = tf.Variable(tf.constant(0.1, shape=[64],
                                       dtype=tf.float32))
# After two 2x2 max-poolings the feature map is IMAGE_SIZE//4 on a side,
# hence the flattened input size below.
fc1_weights = tf.Variable(  # fully connected, depth 512.
    tf.truncated_normal([IMAGE_SIZE // 4 * IMAGE_SIZE // 4 * 64, 512],
                        stddev=0.1,
                        seed=SEED,
                        dtype=tf.float32))
fc1_biases = tf.Variable(tf.constant(0.1, shape=[512],
                                     dtype=tf.float32))
fc2_weights = tf.Variable(tf.truncated_normal([512, NUM_LABELS],
                                              stddev=0.1,
                                              seed=SEED,
                                              dtype=tf.float32))
fc2_biases = tf.Variable(tf.constant(
    0.1, shape=[NUM_LABELS], dtype=tf.float32))
# Training computation: logits + cross-entropy loss.
logits = model(train_data_node, True)
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=train_labels_node, logits=logits))

# L2 regularization for the fully connected parameters.
regularizers = (tf.nn.l2_loss(fc1_weights)
                + tf.nn.l2_loss(fc1_biases)
                + tf.nn.l2_loss(fc2_weights)
                + tf.nn.l2_loss(fc2_biases))
# Add the regularization term to the loss.
loss += 5e-4 * regularizers

# Optimizer: set up a variable that's incremented once per batch and
# controls the learning rate decay.
# NOTE(review): the step counter is float32; exponential_decay accepts
# it, but an integer global step would be more conventional -- confirm
# before changing.
batch = tf.Variable(0, dtype=tf.float32)
# Decay once per epoch, using an exponential schedule starting at 0.01.
learning_rate = tf.train.exponential_decay(
    0.01,  # Base learning rate.
    batch * BATCH_SIZE,  # Current index into the dataset.
    train_size,  # Decay step.
    0.95,  # Decay rate.
    staircase=True)
# Use simple momentum for the optimization.
optimizer = tf.train.MomentumOptimizer(learning_rate,
                                       0.9).minimize(loss,
                                                     global_step=batch)
# Predictions for the current training minibatch.
train_prediction = tf.nn.softmax(logits)
# Predictions for the test and validation, which we'll compute less
# often.
eval_prediction = tf.nn.softmax(model(eval_data))
# Small utility function to evaluate a dataset by feeding batches of
# data to {eval_data} and pulling the results from {eval_predictions}.
# Saves memory and enables this to run on smaller GPUs.
def eval_in_batches(data, sess):
    """Get predictions for a dataset by running it in small batches.

    Args:
      data: numpy array of images; first dimension is the dataset size.
      sess: TensorFlow session in which to run the eval subgraph.

    Returns:
      A [size, NUM_LABELS] float32 array of softmax probabilities.

    Raises:
      ValueError: if the dataset is smaller than EVAL_BATCH_SIZE.
    """
    size = data.shape[0]
    if size < EVAL_BATCH_SIZE:
        raise ValueError("batch size for evals larger than dataset: %d"
                         % size)
    predictions = numpy.ndarray(shape=(size, NUM_LABELS),
                                dtype=numpy.float32)
    # Use range (not the Python-2-only xrange) so this runs under
    # Python 3 without the six.moves shim.
    for begin in range(0, size, EVAL_BATCH_SIZE):
        end = begin + EVAL_BATCH_SIZE
        if end <= size:
            predictions[begin:end, :] = sess.run(
                eval_prediction,
                feed_dict={eval_data: data[begin:end, ...]})
        else:
            # Final ragged batch: evaluate the last EVAL_BATCH_SIZE rows
            # and keep only the tail not already covered above.
            batch_predictions = sess.run(
                eval_prediction,
                feed_dict={eval_data: data[-EVAL_BATCH_SIZE:, ...]})
            predictions[begin:, :] = batch_predictions[begin - size:, :]
    return predictions
# Create a local session to run the training.
start_time = time.time()
with tf.Session() as sess:
    # Run all the initializers to prepare the trainable parameters.
    tf.global_variables_initializer().run()
    # Loop through training steps.
    for step in xrange(int(num_epochs * train_size) // BATCH_SIZE):
        # Compute the offset of the current minibatch in the data.
        # Note that we could use better randomization across epochs.
        offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)
        batch_data = train_data[offset:(offset + BATCH_SIZE), ...]
        batch_labels = train_labels[offset:(offset + BATCH_SIZE)]
        # This dictionary maps the batch data (as a numpy array) to the
        # node in the graph it should be fed to.
        feed_dict = {train_data_node: batch_data,
                     train_labels_node: batch_labels}
        # Run the optimizer to update weights.
        sess.run(optimizer, feed_dict=feed_dict)
        # Print some extra information once we reach the evaluation
        # frequency.
        if step % EVAL_FREQUENCY == 0:
            # Fetch loss / learning rate / predictions for reporting.
            l, lr, predictions = sess.run([loss, learning_rate,
                                           train_prediction],
                                          feed_dict=feed_dict)
            # Timing is measured per EVAL_FREQUENCY-step window.
            elapsed_time = time.time() - start_time
            start_time = time.time()
            print('Step %d (epoch %.2f), %.1f ms' %
                  (step, float(step) * BATCH_SIZE / train_size,
                   1000 * elapsed_time / EVAL_FREQUENCY))
            print('Minibatch loss: %.3f, learning rate: %.6f' % (l, lr))
            print('Minibatch error: %.1f%%'
                  % error_rate(predictions, batch_labels))
            print('Validation error: %.1f%%' % error_rate(
                eval_in_batches(validation_data, sess), validation_labels))
            sys.stdout.flush()
    # Finally print the result!
    test_error = error_rate(eval_in_batches(test_data, sess),
                            test_labels)
    print('Test error: %.1f%%' % test_error)
================================================
FILE: ch7/ptb_word_lm.py
================================================
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Example / benchmark for building a PTB LSTM model.
Trains the model described in:
(Zaremba, et. al.) Recurrent Neural Network Regularization
http://arxiv.org/abs/1409.2329
There are 3 supported model configurations:
===========================================
| config | epochs | train | valid | test
===========================================
| small | 13 | 37.99 | 121.39 | 115.91
| medium | 39 | 48.45 | 86.16 | 82.07
| large | 55 | 37.87 | 82.62 | 78.29
The exact results may vary depending on the random initialization.
The hyperparameters used in the model:
- init_scale - the initial scale of the weights
- learning_rate - the initial value of the learning rate
- max_grad_norm - the maximum permissible norm of the gradient
- num_layers - the number of LSTM layers
- num_steps - the number of unrolled steps of LSTM
- hidden_size - the number of LSTM units
- max_epoch - the number of epochs trained with the initial learning rate
- max_max_epoch - the total number of epochs for training
- keep_prob - the probability of keeping weights in the dropout layer
- lr_decay - the decay of the learning rate for each epoch after "max_epoch"
- batch_size - the batch size
The data required for this example is in the data/ dir of the
PTB dataset from Tomas Mikolov's webpage:
$ wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
$ tar xvf simple-examples.tgz
To run:
$ python ptb_word_lm.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import inspect
import time
import numpy as np
import tensorflow as tf
import reader
# Command-line flag definitions (TF's thin wrapper around argparse).
flags = tf.flags
logging = tf.logging

flags.DEFINE_string("save_path", None,
                    "Model output directory.")

FLAGS = flags.FLAGS
class PTBInput(object):
    """Bundles queued input/target tensors and sizing info for one split."""

    def __init__(self, config, data, name=None):
        self.batch_size = config.batch_size
        self.num_steps = config.num_steps
        # One epoch walks every num_steps-wide window of each batch row;
        # one position is reserved for the right-shifted targets.
        self.epoch_size = ((len(data) // self.batch_size) - 1) // self.num_steps
        self.input_data, self.targets = reader.ptb_producer(
            data, self.batch_size, self.num_steps, name=name)
class PTBModel(object):
    """The PTB model: a stacked-LSTM word-level language model."""

    def __init__(self, is_training, config, input_):
        """Build the unrolled LSTM graph.

        Args:
          is_training: bool; if True, apply dropout and add training ops.
          config: hyperparameter object (e.g. SmallConfig).
          input_: a PTBInput supplying input_data and targets tensors.
        """
        self.input = input_

        batch_size = input_.batch_size
        num_steps = input_.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size

        # Slightly better results can be obtained with forget gate biases
        # initialized to 1 but the hyperparameters of the model would
        # need to be different than reported in the paper.
        def lstm_cell():
            # With the latest TensorFlow source code (as of Mar 27, 2017),
            # the BasicLSTMCell will need a reuse parameter which is
            # unfortunately not defined in TensorFlow 1.0. To maintain
            # backwards compatibility, we add an argument check here:
            if 'reuse' in inspect.getargspec(
                tf.contrib.rnn.BasicLSTMCell.__init__).args:
                return tf.contrib.rnn.BasicLSTMCell(
                    size, forget_bias=0.0, state_is_tuple=True,
                    reuse=tf.get_variable_scope().reuse)
            else:
                return tf.contrib.rnn.BasicLSTMCell(
                    size, forget_bias=0.0, state_is_tuple=True)
        attn_cell = lstm_cell
        if is_training and config.keep_prob < 1:
            # During training, wrap each cell with output dropout.
            def attn_cell():
                return tf.contrib.rnn.DropoutWrapper(
                    lstm_cell(), output_keep_prob=config.keep_prob)
        cell = tf.contrib.rnn.MultiRNNCell(
            [attn_cell() for _ in range(config.num_layers)],
            state_is_tuple=True)

        self.initial_state = cell.zero_state(batch_size, tf.float32)

        # Embedding lookup is pinned to the CPU.
        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                "embedding", [vocab_size, size], dtype=tf.float32)
            inputs = tf.nn.embedding_lookup(embedding, input_.input_data)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        outputs = []
        state = self.initial_state
        with tf.variable_scope("RNN"):
            # Manual unroll for num_steps, sharing weights across time
            # steps via variable reuse.
            for time_step in range(num_steps):
                if time_step > 0: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(inputs[:, time_step, :], state)
                outputs.append(cell_output)

        # [batch, steps, size] -> [batch*steps, size] for the projection.
        output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, size])
        softmax_w = tf.get_variable(
            "softmax_w", [size, vocab_size], dtype=tf.float32)
        softmax_b = tf.get_variable(
            "softmax_b", [vocab_size], dtype=tf.float32)
        logits = tf.matmul(output, softmax_w) + softmax_b

        # Reshape logits to be 3-D tensor for sequence loss
        logits = tf.reshape(logits, [batch_size, num_steps, vocab_size])

        # use the contrib sequence loss and average over the batches
        loss = tf.contrib.seq2seq.sequence_loss(
            logits,
            input_.targets,
            tf.ones([batch_size, num_steps], dtype=tf.float32),
            average_across_timesteps=False,
            average_across_batch=True
        )

        # update the cost variables
        self.cost = cost = tf.reduce_sum(loss)
        self.final_state = state

        if not is_training:
            return

        # Training-only ops: clipped-gradient SGD with a learning rate
        # that the driver updates via assign_lr().
        self.lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(
            zip(grads, tvars),
            global_step=tf.contrib.framework.get_or_create_global_step())

        self.new_lr = tf.placeholder(
            tf.float32, shape=[], name="new_learning_rate")
        self.lr_update = tf.assign(self.lr, self.new_lr)

    def assign_lr(self, session, lr_value):
        """Set the model's learning-rate variable to lr_value."""
        session.run(self.lr_update, feed_dict={self.new_lr: lr_value})
class SmallConfig(object):
    """Small config."""
    init_scale = 0.1  # Uniform-init scale for all weights.
    learning_rate = 1.0  # Initial learning rate.
    max_grad_norm = 5  # Clip gradients to this global norm.
    num_layers = 2  # Number of stacked LSTM layers.
    num_steps = 20  # Unrolled time steps per minibatch.
    hidden_size = 200  # LSTM units per layer.
    max_epoch = 4  # Epochs run at the initial learning rate.
    max_max_epoch = 13  # Total training epochs.
    keep_prob = 1.0  # Dropout keep probability (1.0 = no dropout).
    lr_decay = 0.5  # LR decay factor per epoch after max_epoch.
    batch_size = 20  # Sequences per minibatch.
    vocab_size = 10000  # PTB vocabulary size.
def run_epoch(session, model, eval_op=None, verbose=False):
    """Runs the model on the given data.

    Args:
      session: tf.Session in which to run the ops.
      model: a PTBModel instance (train, valid or test).
      eval_op: optional op (e.g. model.train_op) run with the fetches.
      verbose: if True, periodically print progress and perplexity.

    Returns:
      The perplexity (exp of mean per-word cost) over the epoch.
    """
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = session.run(model.initial_state)

    fetches = {
        "cost": model.cost,
        "final_state": model.final_state,
    }
    if eval_op is not None:
        fetches["eval_op"] = eval_op

    # Bug fix: for epoch_size < 10 the original modulus
    # (epoch_size // 10) was zero, so `step % 0` raised
    # ZeroDivisionError whenever verbose=True. Clamp to >= 1.
    log_interval = max(model.input.epoch_size // 10, 1)
    for step in range(model.input.epoch_size):
        # Feed the previous final LSTM state back in so state carries
        # across minibatches within the epoch.
        feed_dict = {}
        for i, (c, h) in enumerate(model.initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h

        vals = session.run(fetches, feed_dict)
        cost = vals["cost"]
        state = vals["final_state"]

        costs += cost
        iters += model.input.num_steps

        if verbose and step % log_interval == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / model.input.epoch_size,
                   np.exp(costs / iters),
                   (iters
                    * model.input.batch_size / (time.time() - start_time))))

    return np.exp(costs / iters)
# ---- Script entry: build train/valid/test models and run training. ----
raw_data = reader.ptb_raw_data("./simple-examples/data")
train_data, valid_data, test_data, _ = raw_data

config = SmallConfig()
# Evaluation uses batch/steps of 1 so state flows word by word.
eval_config = SmallConfig()
eval_config.batch_size = 1
eval_config.num_steps = 1

with tf.Graph().as_default():
    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)

    with tf.name_scope("Train"):
        train_input = PTBInput(config=config, data=train_data,
                               name="TrainInput")
        with tf.variable_scope("Model", reuse=None,
                               initializer=initializer):
            m = PTBModel(is_training=True, config=config,
                         input_=train_input)
        tf.summary.scalar("Training Loss", m.cost)
        tf.summary.scalar("Learning Rate", m.lr)

    # Valid and Test reuse the Train model's variables (reuse=True).
    with tf.name_scope("Valid"):
        valid_input = PTBInput(config=config, data=valid_data,
                               name="ValidInput")
        with tf.variable_scope("Model", reuse=True,
                               initializer=initializer):
            mvalid = PTBModel(is_training=False, config=config,
                              input_=valid_input)
        tf.summary.scalar("Validation Loss", mvalid.cost)

    with tf.name_scope("Test"):
        test_input = PTBInput(config=eval_config, data=test_data,
                              name="TestInput")
        with tf.variable_scope("Model", reuse=True,
                               initializer=initializer):
            mtest = PTBModel(is_training=False, config=eval_config,
                             input_=test_input)

    # Supervisor starts queue runners and manages session lifetime.
    sv = tf.train.Supervisor()
    with sv.managed_session() as session:
        for i in range(config.max_max_epoch):
            # Decay the learning rate after the first max_epoch epochs.
            lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f"
                  % (i + 1, session.run(m.lr)))
            train_perplexity = run_epoch(session, m, eval_op=m.train_op,
                                         verbose=True)
            print("Epoch: %d Train Perplexity: %.3f"
                  % (i + 1, train_perplexity))
            valid_perplexity = run_epoch(session, mvalid)
            print("Epoch: %d Valid Perplexity: %.3f"
                  % (i + 1, valid_perplexity))

        test_perplexity = run_epoch(session, mtest)
        print("Test Perplexity: %.3f" % test_perplexity)
================================================
FILE: ch7/reader.py
================================================
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for parsing PTB text files."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import os
import sys
import tensorflow as tf
def _read_words(filename):
    """Read a file via tf.gfile and return its whitespace-split tokens."""
    with tf.gfile.GFile(filename, "r") as f:
        text = f.read()
        # Python 2's GFile returns bytes; decode before tokenizing.
        if sys.version_info[0] < 3:
            text = text.decode("utf-8")
        return text.replace("\n", "").split()
def _build_vocab(filename):
    """Map each word in the file to an integer id, most frequent first."""
    counts = collections.Counter(_read_words(filename))
    # Sort by descending frequency, then alphabetically, so the id
    # assignment is deterministic across runs.
    ordered = sorted(counts.items(), key=lambda kv: (-kv[1], kv[0]))
    return {word: idx for idx, (word, _) in enumerate(ordered)}
def _file_to_word_ids(filename, word_to_id):
    """Convert a file's words to ids, dropping out-of-vocabulary words."""
    return [word_to_id[w] for w in _read_words(filename) if w in word_to_id]
def ptb_raw_data(data_path=None):
    """Load PTB raw data from data directory "data_path".

    Reads PTB text files, converts strings to integer ids,
    and performs mini-batching of the inputs.

    The PTB dataset comes from Tomas Mikolov's webpage:
    http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz

    Args:
      data_path: string path to the directory where simple-examples.tgz
        has been extracted.

    Returns:
      tuple (train_data, valid_data, test_data, vocabulary)
      where each of the data objects can be passed to PTBIterator.
    """
    train_path, valid_path, test_path = (
        os.path.join(data_path, split)
        for split in ("ptb.train.txt", "ptb.valid.txt", "ptb.test.txt"))

    # The vocabulary is derived from the training split only.
    word_to_id = _build_vocab(train_path)
    train_data, valid_data, test_data = (
        _file_to_word_ids(path, word_to_id)
        for path in (train_path, valid_path, test_path))
    return train_data, valid_data, test_data, len(word_to_id)
def ptb_producer(raw_data, batch_size, num_steps, name=None):
    """Iterate on the raw PTB data.

    This chunks up raw_data into batches of examples and returns
    Tensors that are drawn from these batches.

    Args:
      raw_data: one of the raw data outputs from ptb_raw_data.
      batch_size: int, the batch size.
      num_steps: int, the number of unrolls.
      name: the name of this operation (optional).

    Returns:
      A pair of Tensors, each shaped [batch_size, num_steps]. The
      second element of the tuple is the same data time-shifted to the
      right by one.

    Raises:
      tf.errors.InvalidArgumentError: if batch_size or num_steps are
        too high.
    """
    with tf.name_scope(name, "PTBProducer",
                       [raw_data, batch_size, num_steps]):
        raw_data = tf.convert_to_tensor(raw_data, name="raw_data",
                                        dtype=tf.int32)

        data_len = tf.size(raw_data)
        batch_len = data_len // batch_size
        # Drop the tail that doesn't divide evenly and lay the ids out
        # as batch_size parallel streams.
        data = tf.reshape(raw_data[0 : batch_size * batch_len],
                          [batch_size, batch_len])

        epoch_size = (batch_len - 1) // num_steps
        # Fail fast at graph-run time if there isn't a single full window.
        assertion = tf.assert_positive(
            epoch_size,
            message="epoch_size == 0, decrease batch_size or num_steps")
        with tf.control_dependencies([assertion]):
            epoch_size = tf.identity(epoch_size, name="epoch_size")

        # Queue of window indices; each dequeue yields the next slice.
        i = tf.train.range_input_producer(epoch_size,
                                          shuffle=False).dequeue()
        x = tf.strided_slice(data, [0, i * num_steps],
                             [batch_size, (i + 1) * num_steps])
        x.set_shape([batch_size, num_steps])
        # Targets are the inputs shifted one token to the right.
        y = tf.strided_slice(data, [0, i * num_steps + 1],
                             [batch_size, (i + 1) * num_steps + 1])
        y.set_shape([batch_size, num_steps])
        return x, y
================================================
FILE: ch7/setup.sh
================================================
# Fetch Mikolov's PTB "simple-examples" archive used by ch7/ptb_word_lm.py.
wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz
# Unpack in place; the reader expects ./simple-examples/data to exist.
tar xvf simple-examples.tgz
================================================
FILE: ch8/a3c.py
================================================
"""Asynchronous Advantage Actor-Critic (A3C) algorithm for reinforcement learning."""
import numpy as np
import tensorflow as tf
import copy
import multiprocessing
import os
import re
import threading
from collections import Sequence
import pickle
import threading
import time
import numpy as np
import os
import six
import tensorflow as tf
import tempfile
from tensorgraph import TensorGraph
from tensorgraph import Layer
from tensorgraph import Dense
from tensorgraph import Squeeze
from tensorgraph import Flatten
from tensorgraph import BatchNorm
from tensorgraph import SoftMax
from tensorgraph import Input
class A3CLoss(Layer):
    """This layer computes the loss function for A3C."""

    def __init__(self, value_weight, entropy_weight, **kwargs):
        super(A3CLoss, self).__init__(**kwargs)
        self.value_weight = value_weight
        self.entropy_weight = entropy_weight

    def create_tensor(self, **kwargs):
        """Combine policy, value and entropy terms into one scalar loss."""
        tensors = [layer.out_tensor for layer in self.in_layers]
        reward, action, prob, value, advantage = tensors
        # Shift probabilities by machine epsilon so log() never sees 0.
        shifted_prob = prob + np.finfo(np.float32).eps
        log_prob = tf.log(shifted_prob)
        # Policy gradient term, weighted by the advantage estimates.
        policy_loss = -tf.reduce_mean(
            advantage * tf.reduce_sum(action * log_prob, axis=1))
        # Value head regression toward observed discounted rewards.
        value_loss = tf.reduce_mean(tf.square(reward - value))
        # Entropy bonus (subtracted) encourages exploration.
        entropy = -tf.reduce_mean(
            tf.reduce_sum(shifted_prob * log_prob, axis=1))
        self.out_tensor = (policy_loss
                           + self.value_weight * value_loss
                           - self.entropy_weight * entropy)
        return self.out_tensor
class A3C(object):
    """
    Implements the Asynchronous Advantage Actor-Critic (A3C) algorithm for reinforcement learning.

    The algorithm is described in Mnih et al, "Asynchronous Methods for Deep
    Reinforcement Learning" (https://arxiv.org/abs/1602.01783). This class
    requires the policy to output two quantities: a vector giving the probability
    of taking each action, and an estimate of the value function for the current
    state. It optimizes both outputs at once using a loss that is the sum of three
    terms:

    1. The policy loss, which seeks to maximize the discounted reward for each action.
    2. The value loss, which tries to make the value estimate match the actual
       discounted reward that was attained at each step.
    3. An entropy term to encourage exploration.

    This class only supports environments with discrete action spaces, not
    continuous ones. The "action" argument passed to the environment is an
    integer, giving the index of the action to perform.

    This class supports Generalized Advantage Estimation as described in Schulman
    et al., "High-Dimensional Continuous Control Using Generalized Advantage
    Estimation" (https://arxiv.org/abs/1506.02438). This is a method of trading
    off bias and variance in the advantage estimate, which can sometimes improve
    the rate of convergence. Use the advantage_lambda parameter to adjust the
    tradeoff.
    """

    def __init__(self,
                 env,
                 max_rollout_length=20,
                 discount_factor=0.99,
                 advantage_lambda=0.98,
                 value_weight=1.0,
                 entropy_weight=0.01,
                 optimizer=None,
                 model_dir=None):
        """Create an object for optimizing a policy.

        Parameters
        ----------
        env: Environment
          the Environment to interact with
        max_rollout_length: int
          the maximum length of rollouts to generate
        discount_factor: float
          the discount factor to use when computing rewards
        advantage_lambda: float
          the parameter for trading bias vs. variance in Generalized Advantage Estimation
        value_weight: float
          a scale factor for the value loss term in the loss function
        entropy_weight: float
          a scale factor for the entropy term in the loss function
        optimizer: Optimizer
          the optimizer to use. If None, a default optimizer is used.
        model_dir: str
          the directory in which the model will be saved. If None, a temporary
          directory will be created.
        """
        self._env = env
        self.max_rollout_length = max_rollout_length
        self.discount_factor = discount_factor
        self.advantage_lambda = advantage_lambda
        self.value_weight = value_weight
        self.entropy_weight = entropy_weight
        # Bug fix: the caller-supplied optimizer was previously discarded
        # (self._optimizer was unconditionally set to None, contradicting
        # the docstring). Keep it; None still selects the TensorGraph
        # default, so existing callers are unaffected.
        self._optimizer = optimizer
        # Build the "global" copy of the network; workers build their own
        # copies in build_graph() and sync with this one.
        (self._graph, self._features, self._rewards, self._actions,
         self._action_prob, self._value, self._advantages) = self.build_graph(
             None, "global", model_dir)
        with self._graph._get_tf("Graph").as_default():
            self._session = tf.Session()

    def build_graph(self, tf_graph, scope, model_dir):
        """Construct a TensorGraph containing the policy and loss calculations.

        Parameters
        ----------
        tf_graph: tf.Graph or None
          graph to build into; None creates a new one
        scope: str
          variable scope name ("global" or a worker scope)
        model_dir: str or None
          directory for checkpoints

        Returns
        -------
        tuple of (graph, features, rewards, actions, action_prob, value,
        advantages) layers for the new network copy.
        """
        state_shape = self._env.state_shape
        # One Input layer per component of the environment state, each with
        # a leading unknown batch dimension.
        features = []
        for s in state_shape:
            features.append(Input(shape=[None] + list(s), dtype=tf.float32))
        # Shared trunk: three L2-normalized ReLU layers + batch norm.
        d1 = Flatten(in_layers=features)
        d2 = Dense(
            in_layers=[d1],
            activation_fn=tf.nn.relu,
            normalizer_fn=tf.nn.l2_normalize,
            normalizer_params={"dim": 1},
            out_channels=64)
        d3 = Dense(
            in_layers=[d2],
            activation_fn=tf.nn.relu,
            normalizer_fn=tf.nn.l2_normalize,
            normalizer_params={"dim": 1},
            out_channels=32)
        d4 = Dense(
            in_layers=[d3],
            activation_fn=tf.nn.relu,
            normalizer_fn=tf.nn.l2_normalize,
            normalizer_params={"dim": 1},
            out_channels=16)
        d4 = BatchNorm(in_layers=[d4])
        # Two heads: action logits (softmaxed below) and a scalar value.
        d5 = Dense(in_layers=[d4], activation_fn=None, out_channels=9)
        value = Dense(in_layers=[d4], activation_fn=None, out_channels=1)
        value = Squeeze(squeeze_dims=1, in_layers=[value])
        action_prob = SoftMax(in_layers=[d5])

        # Placeholder-style inputs fed during training only.
        rewards = Input(shape=(None,))
        advantages = Input(shape=(None,))
        actions = Input(shape=(None, self._env.n_actions))
        loss = A3CLoss(
            self.value_weight,
            self.entropy_weight,
            in_layers=[rewards, actions, action_prob, value, advantages])
        graph = TensorGraph(
            batch_size=self.max_rollout_length,
            graph=tf_graph,
            model_dir=model_dir)
        for f in features:
            graph._add_layer(f)
        graph.add_output(action_prob)
        graph.add_output(value)
        graph.set_loss(loss)
        graph.set_optimizer(self._optimizer)
        with graph._get_tf("Graph").as_default():
            with tf.variable_scope(scope):
                graph.build()
        return graph, features, rewards, actions, action_prob, value, advantages

    def fit(self,
            total_steps,
            max_checkpoints_to_keep=5,
            checkpoint_interval=600,
            restore=False):
        """Train the policy.

        Parameters
        ----------
        total_steps: int
          the total number of time steps to perform on the environment, across all
          rollouts on all threads
        max_checkpoints_to_keep: int
          the maximum number of checkpoint files to keep. When this number is
          reached, older files are deleted.
        checkpoint_interval: float
          the time interval at which to save checkpoints, measured in seconds
        restore: bool
          if True, restore the model from the most recent checkpoint and continue
          training from there. If False, retrain the model from scratch.
        """
        with self._graph._get_tf("Graph").as_default():
            # Shared mutable step counter (single-element list) that all
            # worker threads increment.
            step_count = [0]
            workers = []
            threads = []
            for i in range(multiprocessing.cpu_count()):
                workers.append(Worker(self, i))
            self._session.run(tf.global_variables_initializer())
            if restore:
                self.restore()
            for worker in workers:
                thread = threading.Thread(
                    name=worker.scope,
                    target=lambda: worker.run(step_count, total_steps))
                threads.append(thread)
                thread.start()
            # Only the "global" variables are checkpointed; worker copies
            # are derived from them.
            variables = tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES, scope="global")
            saver = tf.train.Saver(variables, max_to_keep=max_checkpoints_to_keep)
            checkpoint_index = 0
            while True:
                # Bug fix: Thread.isAlive() was removed in Python 3.9;
                # is_alive() is the long-standing equivalent.
                threads = [t for t in threads if t.is_alive()]
                if len(threads) > 0:
                    # Wake up roughly every checkpoint_interval seconds to
                    # save, until all workers have finished.
                    threads[0].join(checkpoint_interval)
                checkpoint_index += 1
                saver.save(
                    self._session, self._graph.save_file, global_step=checkpoint_index)
                if len(threads) == 0:
                    break

    def predict(self, state):
        """Compute the policy's output predictions for a state.

        Parameters
        ----------
        state: array
          the state of the environment for which to generate predictions

        Returns
        -------
        the array of action probabilities, and the estimated value function
        """
        with self._graph._get_tf("Graph").as_default():
            feed_dict = self.create_feed_dict(state)
            tensors = [self._action_prob.out_tensor, self._value.out_tensor]
            results = self._session.run(tensors, feed_dict=feed_dict)
            return results[:2]

    def select_action(self,
                      state,
                      deterministic=False):
        """Select an action to perform based on the environment's state.

        Parameters
        ----------
        state: array
          the state of the environment for which to select an action
        deterministic: bool
          if True, always return the best action (that is, the one with highest
          probability). If False, randomly select an action based on the computed
          probabilities.

        Returns
        -------
        the index of the selected action
        """
        with self._graph._get_tf("Graph").as_default():
            feed_dict = self.create_feed_dict(state)
            tensors = [self._action_prob.out_tensor]
            results = self._session.run(tensors, feed_dict=feed_dict)
            probabilities = results[0]
            if deterministic:
                return probabilities.argmax()
            else:
                # Sample an action from the policy distribution.
                return np.random.choice(
                    np.arange(self._env.n_actions), p=probabilities[0])

    def restore(self):
        """Reload the model parameters from the most recent checkpoint file.

        Raises
        ------
        ValueError: if no checkpoint exists in the model directory.
        """
        last_checkpoint = tf.train.latest_checkpoint(self._graph.model_dir)
        if last_checkpoint is None:
            raise ValueError("No checkpoint found")
        with self._graph._get_tf("Graph").as_default():
            variables = tf.get_collection(
                tf.GraphKeys.GLOBAL_VARIABLES, scope="global")
            saver = tf.train.Saver(variables)
            saver.restore(self._session, last_checkpoint)

    def create_feed_dict(self, state):
        """Create a feed dict for use by predict() or select_action().

        Each state component gets a leading batch dimension of 1.
        """
        feed_dict = dict((f.out_tensor, np.expand_dims(s, axis=0))
                         for f, s in zip(self._features, state))
        return feed_dict
class Worker(object):
"""A Worker object is created for each training thread."""
def __init__(self, a3c, index):
self.a3c = a3c
self.index = index
self.scope = "worker%d" % index
self.env = copy.deepcopy(a3c._env)
self.env.reset()
(self.graph, self.features, self.rewards, self.actions, self.action_prob,
self.value, self.advantages) = a3c.build_graph(
a3c._graph._get_tf("Graph"), self.scope, None)
with a3c._graph._get_tf("Graph").as_default():
local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
self.scope)
global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
"global")
gradients = tf.gradients(self.graph.loss.out_tensor, local_vars)
grads_and_vars = list(zip(gradients, global_vars))
self.train_op = a3c._graph._get_tf("Optimizer").apply_gradients(
grads_and_vars)
self.update_local_variables = tf.group(
* [tf.assign(v1, v2) for v1, v2 in zip(local_vars, global_vars)])
self.global_step = self.graph.get_global_step()
def run(self, step_count, total_steps):
with self.graph._get_tf("Graph").as_default():
while step_count[0] < total_steps:
self.a3c._session.run(self.update_local_variables)
states, actions, rewards, values = self.create_rollout()
self.process_rollout(states, actions, rewards, values, step_count[0])
step_count[0] += len(actions)
def create_rollout(self):
"""Generate a rollout."""
n_actions = self.env.n_actions
session = self.a3c._session
states = []
actions = []
rewards = []
values = []
# Generate the rollout.
for i in range(self.a3c.max_rollout_length):
if self.env.terminated:
break
state = self.env.state
states.append(state)
feed_dict = self.create_feed_dict(state)
results = session.run(
[self.action_prob.out_tensor, self.value.out_tensor],
feed_dict=feed_dict)
probabilities, value = results[:2]
action = np.random.choice(np.arange(n_actions), p=probabilities[0])
actions.append(action)
values.append(float(value))
rewards.append(self.env.step(action))
# Compute an estimate of the reward for the rest of the episode.
if not self.env.terminated:
feed_dict = self.create_feed_dict(self.env.state)
final_value = self.a3c.discount_factor * float(
session.run(self.value.out_tensor, feed_dict))
else:
final_value = 0.0
values.append(final_value)
if self.env.terminated:
self.env.reset()
return states, actions, np.array(rewards), np.array(values)
def process_rollout(self, states, actions, rewards, values, step_count):
  """Train the network based on a rollout.

  Computes discounted returns and generalized advantage estimates (GAE)
  from the rollout, then applies one gradient update.

  Parameters
  ----------
  states: list
    environment states visited during the rollout
  actions: list of int
    action taken at each step
  rewards: np.ndarray
    per-step rewards
  values: np.ndarray
    value estimate for each state plus one trailing entry estimating the
    return after the final state (see create_rollout)
  step_count: int
    current global step, fed to the graph's global-step placeholder
  """
  # Compute the discounted rewards and advantages.
  if len(states) == 0:
    # Rollout creation sometimes fails in multithreaded environment.
    # Don't process if malformed
    print("Rollout creation failed. Skipping")
    return
  discounted_rewards = rewards.copy()
  # Bootstrap the last reward with the estimated value of the remainder
  # of the episode (values has one extra trailing entry).
  discounted_rewards[-1] += values[-1]
  # One-step TD residuals: r_t + gamma * V(s_{t+1}) - V(s_t).
  advantages = rewards - values[:-1] + self.a3c.discount_factor * np.array(
      values[1:])
  for j in range(len(rewards) - 1, 0, -1):
    discounted_rewards[j-1] += self.a3c.discount_factor * discounted_rewards[j]
    # Accumulate the GAE(lambda) estimate backwards through time.
    advantages[j-1] += (
        self.a3c.discount_factor * self.a3c.advantage_lambda * advantages[j])
  # Convert the actions to one-hot.
  n_actions = self.env.n_actions
  actions_matrix = []
  for action in actions:
    a = np.zeros(n_actions)
    a[action] = 1.0
    actions_matrix.append(a)
  # Rearrange the states into the proper set of arrays.
  state_arrays = [[] for i in range(len(self.features))]
  for state in states:
    for j in range(len(state)):
      state_arrays[j].append(state[j])
  # Build the feed dict and apply gradients.
  feed_dict = {}
  for f, s in zip(self.features, state_arrays):
    feed_dict[f.out_tensor] = s
  feed_dict[self.rewards.out_tensor] = discounted_rewards
  feed_dict[self.actions.out_tensor] = actions_matrix
  feed_dict[self.advantages.out_tensor] = advantages
  feed_dict[self.global_step] = step_count
  self.a3c._session.run(self.train_op, feed_dict=feed_dict)
def create_feed_dict(self, state):
  """Create a feed dict for use during a rollout.

  Each array in `state` gets a leading batch dimension of size 1 and is
  keyed by the matching feature layer's output tensor.
  """
  return {
      feature.out_tensor: np.expand_dims(array, axis=0)
      for feature, array in zip(self.features, state)
  }
================================================
FILE: ch8/environment.py
================================================
import collections
import collections.abc
import copy
import random
import shutil

import deepchem as dc
import numpy as np
import tensorflow as tf
class Environment(object):
  """An environment in which an actor performs actions to accomplish a task.

  An environment has a current state, which is represented as either a single NumPy
  array, or optionally a list of NumPy arrays. When an action is taken, that causes
  the state to be updated. Exactly what is meant by an "action" is defined by each
  subclass. As far as this interface is concerned, it is simply an arbitrary object.
  The environment also computes a reward for each action, and reports when the task
  has been terminated (meaning that no more actions may be taken).
  """

  def __init__(self, state_shape, n_actions, state_dtype=None):
    """Subclasses should call the superclass constructor in addition to doing their own initialization.

    Parameters
    ----------
    state_shape: tuple, or list of tuples
      the shape(s) of the state array(s)
    n_actions: int
      the number of distinct actions an actor may take
    state_dtype: dtype or list of dtypes, optional
      dtype(s) of the state array(s); defaults to float32 for everything
    """
    self.state_shape = state_shape
    self.n_actions = n_actions
    if state_dtype is None:
      # Assume all arrays are float32.
      # Fix: collections.Sequence was a deprecated alias removed in
      # Python 3.10; the canonical ABC lives in collections.abc.
      if isinstance(state_shape[0], collections.abc.Sequence):
        self.state_dtype = [np.float32] * len(state_shape)
      else:
        self.state_dtype = np.float32
    else:
      self.state_dtype = state_dtype
class TicTacToeEnvironment(Environment):
  """
  Play tictactoe against a randomly acting opponent

  The state is a single (3, 3, 2) float32 array: each square holds a
  two-element one-hot vector identifying X, O, or (all zeros) empty.
  The agent always plays X; actions are square indices 0-8 in row-major
  order.
  """
  X = np.array([1.0, 0.0])
  O = np.array([0.0, 1.0])
  EMPTY = np.array([0.0, 0.0])

  ILLEGAL_MOVE_PENALTY = -3.0
  LOSS_PENALTY = -3.0
  NOT_LOSS = 0.1
  DRAW_REWARD = 5.0
  WIN_REWARD = 10.0

  def __init__(self):
    super(TicTacToeEnvironment, self).__init__([(3, 3, 2)], 9)
    self.state = None
    self.terminated = None
    self.reset()

  def reset(self):
    """Clear the board; the random opponent moves first half the time."""
    self.terminated = False
    self.state = [np.zeros(shape=(3, 3, 2), dtype=np.float32)]

    # Randomize who goes first
    if random.randint(0, 1) == 1:
      move = self.get_O_move()
      self.state[0][move[0]][move[1]] = TicTacToeEnvironment.O

  def step(self, action):
    """Place an X at `action`, let O respond, and return the reward."""
    self.state = copy.deepcopy(self.state)
    row = action // 3
    col = action % 3

    # Illegal move -- the square is not empty
    if not np.all(self.state[0][row][col] == TicTacToeEnvironment.EMPTY):
      self.terminated = True
      return TicTacToeEnvironment.ILLEGAL_MOVE_PENALTY

    # Move X
    self.state[0][row][col] = TicTacToeEnvironment.X

    # Did X Win
    if self.check_winner(TicTacToeEnvironment.X):
      self.terminated = True
      return TicTacToeEnvironment.WIN_REWARD

    if self.game_over():
      self.terminated = True
      return TicTacToeEnvironment.DRAW_REWARD

    move = self.get_O_move()
    self.state[0][move[0]][move[1]] = TicTacToeEnvironment.O

    # Did O Win
    if self.check_winner(TicTacToeEnvironment.O):
      self.terminated = True
      return TicTacToeEnvironment.LOSS_PENALTY

    if self.game_over():
      self.terminated = True
      return TicTacToeEnvironment.DRAW_REWARD
    return TicTacToeEnvironment.NOT_LOSS

  def get_O_move(self):
    """Return a uniformly random empty square as (row, col)."""
    empty_squares = []
    for row in range(3):
      for col in range(3):
        if np.all(self.state[0][row][col] == TicTacToeEnvironment.EMPTY):
          empty_squares.append((row, col))
    return random.choice(empty_squares)

  def check_winner(self, player):
    """Return True if `player` owns a full row, column, or diagonal."""
    for i in range(3):
      row = np.sum(self.state[0][i][:], axis=0)
      if np.all(row == player * 3):
        return True
      # BUG FIX: the original used self.state[0][:][i], which is identical
      # to self.state[0][i] (the [:] is a no-op full slice), so columns
      # were never actually checked -- the row was re-checked instead.
      # Proper NumPy column indexing selects column i of every row.
      col = np.sum(self.state[0][:, i], axis=0)
      if np.all(col == player * 3):
        return True

    diag1 = self.state[0][0][0] + self.state[0][1][1] + self.state[0][2][2]
    if np.all(diag1 == player * 3):
      return True

    diag2 = self.state[0][0][2] + self.state[0][1][1] + self.state[0][2][0]
    if np.all(diag2 == player * 3):
      return True
    return False

  def game_over(self):
    """Return True if no empty squares remain."""
    for i in range(3):
      for j in range(3):
        if np.all(self.state[0][i][j] == TicTacToeEnvironment.EMPTY):
          return False
    return True

  def display(self):
    """Return an ASCII rendering of the board ('_', 'X', 'O' per square)."""
    state = self.state[0]
    s = ""
    for row in range(3):
      for col in range(3):
        if np.all(state[row][col] == TicTacToeEnvironment.EMPTY):
          s += "_"
        if np.all(state[row][col] == TicTacToeEnvironment.X):
          s += "X"
        if np.all(state[row][col] == TicTacToeEnvironment.O):
          s += "O"
      s += "\n"
    return s
================================================
FILE: ch8/tensorgraph.py
================================================
"""TensorGraph OOP Framework."""
import numpy as np
import tensorflow as tf
import copy
import multiprocessing
import os
import re
import threading
from collections import Sequence
import pickle
import threading
import time
import numpy as np
import os
import six
import tensorflow as tf
import tempfile
class TensorGraph(object):
  """A model defined as a directed graph of Layer objects.

  Layers are registered through set_loss()/add_output(), topologically
  sorted, and their TensorFlow tensors are created when build() runs.
  Non-serializable TF primitives (Graph, optimizer, global step) are
  constructed lazily through _get_tf().
  """

  def __init__(self,
               batch_size=100,
               random_seed=None,
               graph=None,
               learning_rate=0.001,
               model_dir=None,
               **kwargs):
    """
    Parameters
    ----------
    batch_size: int
      default batch size for training and evaluating
    graph: tensorflow.Graph
      the Graph in which to create Tensorflow objects. If None, a new Graph
      is created.
    learning_rate: float or LearningRateSchedule
      the learning rate to use for optimization
    model_dir: str, optional
      directory for saving checkpoints. If None, a temporary directory is
      created.
    kwargs
    """
    # Layer Management
    self.layers = dict()
    self.features = list()
    self.labels = list()
    self.outputs = list()
    self.task_weights = list()
    self.loss = None
    self.built = False
    self.optimizer = None
    self.learning_rate = learning_rate

    # Singular place to hold Tensor objects which don't serialize
    # See TensorGraph._get_tf() for more details on lazy construction
    self.tensor_objects = {
        "Graph": graph,
        #"train_op": None,
    }
    self.global_step = 0
    self.batch_size = batch_size
    self.random_seed = random_seed
    if model_dir is not None:
      if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    else:
      model_dir = tempfile.mkdtemp()
      # NOTE(review): model_dir_is_temp is only set on this branch; any
      # code reading it after an explicit model_dir was supplied would hit
      # AttributeError -- confirm no caller depends on it.
      self.model_dir_is_temp = True
    self.model_dir = model_dir
    self.save_file = "%s/%s" % (self.model_dir, "model")
    self.model_class = None

  def _add_layer(self, layer):
    # Register a layer (and, recursively, its inputs), assigning a unique
    # default name on first sight. Input layers also become features.
    if layer.name is None:
      layer.name = "%s_%s" % (layer.__class__.__name__, len(self.layers) + 1)
    if layer.name in self.layers:
      return
    if isinstance(layer, Input):
      self.features.append(layer)
    self.layers[layer.name] = layer
    for in_layer in layer.in_layers:
      self._add_layer(in_layer)

  def topsort(self):
    """Return all registered layers ordered so each follows its inputs."""

    def add_layers_to_list(layer, sorted_layers):
      # Depth-first post-order traversal; inputs land before the layer.
      if layer in sorted_layers:
        return
      for in_layer in layer.in_layers:
        add_layers_to_list(in_layer, sorted_layers)
      sorted_layers.append(layer)

    sorted_layers = []
    for l in self.features + self.labels + self.task_weights + self.outputs:
      add_layers_to_list(l, sorted_layers)
    add_layers_to_list(self.loss, sorted_layers)
    return sorted_layers

  def build(self):
    """Create the TF tensors for every layer and open a Session (idempotent)."""
    if self.built:
      return
    with self._get_tf("Graph").as_default():
      # Scalar placeholder layers can use to switch train/eval behavior.
      self._training_placeholder = tf.placeholder(dtype=tf.float32, shape=())
      if self.random_seed is not None:
        tf.set_random_seed(self.random_seed)
      for layer in self.topsort():
        with tf.name_scope(layer.name):
          layer.create_tensor(training=self._training_placeholder)
      self.session = tf.Session()
      self.built = True

  def set_loss(self, layer):
    """Register `layer` as the loss to be minimized."""
    self._add_layer(layer)
    self.loss = layer

  def add_output(self, layer):
    """Register `layer` as a model output."""
    self._add_layer(layer)
    self.outputs.append(layer)

  def set_optimizer(self, optimizer):
    """Set the optimizer to use for fitting."""
    self.optimizer = optimizer

  def get_layer_variables(self, layer):
    """Get the list of trainable variables in a layer of the graph."""
    if not self.built:
      self.build()
    with self._get_tf("Graph").as_default():
      if layer.variable_scope == "":
        return []
      return tf.get_collection(
          tf.GraphKeys.TRAINABLE_VARIABLES, scope=layer.variable_scope)

  def get_global_step(self):
    """Return the lazily created global-step Variable."""
    return self._get_tf("GlobalStep")

  def _get_tf(self, obj):
    """Fetches underlying TensorFlow primitives.

    Parameters
    ----------
    obj: str
      If "Graph", returns tf.Graph instance. If "Optimizer", returns the
      optimizer. If "train_op", returns the train operation. If "GlobalStep" returns
      the global step.

    Returns
    -------
    TensorFlow Object
    """
    # Lazily construct and cache the requested object on first access,
    # then recurse once to return the cached value.
    if obj in self.tensor_objects and self.tensor_objects[obj] is not None:
      return self.tensor_objects[obj]
    if obj == "Graph":
      self.tensor_objects["Graph"] = tf.Graph()
    elif obj == "Optimizer":
      self.tensor_objects["Optimizer"] = tf.train.AdamOptimizer(
          learning_rate=self.learning_rate,
          beta1=0.9,
          beta2=0.999,
          epsilon=1e-7)
    elif obj == "GlobalStep":
      with self._get_tf("Graph").as_default():
        self.tensor_objects["GlobalStep"] = tf.Variable(0, trainable=False)
    return self._get_tf(obj)

  def restore(self):
    """Reload the values of all variables from the most recent checkpoint file.

    Raises
    ------
    ValueError
      if no checkpoint exists in model_dir.
    """
    if not self.built:
      self.build()
    last_checkpoint = tf.train.latest_checkpoint(self.model_dir)
    if last_checkpoint is None:
      raise ValueError("No checkpoint found")
    with self._get_tf("Graph").as_default():
      saver = tf.train.Saver()
      saver.restore(self.session, last_checkpoint)

  def __del__(self):
    pass
class Layer(object):
  """Base class for nodes in a TensorGraph.

  A Layer records its upstream layers; after create_tensor() runs, the
  layer's TensorFlow output is available as `out_tensor`.
  """

  def __init__(self, in_layers=None, **kwargs):
    self.name = kwargs.get("name")
    if in_layers is None:
      in_layers = []
    elif not isinstance(in_layers, Sequence):
      in_layers = [in_layers]
    self.in_layers = in_layers
    self.variable_scope = ""
    self.tb_input = None

  def create_tensor(self, in_layers=None, **kwargs):
    raise NotImplementedError("Subclasses must implement for themselves")

  def _get_input_tensors(self, in_layers):
    """Get the input tensors to this layer.

    Parameters
    ----------
    in_layers: list of Layers or tensors
      the inputs passed to create_tensor(). If None, this layer's inputs will
      be used instead.
    """
    if in_layers is None:
      in_layers = self.in_layers
    if not isinstance(in_layers, Sequence):
      in_layers = [in_layers]
    return [tf.convert_to_tensor(source) for source in in_layers]
def _convert_layer_to_tensor(value, dtype=None, name=None, as_ref=False):
  # Conversion hook: lets a Layer be passed anywhere TensorFlow expects a
  # tensor by substituting its out_tensor.
  return tf.convert_to_tensor(value.out_tensor, dtype=dtype, name=name)


# Register the hook so tf.convert_to_tensor accepts Layer instances.
tf.register_tensor_conversion_function(Layer, _convert_layer_to_tensor)
class Dense(Layer):
  """A fully connected layer."""

  def __init__(
      self,
      out_channels,
      activation_fn=None,
      biases_initializer=tf.zeros_initializer,
      weights_initializer=tf.contrib.layers.variance_scaling_initializer,
      **kwargs):
    """Create a dense layer.

    The weight and bias initializers are specified by callable objects that construct
    and return a Tensorflow initializer when invoked with no arguments. This will typically
    be either the initializer class itself (if the constructor does not require arguments),
    or a TFWrapper (if it does).

    Parameters
    ----------
    out_channels: int
      the number of output values
    activation_fn: object
      the Tensorflow activation function to apply to the output
    biases_initializer: callable object
      the initializer for bias values. This may be None, in which case the layer
      will not include biases.
    weights_initializer: callable object
      the initializer for weight values
    """
    super(Dense, self).__init__(**kwargs)
    self.out_channels = out_channels
    self.out_tensor = None
    self.activation_fn = activation_fn
    self.biases_initializer = biases_initializer
    self.weights_initializer = weights_initializer

  def create_tensor(self, in_layers=None, **kwargs):
    """Build the fully connected op; requires exactly one input layer."""
    inputs = self._get_input_tensors(in_layers)
    if len(inputs) != 1:
      raise ValueError("Dense layer can only have one input")
    parent = inputs[0]
    if self.biases_initializer is None:
      biases_initializer = None
    else:
      # Initializers are stored as callables; invoke to get the instance.
      biases_initializer = self.biases_initializer()
    out_tensor = tf.contrib.layers.fully_connected(parent,
                                                   num_outputs=self.out_channels,
                                                   activation_fn=self.activation_fn,
                                                   biases_initializer=biases_initializer,
                                                   weights_initializer=self.weights_initializer(),
                                                   reuse=False,
                                                   trainable=True)
    self.out_tensor = out_tensor
    return out_tensor
class Squeeze(Layer):
  """Remove size-1 dimensions from the input tensor."""

  def __init__(self, in_layers=None, squeeze_dims=None, **kwargs):
    self.squeeze_dims = squeeze_dims
    super(Squeeze, self).__init__(in_layers, **kwargs)

  def create_tensor(self, in_layers=None, **kwargs):
    parent = self._get_input_tensors(in_layers)[0]
    squeezed = tf.squeeze(parent, squeeze_dims=self.squeeze_dims)
    self.out_tensor = squeezed
    return squeezed
class BatchNorm(Layer):
  """Apply batch normalization to the input tensor."""

  def __init__(self, in_layers=None, **kwargs):
    super(BatchNorm, self).__init__(in_layers, **kwargs)

  def create_tensor(self, in_layers=None, **kwargs):
    parent = self._get_input_tensors(in_layers)[0]
    normalized = tf.layers.batch_normalization(parent)
    self.out_tensor = normalized
    return normalized
class Flatten(Layer):
  """Flatten every dimension except the first"""

  def __init__(self, in_layers=None, **kwargs):
    super(Flatten, self).__init__(in_layers, **kwargs)

  def create_tensor(self, in_layers=None, **kwargs):
    inputs = self._get_input_tensors(in_layers)
    if len(inputs) != 1:
      raise ValueError("Only One Parent to Flatten")
    parent = inputs[0]
    # Multiply out all trailing (static) dimensions to get the flat width.
    shape = parent.get_shape()
    flat_size = 1
    for dim in shape[1:]:
      flat_size *= dim.value
    flattened = tf.reshape(parent, shape=(-1, flat_size))
    self.out_tensor = flattened
    return flattened
class SoftMax(Layer):
  """Apply a softmax over the input tensor."""

  def __init__(self, in_layers=None, **kwargs):
    super(SoftMax, self).__init__(in_layers, **kwargs)

  def create_tensor(self, in_layers=None, **kwargs):
    inputs = self._get_input_tensors(in_layers)
    if len(inputs) != 1:
      raise ValueError("Must only Softmax single parent")
    result = tf.contrib.layers.softmax(inputs[0])
    self.out_tensor = result
    return result
class Input(Layer):
  """A placeholder layer that feeds external data into the graph."""

  def __init__(self, shape, dtype=tf.float32, **kwargs):
    self._shape = tuple(shape)
    self.dtype = dtype
    super(Input, self).__init__(**kwargs)

  def create_tensor(self, in_layers=None, **kwargs):
    if in_layers is None:
      in_layers = self.in_layers
    placeholder = tf.placeholder(dtype=self.dtype, shape=self._shape)
    self.out_tensor = placeholder
    return placeholder
================================================
FILE: ch8/tictactoe.py
================================================
"""Adapted from DeepChem Examples by Peter Eastman and Karl Leswing."""
import copy
import random
import shutil
import numpy as np
import tensorflow as tf
import deepchem as dc
from environment import TicTacToeEnvironment
from a3c import A3C
def eval_tic_tac_toe(value_weight,
                     num_epoch_rounds=1,
                     games=10**4,
                     rollouts=10**5,
                     advantage_lambda=0.98):
  """
  Returns the average reward over 10k games after 100k rollouts

  Parameters
  ----------
  value_weight: float
    weight of the value term in the A3C loss
  num_epoch_rounds: int
    number of fit/evaluate rounds to run
  games: int
    number of evaluation games played per round
  rollouts: int
    number of training rollouts per round
  advantage_lambda: float
    lambda parameter for generalized advantage estimation

  Returns
  -------
  avg_rewards: list of dict
    one entry per round mapping total rollouts so far to the mean reward
  """
  env = TicTacToeEnvironment()
  model_dir = "/tmp/tictactoe"
  # Start from a clean model directory; it's fine if it doesn't exist yet.
  # (The original used a bare `except:`, which also swallows
  # KeyboardInterrupt/SystemExit.)
  try:
    shutil.rmtree(model_dir)
  except OSError:
    pass
  avg_rewards = []
  for j in range(num_epoch_rounds):
    print("Epoch round: %d" % j)
    a3c_engine = A3C(
        env,
        entropy_weight=0.01,
        value_weight=value_weight,
        model_dir=model_dir,
        advantage_lambda=advantage_lambda)
    # Best effort: resume from a checkpoint when one exists.
    try:
      a3c_engine.restore()
    except Exception:
      print("unable to restore")
    a3c_engine.fit(rollouts)
    rewards = []
    for i in range(games):
      env.reset()
      reward = -float('inf')
      while not env.terminated:
        action = a3c_engine.select_action(env.state)
        reward = env.step(action)
      # Only the final reward of each game counts toward the average.
      rewards.append(reward)
    print("Mean reward at round %d is %f" % (j+1, np.mean(rewards)))
    avg_rewards.append({(j + 1) * rollouts: np.mean(rewards)})
  return avg_rewards
def main():
  """Run a 20-round tic-tac-toe A3C training/evaluation sweep."""
  # The original assigned an unused value_weight = 6.0 and then hard-coded
  # 0.2 in the call. Keep the effective value (0.2) and pass it through so
  # the variable and the call agree.
  value_weight = 0.2
  score = eval_tic_tac_toe(value_weight=value_weight, num_epoch_rounds=20,
                           advantage_lambda=0.,
                           games=10**4, rollouts=5*10**4)
  print(score)


if __name__ == "__main__":
  main()
================================================
FILE: ch9/cifar10.py
================================================
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Builds the CIFAR-10 network.
Summary of available functions:
# Compute input images and labels for training. If you would like to run
# evaluations, use inputs() instead.
inputs, labels = distorted_inputs()
# Compute inference on the model inputs to make a prediction.
predictions = inference(inputs)
# Compute the total loss of the prediction with respect to the labels.
loss = loss(predictions, labels)
# Create a graph to run one step of training with respect to the loss.
train_op = train(loss, global_step)
"""
# pylint: disable=missing-docstring
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import re
import sys
import tarfile
from six.moves import urllib, xrange # pylint: disable=redefined-builtin
import tensorflow as tf
import cifar10_input
FLAGS = tf.app.flags.FLAGS
# Basic model parameters.
tf.app.flags.DEFINE_integer('batch_size', 128,
"""Number of images to process in a batch.""")
tf.app.flags.DEFINE_string('data_dir', '/tmp/cifar10_data',
"""Path to the CIFAR-10 data directory.""")
# Process images of this size. Note that this differs from the original CIFAR
# image size of 32 x 32. If one alters this number, then the entire model
# architecture will change and any model would need to be retrained.
IMAGE_SIZE = 24
NUM_CLASSES = 10
# Global constants describing the CIFAR-10 data set.
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000
# Constants describing the training process.
MOVING_AVERAGE_DECAY = 0.9999 # The decay to use for the moving average.
NUM_EPOCHS_PER_DECAY = 350.0 # Epochs after which learning rate decays.
LEARNING_RATE_DECAY_FACTOR = 0.1 # Learning rate decay factor.
INITIAL_LEARNING_RATE = 0.1 # Initial learning rate.
# If a model is trained with multiple GPUs, prefix all Op names with tower_name
# to differentiate the operations. Note that this prefix is removed from the
# names of the summaries when visualizing a model.
TOWER_NAME = 'tower'
DATA_URL = 'http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
def distorted_inputs():
  """Construct distorted input for CIFAR training using the Reader ops.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.

  Raises:
    ValueError: If no data_dir
  """
  if not FLAGS.data_dir:
    raise ValueError('Please supply a data_dir')
  batches_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
  return _distorted_inputs(data_dir=batches_dir, batch_size=FLAGS.batch_size)
def read_cifar10(filename_queue):
  """Reads and parses examples from CIFAR10 data files.

  Recommendation: if you want N-way read parallelism, call this function
  N times. This will give you N independent Readers reading different
  files & positions within those files, which will give better mixing of
  examples.

  Args:
    filename_queue: A queue of strings with the filenames to read from.

  Returns:
    An object representing a single example, with the following fields:
      height: number of rows in the result (32)
      width: number of columns in the result (32)
      depth: number of color channels in the result (3)
      key: a scalar string Tensor describing the filename & record number
        for this example.
      label: an int32 Tensor with the label in the range 0..9.
      uint8image: a [height, width, depth] uint8 Tensor with the image data
  """

  class CIFAR10Record(object):
    # Plain record container; fields are attached dynamically below.
    pass

  result = CIFAR10Record()

  # Dimensions of the images in the CIFAR-10 dataset.
  # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
  # input format.
  label_bytes = 1  # 2 for CIFAR-100
  result.height = 32
  result.width = 32
  result.depth = 3
  image_bytes = result.height * result.width * result.depth
  # Every record consists of a label followed by the image, with a
  # fixed number of bytes for each.
  record_bytes = label_bytes + image_bytes

  # Read a record, getting filenames from the filename_queue. No
  # header or footer in the CIFAR-10 format, so we leave header_bytes
  # and footer_bytes at their default of 0.
  reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
  result.key, value = reader.read(filename_queue)

  # Convert from a string to a vector of uint8 that is record_bytes long.
  record_bytes = tf.decode_raw(value, tf.uint8)

  # The first bytes represent the label, which we convert from uint8->int32.
  result.label = tf.cast(
      tf.strided_slice(record_bytes, [0], [label_bytes]), tf.int32)

  # The remaining bytes after the label represent the image, which we reshape
  # from [depth * height * width] to [depth, height, width].
  depth_major = tf.reshape(
      tf.strided_slice(record_bytes, [label_bytes],
                       [label_bytes + image_bytes]),
      [result.depth, result.height, result.width])
  # Convert from [depth, height, width] to [height, width, depth].
  result.uint8image = tf.transpose(depth_major, [1, 2, 0])

  return result
def _distorted_inputs(data_dir, batch_size):
  """Construct distorted input for CIFAR training using the Reader ops.

  Args:
    data_dir: Path to the CIFAR-10 data directory.
    batch_size: Number of images per batch.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.

  Raises:
    ValueError: if any of the five data_batch_*.bin files is missing.
  """
  filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)
               for i in xrange(1, 6)]
  for f in filenames:
    if not tf.gfile.Exists(f):
      raise ValueError('Failed to find file: ' + f)

  # Create a queue that produces the filenames to read.
  filename_queue = tf.train.string_input_producer(filenames)

  # Read examples from files in the filename queue.
  read_input = read_cifar10(filename_queue)
  reshaped_image = tf.cast(read_input.uint8image, tf.float32)

  height = IMAGE_SIZE
  width = IMAGE_SIZE

  # Image processing for training the network. Note the many random
  # distortions applied to the image.

  # Randomly crop a [height, width] section of the image.
  distorted_image = tf.random_crop(reshaped_image, [height, width, 3])

  # Randomly flip the image horizontally.
  distorted_image = tf.image.random_flip_left_right(distorted_image)

  # Because these operations are not commutative, consider randomizing
  # the order their operation.
  # NOTE: since per_image_standardization zeros the mean and makes
  # the stddev unit, this likely has no effect see tensorflow#1458.
  distorted_image = tf.image.random_brightness(distorted_image,
                                               max_delta=63)
  distorted_image = tf.image.random_contrast(distorted_image,
                                             lower=0.2, upper=1.8)

  # Subtract off the mean and divide by the variance of the pixels.
  float_image = tf.image.per_image_standardization(distorted_image)

  # Set the shapes of tensors.
  float_image.set_shape([height, width, 3])
  read_input.label.set_shape([1])

  # Ensure that the random shuffling has good mixing properties.
  min_fraction_of_examples_in_queue = 0.4
  min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                           min_fraction_of_examples_in_queue)
  print ('Filling queue with %d CIFAR images before starting to train. '
         'This will take a few minutes.' % min_queue_examples)

  # Generate a batch of images and labels by building up a queue of examples.
  return _generate_image_and_label_batch(float_image, read_input.label,
                                         min_queue_examples, batch_size,
                                         shuffle=True)
def inputs(eval_data):
  """Construct input for CIFAR evaluation using the Reader ops.

  Args:
    eval_data: bool, indicating if one should use the train or eval data set.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.

  Raises:
    ValueError: If no data_dir
  """
  if not FLAGS.data_dir:
    raise ValueError('Please supply a data_dir')
  batches_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
  return _inputs(eval_data=eval_data,
                 data_dir=batches_dir,
                 batch_size=FLAGS.batch_size)
def _inputs(eval_data, data_dir, batch_size):
  """Construct input for CIFAR evaluation using the Reader ops.

  Args:
    eval_data: bool, indicating if one should use the train or eval data set.
    data_dir: Path to the CIFAR-10 data directory.
    batch_size: Number of images per batch.

  Returns:
    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.

  Raises:
    ValueError: if a required data file is missing.
  """
  # Choose the training batches or the held-out test batch.
  if not eval_data:
    filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)
                 for i in xrange(1, 6)]
    num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN
  else:
    filenames = [os.path.join(data_dir, 'test_batch.bin')]
    num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_EVAL

  for f in filenames:
    if not tf.gfile.Exists(f):
      raise ValueError('Failed to find file: ' + f)

  # Create a queue that produces the filenames to read.
  filename_queue = tf.train.string_input_producer(filenames)

  # Read examples from files in the filename queue.
  read_input = read_cifar10(filename_queue)
  reshaped_image = tf.cast(read_input.uint8image, tf.float32)

  height = IMAGE_SIZE
  width = IMAGE_SIZE

  # Image processing for evaluation.
  # Crop the central [height, width] of the image.
  resized_image = tf.image.resize_image_with_crop_or_pad(reshaped_image,
                                                         height, width)

  # Subtract off the mean and divide by the variance of the pixels.
  float_image = tf.image.per_image_standardization(resized_image)

  # Set the shapes of tensors.
  float_image.set_shape([height, width, 3])
  read_input.label.set_shape([1])

  # Ensure that the random shuffling has good mixing properties.
  min_fraction_of_examples_in_queue = 0.4
  min_queue_examples = int(num_examples_per_epoch *
                           min_fraction_of_examples_in_queue)

  # Generate a batch of images and labels by building up a queue of examples.
  return _generate_image_and_label_batch(float_image, read_input.label,
                                         min_queue_examples, batch_size,
                                         shuffle=False)
def maybe_download_and_extract():
  """Download and extract the tarball from Alex's website.

  The archive is fetched into FLAGS.data_dir (created if needed) and
  extracted there; both steps are skipped when their output already exists.
  """
  dest_directory = FLAGS.data_dir
  if not os.path.exists(dest_directory):
    os.makedirs(dest_directory)
  filename = DATA_URL.split('/')[-1]
  filepath = os.path.join(dest_directory, filename)
  if not os.path.exists(filepath):

    def _progress(count, block_size, total_size):
      # Rewrite a single status line with the percentage downloaded.
      sys.stdout.write('\r>> Downloading %s %.1f%%' % (filename,
          float(count * block_size) / float(total_size) * 100.0))
      sys.stdout.flush()

    filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
    print()
    statinfo = os.stat(filepath)
    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
  extracted_dir_path = os.path.join(dest_directory, 'cifar-10-batches-bin')
  if not os.path.exists(extracted_dir_path):
    # Fix: use a context manager so the archive's file handle is always
    # closed (the original leaked the TarFile object).
    with tarfile.open(filepath, 'r:gz') as archive:
      archive.extractall(dest_directory)
def _generate_image_and_label_batch(image, label, min_queue_examples,
                                    batch_size, shuffle):
  """Construct a queued batch of images and labels.

  Args:
    image: 3-D Tensor of [height, width, 3] of type.float32.
    label: 1-D Tensor of type.int32
    min_queue_examples: int32, minimum number of samples to retain
      in the queue that provides of batches of examples.
    batch_size: Number of images per batch.
    shuffle: boolean indicating whether to use a shuffling queue.

  Returns:
    images: Images. 4D tensor of [batch_size, height, width, 3] size.
    labels: Labels. 1D tensor of [batch_size] size.
  """
  # Create a queue that shuffles the examples, and then
  # read 'batch_size' images + labels from the example queue.
  num_preprocess_threads = 16
  if shuffle:
    # min_after_dequeue controls how well the shuffle mixes examples.
    images, label_batch = tf.train.shuffle_batch(
        [image, label],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + 3 * batch_size,
        min_after_dequeue=min_queue_examples)
  else:
    images, label_batch = tf.train.batch(
        [image, label],
        batch_size=batch_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + 3 * batch_size)

  # Display the training images in the visualizer.
  tf.summary.image('images', images)

  return images, tf.reshape(label_batch, [batch_size])
================================================
FILE: ch9/cifar10_multi_gpu_train.py
================================================
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A binary to train CIFAR-10 using multiple GPUs with synchronous updates.
Accuracy:
cifar10_multi_gpu_train.py achieves ~86% accuracy after 100K steps
(256 epochs of data) as judged by cifar10_eval.py.
Speed: With batch_size 128.
System | Step Time (sec/batch) | Accuracy
--------------------------------------------------------------------
1 Tesla K20m | 0.35-0.60 | ~86% at 60K steps (5 hours)
1 Tesla K40m | 0.25-0.35 | ~86% at 100K steps (4 hours)
2 Tesla K20m | 0.13-0.20 | ~84% at 30K steps (2.5 hours)
3 Tesla K20m | 0.13-0.18 | ~84% at 30K steps
4 Tesla K20m | ~0.10 | ~84% at 30K steps
Usage:
Please see the tutorial and website for how to download the CIFAR-10
data set, compile the program and train the model.
http://tensorflow.org/tutorials/deep_cnn/
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from datetime import datetime
import os.path
import shutil
import re
import time
import numpy as np
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
import cifar10
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('train_dir', '/tmp/cifar10_train',
"""Directory where to write event logs """
"""and checkpoint.""")
tf.app.flags.DEFINE_integer('max_steps', 1000000,
"""Number of batches to run.""")
tf.app.flags.DEFINE_integer('num_gpus', 1,
"""How many GPUs to use.""")
tf.app.flags.DEFINE_boolean('log_device_placement', False,
"""Whether to log device placement.""")
def _activation_summary(x):
  """Helper to create summaries for activations.

  Creates a summary that provides a histogram of activations.
  Creates a summary that measures the sparsity of activations.

  Args:
    x: Tensor
  Returns:
    nothing
  """
  # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
  # session. This helps the clarity of presentation on tensorboard.
  name = re.sub('%s_[0-9]*/' % cifar10.TOWER_NAME, '', x.op.name)
  tf.summary.histogram('%s/activations' % name, x)
  tf.summary.scalar('%s/sparsity' % name, tf.nn.zero_fraction(x))
def _variable_on_cpu(name, shape, initializer):
  """Create (or fetch) a float32 variable pinned to host (CPU) memory.

  Keeping parameters on the CPU lets every GPU tower share one copy.

  Args:
    name: name of the variable
    shape: list of ints
    initializer: initializer for Variable

  Returns:
    Variable Tensor
  """
  with tf.device('/cpu:0'):
    return tf.get_variable(name, shape, initializer=initializer,
                           dtype=tf.float32)
def _variable_with_weight_decay(name, shape, stddev, wd):
  """Create a truncated-normal-initialized variable, optionally with L2 decay.

  Note that the Variable is initialized with a truncated normal distribution.
  A weight decay is added only if one is specified.

  Args:
    name: name of the variable
    shape: list of ints
    stddev: standard deviation of a truncated Gaussian
    wd: add L2Loss weight decay multiplied by this float. If None, weight
      decay is not added for this Variable.

  Returns:
    Variable Tensor
  """
  initializer = tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32)
  var = _variable_on_cpu(name, shape, initializer)
  if wd is not None:
    # Register the scaled L2 penalty so loss() folds it into the total loss.
    penalty = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
    tf.add_to_collection('losses', penalty)
  return var
def inference(images):
  """Build the CIFAR-10 model.

  Architecture: conv1 -> pool1 -> norm1 -> conv2 -> norm2 -> pool2 ->
  local3 (FC, 384) -> local4 (FC, 192) -> softmax_linear. Only the two
  fully connected layers carry a non-zero weight decay (wd=0.004).

  Args:
    images: Images returned from distorted_inputs() or inputs().

  Returns:
    Logits.
  """
  # We instantiate all variables using tf.get_variable() instead of
  # tf.Variable() in order to share variables across multiple GPU training runs.
  # If we only ran this model on a single GPU, we could simplify this function
  # by replacing all instances of tf.get_variable() with tf.Variable().
  #
  # conv1: 5x5 conv, 3 -> 64 channels, no weight decay on conv kernels.
  with tf.variable_scope('conv1') as scope:
    kernel = _variable_with_weight_decay('weights',
                                         shape=[5, 5, 3, 64],
                                         stddev=5e-2,
                                         wd=0.0)
    conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
    pre_activation = tf.nn.bias_add(conv, biases)
    conv1 = tf.nn.relu(pre_activation, name=scope.name)
    _activation_summary(conv1)

  # pool1: 3x3 overlapping max pooling with stride 2.
  pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                         padding='SAME', name='pool1')
  # norm1: local response normalization (AlexNet-style).
  norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                    name='norm1')

  # conv2: 5x5 conv, 64 -> 64 channels.
  with tf.variable_scope('conv2') as scope:
    kernel = _variable_with_weight_decay('weights',
                                         shape=[5, 5, 64, 64],
                                         stddev=5e-2,
                                         wd=0.0)
    conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
    pre_activation = tf.nn.bias_add(conv, biases)
    conv2 = tf.nn.relu(pre_activation, name=scope.name)
    _activation_summary(conv2)

  # norm2 — note: normalization precedes pooling here, the reverse of layer 1.
  norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                    name='norm2')
  # pool2
  pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1],
                         strides=[1, 2, 2, 1], padding='SAME', name='pool2')

  # local3
  with tf.variable_scope('local3') as scope:
    # Move everything into depth so we can perform a single matrix multiply.
    # NOTE: flattening uses FLAGS.batch_size, so the graph requires a fixed,
    # statically known batch size.
    reshape = tf.reshape(pool2, [FLAGS.batch_size, -1])
    dim = reshape.get_shape()[1].value
    weights = _variable_with_weight_decay('weights', shape=[dim, 384],
                                          stddev=0.04, wd=0.004)
    biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
    local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
    _activation_summary(local3)

  # local4
  with tf.variable_scope('local4') as scope:
    weights = _variable_with_weight_decay('weights', shape=[384, 192],
                                          stddev=0.04, wd=0.004)
    biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
    local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name=scope.name)
    _activation_summary(local4)

  # linear layer(WX + b),
  # We don't apply softmax here because
  # tf.nn.sparse_softmax_cross_entropy_with_logits accepts the unscaled logits
  # and performs the softmax internally for efficiency.
  with tf.variable_scope('softmax_linear') as scope:
    weights = _variable_with_weight_decay('weights', [192, cifar10.NUM_CLASSES],
                                          stddev=1/192.0, wd=0.0)
    biases = _variable_on_cpu('biases', [cifar10.NUM_CLASSES],
                              tf.constant_initializer(0.0))
    softmax_linear = tf.add(tf.matmul(local4, weights), biases, name=scope.name)
    _activation_summary(softmax_linear)

  return softmax_linear
def loss(logits, labels):
  """Add L2Loss to all the trainable variables.

  Add summary for "Loss" and "Loss/avg".

  Args:
    logits: Logits from inference().
    labels: Labels from distorted_inputs or inputs(). 1-D tensor
      of shape [batch_size]

  Returns:
    Loss tensor of type float.
  """
  # sparse_softmax_cross_entropy_with_logits wants integer class ids.
  labels = tf.cast(labels, tf.int64)
  per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
      labels=labels, logits=logits, name='cross_entropy_per_example')
  mean_xent = tf.reduce_mean(per_example, name='cross_entropy')
  tf.add_to_collection('losses', mean_xent)

  # Total loss = batch-mean cross entropy plus every weight-decay term
  # registered in the 'losses' collection by _variable_with_weight_decay().
  return tf.add_n(tf.get_collection('losses'), name='total_loss')
def tower_loss(scope, images, labels):
  """Calculate the total loss on a single tower running the CIFAR model.

  Args:
    scope: unique prefix string identifying the CIFAR tower, e.g. 'tower_0'
    images: Images. 4D tensor of shape [batch_size, height, width, 3].
    labels: Labels. 1D tensor of shape [batch_size].

  Returns:
    Tensor of shape [] containing the total loss for a batch of data
  """
  # Build inference Graph.
  logits = inference(images)

  # Build the portion of the Graph calculating the losses. Note that we will
  # assemble the total_loss using a custom function below.
  _ = loss(logits, labels)

  # Assemble all of the losses for the current tower only: filtering the
  # 'losses' collection by this tower's name scope excludes entries added
  # by other towers.
  losses = tf.get_collection('losses', scope)

  # Calculate the total loss for the current tower.
  total_loss = tf.add_n(losses, name='total_loss')

  # Attach a scalar summary to all individual losses and the total
  # loss; do the same for the averaged version of the losses.
  for l in losses + [total_loss]:
    # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU
    # training session. This helps the clarity of presentation on
    # tensorboard.
    loss_name = re.sub('%s_[0-9]*/' % cifar10.TOWER_NAME, '', l.op.name)
    tf.summary.scalar(loss_name, l)

  return total_loss
def average_gradients(tower_grads):
  """Calculate the average gradient for each shared variable across all towers.

  Note that this function provides a synchronization point across all towers.

  Args:
    tower_grads: List of lists of (gradient, variable) tuples. The
      outer list is over individual gradients. The inner list is over
      the gradient calculation for each tower.

  Returns:
    List of pairs of (gradient, variable) where the gradient has been averaged
    across all towers.
  """
  average_grads = []
  for grad_and_vars in zip(*tower_grads):
    # Note that each grad_and_vars looks like the following:
    #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
    #
    # Stack the per-tower gradients along a new leading 'tower' axis and
    # average over it. tf.stack is the idiomatic single-op equivalent of
    # the expand_dims-per-gradient + concat pattern.
    grads = [g for g, _ in grad_and_vars]
    grad = tf.reduce_mean(tf.stack(grads, axis=0), 0)

    # Keep in mind that the Variables are redundant because they are shared
    # across towers. So .. we will just return the first tower's pointer to
    # the Variable.
    v = grad_and_vars[0][1]
    average_grads.append((grad, v))
  return average_grads
def train():
  """Train CIFAR-10 for a number of steps.

  Builds the multi-tower graph (one replica per GPU with shared variables),
  averages gradients on the CPU, and runs the training loop with periodic
  logging, summaries, and checkpointing.
  """
  # The graph is assembled with the CPU as the default device; only each
  # tower's forward/backward pass is explicitly placed on a GPU.
  with tf.Graph().as_default(), tf.device('/cpu:0'):
    # Create a variable to count the number of train() calls. This equals the
    # number of batches processed * FLAGS.num_gpus.
    global_step = tf.get_variable(
        'global_step', [],
        initializer=tf.constant_initializer(0), trainable=False)

    # Calculate the learning rate schedule.
    num_batches_per_epoch = (cifar10.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
                             FLAGS.batch_size)
    decay_steps = int(num_batches_per_epoch * cifar10.NUM_EPOCHS_PER_DECAY)

    # Decay the learning rate exponentially based on the number of steps.
    lr = tf.train.exponential_decay(cifar10.INITIAL_LEARNING_RATE,
                                    global_step,
                                    decay_steps,
                                    cifar10.LEARNING_RATE_DECAY_FACTOR,
                                    staircase=True)

    # Create an optimizer that performs gradient descent.
    opt = tf.train.GradientDescentOptimizer(lr)

    # Get images and labels for CIFAR-10; prefetch so towers don't starve.
    images, labels = cifar10.distorted_inputs()
    batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(
        [images, labels], capacity=2 * FLAGS.num_gpus)

    # Calculate the gradients for each model tower.
    tower_grads = []
    with tf.variable_scope(tf.get_variable_scope()):
      for i in xrange(FLAGS.num_gpus):
        with tf.device('/gpu:%d' % i):
          with tf.name_scope('%s_%d' % (cifar10.TOWER_NAME, i)) as scope:
            # Dequeues one batch for the GPU
            image_batch, label_batch = batch_queue.dequeue()
            # Calculate the loss for one tower of the CIFAR model. This
            # function constructs the entire CIFAR model but shares the
            # variables across all towers.
            # NOTE: this local 'loss' shadows the module-level loss()
            # function; the value logged by the loop below is the LAST
            # tower's loss only.
            loss = tower_loss(scope, image_batch, label_batch)

            # Reuse variables for the next tower.
            tf.get_variable_scope().reuse_variables()

            # Retain the summaries from the final tower.
            summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)

            # Calculate the gradients for the batch of data on this CIFAR
            # tower.
            grads = opt.compute_gradients(loss)

            # Keep track of the gradients across all towers.
            tower_grads.append(grads)

    # We must calculate the mean of each gradient. Note that this is the
    # synchronization point across all towers.
    grads = average_gradients(tower_grads)

    # Add a summary to track the learning rate.
    summaries.append(tf.summary.scalar('learning_rate', lr))

    # Add histograms for gradients.
    for grad, var in grads:
      if grad is not None:
        summaries.append(tf.summary.histogram(var.op.name + '/gradients', grad))

    # Apply the gradients to adjust the shared variables.
    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    # Add histograms for trainable variables.
    for var in tf.trainable_variables():
      summaries.append(tf.summary.histogram(var.op.name, var))

    # Track the moving averages of all trainable variables.
    variable_averages = tf.train.ExponentialMovingAverage(
        cifar10.MOVING_AVERAGE_DECAY, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # Group all updates to into a single train op.
    train_op = tf.group(apply_gradient_op, variables_averages_op)

    # Create a saver.
    saver = tf.train.Saver(tf.global_variables())

    # Build the summary operation from the last tower summaries.
    summary_op = tf.summary.merge(summaries)

    # Build an initialization operation to run below.
    init = tf.global_variables_initializer()

    # Start running operations on the Graph. allow_soft_placement must be set
    # to True to build towers on GPU, as some of the ops do not have GPU
    # implementations.
    sess = tf.Session(config=tf.ConfigProto(
        allow_soft_placement=True,
        log_device_placement=FLAGS.log_device_placement))
    sess.run(init)

    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

    for step in xrange(FLAGS.max_steps):
      start_time = time.time()
      _, loss_value = sess.run([train_op, loss])
      duration = time.time() - start_time

      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      # Log throughput every 10 steps.
      if step % 10 == 0:
        num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = duration / FLAGS.num_gpus

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print (format_str % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch))

      # Write summaries every 100 steps.
      if step % 100 == 0:
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, step)

      # Save the model checkpoint periodically.
      if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
def main(argv=None):  # pylint: disable=unused-argument
  """Fetch the dataset if needed, reset the train dir, and run training."""
  cifar10.maybe_download_and_extract()
  # Start from a clean training directory so stale checkpoints and event
  # files from a previous run cannot interfere with this one.
  train_dir = FLAGS.train_dir
  if os.path.exists(train_dir):
    shutil.rmtree(train_dir)
  os.makedirs(train_dir)
  train()


if __name__ == '__main__':
  main()