[
  {
    "path": "README.md",
    "content": "# TensorFlow for Deep Learning Companion Code\nReferenced throughout the book.\n\n\n<center>\n\n![Book Cover](https://pbs.twimg.com/media/DXQHXOtVoAEO4T_.jpg:large)\n\n</center>\n\n\n## TensorFlow Versions\n\nThe TensorFlow library has been evolving rapidly in the last couple years, and some of the code in this repo and the associated book no longer work with the latest versions of TensorFlow. We recommend using TensorFlow 1.6 for working through all exercises in this book. We are looking into creating a full `requirements.txt` file for all needed dependencies and hope to have that available for you soon.\n\nWe also welcome any PRs that modify code to work with more recent TensorFlow versions. We are looking into these upgrades on our end as well.\n"
  },
  {
    "path": "ch3/linear_regression_tf.py",
    "content": "import numpy as np\nnp.random.seed(456)\nimport  tensorflow as tf\ntf.set_random_seed(456)\nfrom matplotlib import rc\nrc('text', usetex=True)\nimport matplotlib.pyplot as plt\nfrom scipy.stats import pearsonr\nfrom sklearn.metrics import mean_squared_error\n\n\ndef pearson_r2_score(y, y_pred):\n  \"\"\"Computes Pearson R^2 (square of Pearson correlation).\"\"\"\n  return pearsonr(y, y_pred)[0]**2\n\ndef rms_score(y_true, y_pred):\n  \"\"\"Computes RMS error.\"\"\"\n  return np.sqrt(mean_squared_error(y_true, y_pred))\n\n# Generate synthetic data\nN = 100\nw_true = 5\nb_true = 2\nnoise_scale = .1\nx_np = np.random.rand(N, 1)\nnoise = np.random.normal(scale=noise_scale, size=(N, 1))\n# Convert shape of y_np to (N,)\ny_np = np.reshape(w_true * x_np  + b_true + noise, (-1))\n\n# Save image of the data distribution\nplt.scatter(x_np, y_np)\nplt.xlabel(\"x\")\nplt.ylabel(\"y\")\nplt.xlim(0, 1)\nplt.title(\"Toy Linear Regression Data, \"\n          r\"$y = 5x + 2 + N(0, 1)$\")\nplt.savefig(\"lr_data.png\")\n\n# Generate tensorflow graph\nwith tf.name_scope(\"placeholders\"):\n  x = tf.placeholder(tf.float32, (N, 1))\n  y = tf.placeholder(tf.float32, (N,))\nwith tf.name_scope(\"weights\"):\n  W = tf.Variable(tf.random_normal((1, 1)))\n  b = tf.Variable(tf.random_normal((1,)))\nwith tf.name_scope(\"prediction\"):\n  y_pred = tf.matmul(x, W) + b\nwith tf.name_scope(\"loss\"):\n  l = tf.reduce_sum((y - tf.squeeze(y_pred))**2)\nwith tf.name_scope(\"optim\"):\n  train_op = tf.train.AdamOptimizer(.001).minimize(l)\n\nwith tf.name_scope(\"summaries\"):\n  tf.summary.scalar(\"loss\", l)\n  merged = tf.summary.merge_all()\n\ntrain_writer = tf.summary.FileWriter('/tmp/lr-train', tf.get_default_graph())\n\nn_steps = 8000\nwith tf.Session() as sess:\n  sess.run(tf.global_variables_initializer())\n  # Train model\n  for i in range(n_steps):\n    feed_dict = {x: x_np, y: y_np}\n    _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)\n    print(\"step 
%d, loss: %f\" % (i, loss))\n    train_writer.add_summary(summary, i)\n\n  # Get weights\n  w_final, b_final = sess.run([W, b])\n\n  # Make Predictions\n  y_pred_np = sess.run(y_pred, feed_dict={x: x_np})\n\ny_pred_np = np.reshape(y_pred_np, -1)\nr2 = pearson_r2_score(y_np, y_pred_np)\nprint(\"Pearson R^2: %f\" % r2)\nrms = rms_score(y_np, y_pred_np)\nprint(\"RMS: %f\" % rms)\n\n# Clear figure\nplt.clf()\nplt.xlabel(\"Y-true\")\nplt.ylabel(\"Y-pred\")\nplt.title(\"Predicted versus True values \"\n          r\"(Pearson $R^2$: $0.994$)\")\nplt.scatter(y_np, y_pred_np)\nplt.savefig(\"lr_pred.png\")\n\n# Now draw with learned regression line\nplt.clf()\nplt.xlabel(\"x\")\nplt.ylabel(\"y\")\nplt.title(\"True Model versus Learned Model \"\n          r\"(RMS: $1.027620$)\")\nplt.xlim(0, 1)\nplt.scatter(x_np, y_np)\nx_left = 0\ny_left = w_final[0]*x_left + b_final\nx_right = 1\ny_right = w_final[0]*x_right + b_final\nplt.plot([x_left, x_right], [y_left, y_right], color='k')\nplt.savefig(\"lr_learned.png\")\n"
  },
  {
    "path": "ch3/linear_regression_tf_simple.py",
    "content": "import  tensorflow as tf\n\nd = 10\nN = 100\n\nx = tf.placeholder(tf.float32, (N, d))\ny = tf.placeholder(tf.float32, (N,))\nW = tf.Variable(tf.random_normal((d, 1)))\nb = tf.Variable(tf.random_normal((1,)))\nl = tf.reduce_sum((y - (tf.matmul(x, W) + b))**2)\n\nwith tf.Session() as sess:\n\ttf.global_variables_initializer().run(session=sess)\n\n"
  },
  {
    "path": "ch3/logistic_regression_tf.py",
    "content": "import numpy as np\nnp.random.seed(456)\nimport tensorflow as tf\ntf.set_random_seed(456)\nimport matplotlib.pyplot as plt\nfrom sklearn.metrics import accuracy_score\nfrom scipy.special import logit\n\n# Generate synthetic data\nN = 100\n# Zeros form a Gaussian centered at (-1, -1)\nx_zeros = np.random.multivariate_normal(\n    mean=np.array((-1, -1)), cov=.1*np.eye(2), size=(N//2,))\ny_zeros = np.zeros((N//2,))\n# Ones form a Gaussian centered at (1, 1)\nx_ones = np.random.multivariate_normal(\n    mean=np.array((1, 1)), cov=.1*np.eye(2), size=(N//2,))\ny_ones = np.ones((N//2,))\n\nx_np = np.vstack([x_zeros, x_ones])\ny_np = np.concatenate([y_zeros, y_ones])\n\n# Save image of the data distribution\nplt.xlabel(r\"$x_1$\")\nplt.ylabel(r\"$x_2$\")\nplt.title(\"Toy Logistic Regression Data\")\n\n# Plot Zeros\nplt.scatter(x_zeros[:, 0], x_zeros[:, 1], color=\"blue\")\nplt.scatter(x_ones[:, 0], x_ones[:, 1], color=\"red\")\nplt.savefig(\"logistic_data.png\")\n\n# Generate tensorflow graph\nwith tf.name_scope(\"placeholders\"):\n  x = tf.placeholder(tf.float32, (N, 2))\n  y = tf.placeholder(tf.float32, (N,))\nwith tf.name_scope(\"weights\"):\n  W = tf.Variable(tf.random_normal((2, 1)))\n  b = tf.Variable(tf.random_normal((1,)))\nwith tf.name_scope(\"prediction\"):\n  y_logit = tf.squeeze(tf.matmul(x, W) + b)\n  # the sigmoid gives the class probability of 1\n  y_one_prob = tf.sigmoid(y_logit)\n  # Rounding P(y=1) will give the correct prediction.\n  y_pred = tf.round(y_one_prob)\n\nwith tf.name_scope(\"loss\"):\n  # Compute the cross-entropy term for each datapoint\n  entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit, labels=y)\n  # Sum all contributions\n  l = tf.reduce_sum(entropy)\nwith tf.name_scope(\"optim\"):\n  train_op = tf.train.AdamOptimizer(.01).minimize(l)\n\nwith tf.name_scope(\"summaries\"):\n  tf.summary.scalar(\"loss\", l)\n  merged = tf.summary.merge_all()\n\ntrain_writer = tf.summary.FileWriter('/tmp/logistic-train', 
tf.get_default_graph())\n\nn_steps = 1000\nwith tf.Session() as sess:\n  sess.run(tf.global_variables_initializer())\n  # Train model\n  for i in range(n_steps):\n    feed_dict = {x: x_np, y: y_np}\n    _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)\n    print(\"loss: %f\" % loss)\n    train_writer.add_summary(summary, i)\n\n  # Get weights\n  w_final, b_final = sess.run([W, b])\n\n  # Make Predictions\n  y_pred_np = sess.run(y_pred, feed_dict={x: x_np})\n\nscore = accuracy_score(y_np, y_pred_np)\nprint(\"Classification Accuracy: %f\" % score)\n\nplt.clf()\n# Save image of the data distribution\nplt.xlabel(r\"$x_1$\")\nplt.ylabel(r\"$x_2$\")\nplt.title(\"Learned Model (Classification Accuracy: 1.00)\")\nplt.xlim(-2, 2)\nplt.ylim(-2, 2)\n\n# Plot Zeros\nplt.scatter(x_zeros[:, 0], x_zeros[:, 1], color=\"blue\")\nplt.scatter(x_ones[:, 0], x_ones[:, 1], color=\"red\")\n\nx_left = -2\ny_left = (1./w_final[1]) * (-b_final + logit(.5) - w_final[0]*x_left)\n\nx_right = 2\ny_right = (1./w_final[1]) * (-b_final + logit(.5) - w_final[0]*x_right)\nplt.plot([x_left, x_right], [y_left, y_right], color='k')\n\nplt.savefig(\"logistic_pred.png\")\n"
  },
  {
    "path": "ch4/fcnet_classification_tf.py",
    "content": "import numpy as np\nnp.random.seed(456)\nimport  tensorflow as tf\ntf.set_random_seed(456)\nimport matplotlib.pyplot as plt\nfrom sklearn.metrics import accuracy_score\n\n# Generate synthetic data\nN = 100\nw_true = 5\nb_true = 2\nnoise_scale = .1\n# Zeros form a Gaussian centered at (-1, -1)\nx_zeros = np.random.multivariate_normal(\n    mean=np.array((-1, -1)), cov=.1*np.eye(2), size=(N/2,))\ny_zeros = np.zeros((N/2,))\n# Ones form a Gaussian centered at (1, 1)\nx_ones = np.random.multivariate_normal(\n    mean=np.array((1, 1)), cov=.1*np.eye(2), size=(N/2,))\ny_ones = np.ones((N/2,))\n\nx_np = np.vstack([x_zeros, x_ones])\ny_np = np.concatenate([y_zeros, y_ones])\n\n\n# Save image of the data distribution\nplt.xlabel(\"Dimension 1\")\nplt.ylabel(\"Dimension 2\")\nplt.title(\"FCNet Classification Data\")\n\n# Plot Zeros\nplt.scatter(x_zeros[:, 0], x_zeros[:, 1], color=\"blue\")\nplt.scatter(x_ones[:, 0], x_ones[:, 1], color=\"red\")\nplt.savefig(\"fcnet_classification_data.png\")\n\n# Generate tensorflow graph\nd = 2\nn_hidden = 15\nwith tf.name_scope(\"placeholders\"):\n  x = tf.placeholder(tf.float32, (N, d))\n  y = tf.placeholder(tf.float32, (N,))\nwith tf.name_scope(\"layer-1\"):\n  W = tf.Variable(tf.random_normal((d, n_hidden)))\n  b = tf.Variable(tf.random_normal((n_hidden,)))\n  x_1 = tf.nn.relu(tf.matmul(x, W) + b)\nwith tf.name_scope(\"output\"):\n  W = tf.Variable(tf.random_normal((n_hidden, 1)))\n  b = tf.Variable(tf.random_normal((1,)))\n  y_logit = tf.squeeze(tf.matmul(x_1, W) + b)\n  # the sigmoid gives the class probability of 1\n  y_one_prob = tf.sigmoid(y_logit)\n  # Rounding P(y=1) will give the correct prediction.\n  y_pred = tf.round(y_one_prob)\nwith tf.name_scope(\"loss\"):\n  # Compute the cross-entropy term for each datapoint\n  entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit, labels=y)\n  # Sum all contributions\n  l = tf.reduce_sum(entropy)\n\nwith tf.name_scope(\"optim\"):\n  train_op = 
tf.train.AdamOptimizer(.001).minimize(l)\n\nwith tf.name_scope(\"summaries\"):\n  tf.summary.scalar(\"loss\", l)\n  merged = tf.summary.merge_all()\n\ntrain_writer = tf.summary.FileWriter('/tmp/fcnet-classification-train',\n                                     tf.get_default_graph())\n\nn_steps = 200\nwith tf.Session() as sess:\n  sess.run(tf.global_variables_initializer())\n  # Train model\n  for i in range(n_steps):\n    feed_dict = {x: x_np, y: y_np}\n    _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)\n    print(\"step %d, loss: %f\" % (i, loss))\n    train_writer.add_summary(summary, i)\n\n  # Make Predictions\n  y_pred_np = sess.run(y_pred, feed_dict={x: x_np})\n\nscore = accuracy_score(y_np, y_pred_np)\nprint(\"Classification Accuracy: %f\" % score)\n\n"
  },
  {
    "path": "ch4/fcnet_regression_tf.py",
    "content": "import numpy as np\nnp.random.seed(456)\nimport  tensorflow as tf\ntf.set_random_seed(456)\nimport matplotlib.pyplot as plt\nfrom scipy.stats import pearsonr\n\ndef pearson_r2_score(y, y_pred):\n  \"\"\"Computes Pearson R^2 (square of Pearson correlation).\"\"\"\n  return pearsonr(y, y_pred)[0]**2\n\n# Generate synthetic data\nd = 1\nN = 50\nw_true = 5\nb_true = 2\nnoise_scale = .1\nx_np = np.random.rand(N, d)\nnoise = np.random.normal(scale=noise_scale, size=(N, d))\ny_np = np.reshape(w_true * x_np  + b_true + noise, (-1))\n\n# Save image of the data distribution\nplt.scatter(x_np, y_np)\nplt.xlabel(\"X\")\nplt.ylabel(\"y\")\nplt.title(\"Raw Linear Regression Data\")\nplt.savefig(\"fcnet_regression_data.png\")\n\n# Generate tensorflow graph\nn_hidden = 15\nwith tf.name_scope(\"placeholders\"):\n  x = tf.placeholder(tf.float32, (N, d))\n  y = tf.placeholder(tf.float32, (N,))\nwith tf.name_scope(\"layer-1\"):\n  W = tf.Variable(tf.random_normal((d, n_hidden)))\n  b = tf.Variable(tf.random_normal((n_hidden,)))\n  x_1 = tf.nn.relu(tf.matmul(x, W) + b)\nwith tf.name_scope(\"output\"):\n  W = tf.Variable(tf.random_normal((n_hidden, 1)))\n  b = tf.Variable(tf.random_normal((1,)))\n  y_pred = tf.transpose(tf.matmul(x_1, W) + b)\nwith tf.name_scope(\"loss\"):\n  lvec = (y - y_pred)**2\n  l = tf.reduce_sum(lvec)\nwith tf.name_scope(\"optim\"):\n  train_op = tf.train.AdamOptimizer(.001).minimize(l)\n\nwith tf.name_scope(\"summaries\"):\n  tf.summary.scalar(\"loss\", l)\n  merged = tf.summary.merge_all()\n\ntrain_writer = tf.summary.FileWriter('/tmp/fcnet-regression-train', tf.get_default_graph())\n\nn_steps = 1000\nwith tf.Session() as sess:\n  sess.run(tf.global_variables_initializer())\n  # Train model\n  for i in range(n_steps):\n    feed_dict = {x: x_np, y: y_np}\n    _, summary, loss, lossvec = sess.run([train_op, merged, l, lvec], feed_dict=feed_dict)\n    print(\"step %d, loss: %f, loss-vec-size: %s\" % (i, loss, lossvec.shape))\n    
train_writer.add_summary(summary, i)\n\n  # Make Predictions\n  y_pred_np = sess.run(y_pred, feed_dict={x: x_np})\n\ny_pred_np = np.reshape(y_pred_np, -1)\nr2 = pearson_r2_score(y_np, y_pred_np)\nprint(\"Pearson R^2: %f\" % r2)\n\n# Clear figure\nplt.clf()\nplt.xlabel(\"Y-true\")\nplt.ylabel(\"Y-pred\")\nplt.title(\"Predicted versus true values\")\nplt.scatter(y_np, y_pred_np)\nplt.savefig(\"fcnet_regression_pred.png\")\n\n# Now draw with learned regression line\nplt.clf()\nplt.xlabel(\"X\")\nplt.ylabel(\"Y\")\nplt.title(\"Predicted versus true values\")\nplt.xlim(0, 1)\nplt.scatter(x_np, y_np)\nplt.scatter(x_np, y_pred_np)\nplt.savefig(\"fcnet_regression_learned.png\")\n"
  },
  {
    "path": "ch4/tox21_fcnet.py",
    "content": "import numpy as np\nnp.random.seed(456)\nimport  tensorflow as tf\ntf.set_random_seed(456)\nimport matplotlib.pyplot as plt\nimport deepchem as dc\nfrom sklearn.metrics import accuracy_score\n\n_, (train, valid, test), _ = dc.molnet.load_tox21()\ntrain_X, train_y, train_w = train.X, train.y, train.w\nvalid_X, valid_y, valid_w = valid.X, valid.y, valid.w\ntest_X, test_y, test_w = test.X, test.y, test.w\n\n# Remove extra tasks\ntrain_y = train_y[:, 0]\nvalid_y = valid_y[:, 0]\ntest_y = test_y[:, 0]\ntrain_w = train_w[:, 0]\nvalid_w = valid_w[:, 0]\ntest_w = test_w[:, 0]\n\n\n# Generate tensorflow graph\nd = 1024\nn_hidden = 50\nlearning_rate = .001\nn_epochs = 10\nbatch_size = 100\n\nwith tf.name_scope(\"placeholders\"):\n  x = tf.placeholder(tf.float32, (None, d))\n  y = tf.placeholder(tf.float32, (None,))\nwith tf.name_scope(\"hidden-layer\"):\n  W = tf.Variable(tf.random_normal((d, n_hidden)))\n  b = tf.Variable(tf.random_normal((n_hidden,)))\n  x_hidden = tf.nn.relu(tf.matmul(x, W) + b)\nwith tf.name_scope(\"output\"):\n  W = tf.Variable(tf.random_normal((n_hidden, 1)))\n  b = tf.Variable(tf.random_normal((1,)))\n  y_logit = tf.matmul(x_hidden, W) + b\n  # the sigmoid gives the class probability of 1\n  y_one_prob = tf.sigmoid(y_logit)\n  # Rounding P(y=1) will give the correct prediction.\n  y_pred = tf.round(y_one_prob)\nwith tf.name_scope(\"loss\"):\n  # Compute the cross-entropy term for each datapoint\n  y_expand = tf.expand_dims(y, 1)\n  entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit, labels=y_expand)\n  # Sum all contributions\n  l = tf.reduce_sum(entropy)\n\nwith tf.name_scope(\"optim\"):\n  train_op = tf.train.AdamOptimizer(learning_rate).minimize(l)\n\nwith tf.name_scope(\"summaries\"):\n  tf.summary.scalar(\"loss\", l)\n  merged = tf.summary.merge_all()\n\ntrain_writer = tf.summary.FileWriter('/tmp/fcnet-tox21',\n                                     tf.get_default_graph())\nN = train_X.shape[0]\nwith tf.Session() as 
sess:\n  sess.run(tf.global_variables_initializer())\n  step = 0\n  for epoch in range(n_epochs):\n    pos = 0\n    while pos < N:\n      batch_X = train_X[pos:pos+batch_size]\n      batch_y = train_y[pos:pos+batch_size]\n      feed_dict = {x: batch_X, y: batch_y}\n      _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)\n      print(\"epoch %d, step %d, loss: %f\" % (epoch, step, loss))\n      train_writer.add_summary(summary, step)\n    \n      step += 1\n      pos += batch_size\n\n  # Make Predictions\n  valid_y_pred = sess.run(y_pred, feed_dict={x: valid_X})\n\nscore = accuracy_score(valid_y, valid_y_pred)\nprint(\"Unweighted Classification Accuracy: %f\" % score)\n\nweighted_score = accuracy_score(valid_y, valid_y_pred, sample_weight=valid_w)\nprint(\"Weighted Classification Accuracy: %f\" % weighted_score)\n"
  },
  {
    "path": "ch4/tox21_fcnet_dropout.py",
    "content": "import numpy as np\nnp.random.seed(456)\nimport  tensorflow as tf\ntf.set_random_seed(456)\nimport matplotlib.pyplot as plt\nimport deepchem as dc\nfrom sklearn.metrics import accuracy_score\n\n_, (train, valid, test), _ = dc.molnet.load_tox21()\ntrain_X, train_y, train_w = train.X, train.y, train.w\nvalid_X, valid_y, valid_w = valid.X, valid.y, valid.w\ntest_X, test_y, test_w = test.X, test.y, test.w\n\n# Remove extra tasks\ntrain_y = train_y[:, 0]\nvalid_y = valid_y[:, 0]\ntest_y = test_y[:, 0]\ntrain_w = train_w[:, 0]\nvalid_w = valid_w[:, 0]\ntest_w = test_w[:, 0]\n\n\n# Generate tensorflow graph\nd = 1024\nn_hidden = 50\nlearning_rate = .001\nn_epochs = 10\nbatch_size = 100\ndropout_prob = 1.0\n\nwith tf.name_scope(\"placeholders\"):\n  x = tf.placeholder(tf.float32, (None, d))\n  y = tf.placeholder(tf.float32, (None,))\n  keep_prob = tf.placeholder(tf.float32)\nwith tf.name_scope(\"hidden-layer\"):\n  W = tf.Variable(tf.random_normal((d, n_hidden)))\n  b = tf.Variable(tf.random_normal((n_hidden,)))\n  x_hidden = tf.nn.relu(tf.matmul(x, W) + b)\n  # Apply dropout\n  x_hidden = tf.nn.dropout(x_hidden, keep_prob)\nwith tf.name_scope(\"output\"):\n  W = tf.Variable(tf.random_normal((n_hidden, 1)))\n  b = tf.Variable(tf.random_normal((1,)))\n  y_logit = tf.matmul(x_hidden, W) + b\n  # the sigmoid gives the class probability of 1\n  y_one_prob = tf.sigmoid(y_logit)\n  # Rounding P(y=1) will give the correct prediction.\n  y_pred = tf.round(y_one_prob)\nwith tf.name_scope(\"loss\"):\n  # Compute the cross-entropy term for each datapoint\n  y_expand = tf.expand_dims(y, 1)\n  entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit, labels=y_expand)\n  # Sum all contributions\n  l = tf.reduce_sum(entropy)\n\nwith tf.name_scope(\"optim\"):\n  train_op = tf.train.AdamOptimizer(learning_rate).minimize(l)\n\nwith tf.name_scope(\"summaries\"):\n  tf.summary.scalar(\"loss\", l)\n  merged = tf.summary.merge_all()\n\ntrain_writer = 
tf.summary.FileWriter('/tmp/fcnet-tox21-dropout',\n                                     tf.get_default_graph())\nN = train_X.shape[0]\nwith tf.Session() as sess:\n  sess.run(tf.global_variables_initializer())\n  step = 0\n  for epoch in range(n_epochs):\n    pos = 0\n    while pos < N:\n      batch_X = train_X[pos:pos+batch_size]\n      batch_y = train_y[pos:pos+batch_size]\n      feed_dict = {x: batch_X, y: batch_y, keep_prob: dropout_prob}\n      _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)\n      print(\"epoch %d, step %d, loss: %f\" % (epoch, step, loss))\n      train_writer.add_summary(summary, step)\n    \n      step += 1\n      pos += batch_size\n\n  # Make Predictions (set keep_prob to 1.0 for predictions)\n  train_y_pred = sess.run(y_pred, feed_dict={x: train_X, keep_prob: 1.0})\n  valid_y_pred = sess.run(y_pred, feed_dict={x: valid_X, keep_prob: 1.0})\n  test_y_pred = sess.run(y_pred, feed_dict={x: test_X, keep_prob: 1.0})\n\ntrain_weighted_score = accuracy_score(train_y, train_y_pred, sample_weight=train_w)\nprint(\"Train Weighted Classification Accuracy: %f\" % train_weighted_score)\nvalid_weighted_score = accuracy_score(valid_y, valid_y_pred, sample_weight=valid_w)\nprint(\"Valid Weighted Classification Accuracy: %f\" % valid_weighted_score)\ntest_weighted_score = accuracy_score(test_y, test_y_pred, sample_weight=test_w)\nprint(\"Test Weighted Classification Accuracy: %f\" % test_weighted_score)\n"
  },
  {
    "path": "ch5/fcnet_func.py",
    "content": "import numpy as np\nnp.random.seed(456)\nimport  tensorflow as tf\ntf.set_random_seed(456)\nimport matplotlib.pyplot as plt\nimport deepchem as dc\nfrom sklearn.metrics import accuracy_score\n\ndef eval_tox21_hyperparams(n_hidden=50, n_layers=1, learning_rate=.001,\n                           dropout_prob=0.5, n_epochs=45, batch_size=100,\n                           weight_positives=True):\n\n  print(\"---------------------------------------------\")\n  print(\"Model hyperparameters\")\n  print(\"n_hidden = %d\" % n_hidden)\n  print(\"n_layers = %d\" % n_layers)\n  print(\"learning_rate = %f\" % learning_rate)\n  print(\"n_epochs = %d\" % n_epochs)\n  print(\"batch_size = %d\" % batch_size)\n  print(\"weight_positives = %s\" % str(weight_positives))\n  print(\"dropout_prob = %f\" % dropout_prob)\n  print(\"---------------------------------------------\")\n\n  d = 1024\n  graph = tf.Graph()\n  with graph.as_default():\n    _, (train, valid, test), _ = dc.molnet.load_tox21()\n    train_X, train_y, train_w = train.X, train.y, train.w\n    valid_X, valid_y, valid_w = valid.X, valid.y, valid.w\n    test_X, test_y, test_w = test.X, test.y, test.w\n\n    # Remove extra tasks\n    train_y = train_y[:, 0]\n    valid_y = valid_y[:, 0]\n    test_y = test_y[:, 0]\n    train_w = train_w[:, 0]\n    valid_w = valid_w[:, 0]\n    test_w = test_w[:, 0]\n\n    # Generate tensorflow graph\n    with tf.name_scope(\"placeholders\"):\n      x = tf.placeholder(tf.float32, (None, d))\n      y = tf.placeholder(tf.float32, (None,))\n      w = tf.placeholder(tf.float32, (None,))\n      keep_prob = tf.placeholder(tf.float32)\n    for layer in range(n_layers):\n      with tf.name_scope(\"layer-%d\" % layer):\n        W = tf.Variable(tf.random_normal((d, n_hidden)))\n        b = tf.Variable(tf.random_normal((n_hidden,)))\n        x_hidden = tf.nn.relu(tf.matmul(x, W) + b)\n        # Apply dropout\n        x_hidden = tf.nn.dropout(x_hidden, keep_prob)\n    with 
tf.name_scope(\"output\"):\n      W = tf.Variable(tf.random_normal((n_hidden, 1)))\n      b = tf.Variable(tf.random_normal((1,)))\n      y_logit = tf.matmul(x_hidden, W) + b\n      # the sigmoid gives the class probability of 1\n      y_one_prob = tf.sigmoid(y_logit)\n      # Rounding P(y=1) will give the correct prediction.\n      y_pred = tf.round(y_one_prob)\n    with tf.name_scope(\"loss\"):\n      # Compute the cross-entropy term for each datapoint\n      y_expand = tf.expand_dims(y, 1)\n      entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_logit, labels=y_expand)\n      # Multiply by weights\n      if weight_positives:\n        w_expand = tf.expand_dims(w, 1)\n        entropy = w_expand * entropy\n      # Sum all contributions\n      l = tf.reduce_sum(entropy)\n\n    with tf.name_scope(\"optim\"):\n      train_op = tf.train.AdamOptimizer(learning_rate).minimize(l)\n\n    with tf.name_scope(\"summaries\"):\n      tf.summary.scalar(\"loss\", l)\n      merged = tf.summary.merge_all()\n\n    hyperparam_str = \"d-%d-hidden-%d-lr-%f-n_epochs-%d-batch_size-%d-weight_pos-%s\" % (\n        d, n_hidden, learning_rate, n_epochs, batch_size, str(weight_positives))\n    train_writer = tf.summary.FileWriter('/tmp/fcnet-func-' + hyperparam_str,\n                                         tf.get_default_graph())\n    N = train_X.shape[0]\n    with tf.Session() as sess:\n      sess.run(tf.global_variables_initializer())\n      step = 0\n      for epoch in range(n_epochs):\n        pos = 0\n        while pos < N:\n          batch_X = train_X[pos:pos+batch_size]\n          batch_y = train_y[pos:pos+batch_size]\n          batch_w = train_w[pos:pos+batch_size]\n          feed_dict = {x: batch_X, y: batch_y, w: batch_w, keep_prob: dropout_prob}\n          _, summary, loss = sess.run([train_op, merged, l], feed_dict=feed_dict)\n          print(\"epoch %d, step %d, loss: %f\" % (epoch, step, loss))\n          train_writer.add_summary(summary, step)\n        \n          step 
+= 1\n          pos += batch_size\n\n      # Make Predictions (set keep_prob to 1.0 for predictions)\n      valid_y_pred = sess.run(y_pred, feed_dict={x: valid_X, keep_prob: 1.0})\n\n    weighted_score = accuracy_score(valid_y, valid_y_pred, sample_weight=valid_w)\n    print(\"Valid Weighted Classification Accuracy: %f\" % weighted_score)\n  return weighted_score\n\nif __name__ == \"__main__\":\n  score = eval_tox21_hyperparams()\n"
  },
  {
    "path": "ch5/hidden_grid_search.py",
    "content": "import numpy as np\nfrom fcnet_func import eval_tox21_hyperparams\n\nscores = {}\nn_reps = 3\nhidden_sizes = [30, 60]\nepochs = [15, 30, 45]\ndropouts = [.5]\nnum_layers = [1, 2]\n\nfor rep in range(n_reps):\n  for n_epochs in epochs:\n    for hidden_size in hidden_sizes:\n      for dropout in dropouts:\n        for n_layers in num_layers:\n          score = eval_tox21_hyperparams(n_hidden=hidden_size, n_epochs=n_epochs,\n                                         dropout_prob=dropout, n_layers=n_layers)\n          if (hidden_size, n_epochs, dropout, n_layers) not in scores:\n            scores[(hidden_size, n_epochs, dropout, n_layers)] = []\n          scores[(hidden_size, n_epochs, dropout, n_layers)].append(score)\nprint(\"All Scores\")\nprint(scores)\n\navg_scores = {}\nfor params, param_scores in scores.iteritems():\n  avg_scores[params] = np.mean(np.array(param_scores))\nprint(\"Scores Averaged over %d repetitions\" % n_reps)\nprint(avg_scores)\n"
  },
  {
    "path": "ch5/simple_grid_search.py",
    "content": "import numpy as np\nfrom fcnet_func import eval_tox21_hyperparams\n\nscores = {}\nn_reps = 3\nhidden_sizes = [50]\nepochs = [10]\ndropouts = [.5, 1.0]\nnum_layers = [1, 2]\n\nfor rep in range(n_reps):\n  for n_epochs in epochs:\n    for hidden_size in hidden_sizes:\n      for dropout in dropouts:\n        for n_layers in num_layers:\n          score = eval_tox21_hyperparams(n_hidden=hidden_size, n_epochs=n_epochs,\n                                         dropout_prob=dropout, n_layers=n_layers)\n          if (hidden_size, n_epochs, dropout, n_layers) not in scores:\n            scores[(hidden_size, n_epochs, dropout, n_layers)] = []\n          scores[(hidden_size, n_epochs, dropout, n_layers)].append(score)\nprint(\"All Scores\")\nprint(scores)\n\navg_scores = {}\nfor params, param_scores in scores.iteritems():\n  avg_scores[params] = np.mean(np.array(param_scores))\nprint(\"Scores Averaged over %d repetitions\" % n_reps)\nprint(avg_scores)\n"
  },
  {
    "path": "ch5/tox21_rf.py",
    "content": "import numpy as np\nnp.random.seed(456)\nimport matplotlib.pyplot as plt\nimport deepchem as dc\nfrom sklearn.metrics import accuracy_score\nfrom sklearn.ensemble import RandomForestClassifier\n\n_, (train, valid, test), _ = dc.molnet.load_tox21()\ntrain_X, train_y, train_w = train.X, train.y, train.w\nvalid_X, valid_y, valid_w = valid.X, valid.y, valid.w\ntest_X, test_y, test_w = test.X, test.y, test.w\n\n# Remove extra tasks\ntrain_y = train_y[:, 0]\nvalid_y = valid_y[:, 0]\ntest_y = test_y[:, 0]\ntrain_w = train_w[:, 0]\nvalid_w = valid_w[:, 0]\ntest_w = test_w[:, 0]\n\n# Generate tensorflow graph\nsklearn_model = RandomForestClassifier(\n    class_weight=\"balanced\", n_estimators=50)\nprint(\"About to fit model on train set.\")\nsklearn_model.fit(train_X, train_y)\n\ntrain_y_pred = sklearn_model.predict(train_X)\nvalid_y_pred = sklearn_model.predict(valid_X)\ntest_y_pred = sklearn_model.predict(test_X)\n\nweighted_score = accuracy_score(train_y, train_y_pred, sample_weight=train_w)\nprint(\"Weighted train Classification Accuracy: %f\" % weighted_score)\nweighted_score = accuracy_score(valid_y, valid_y_pred, sample_weight=valid_w)\nprint(\"Weighted valid Classification Accuracy: %f\" % weighted_score)\nweighted_score = accuracy_score(test_y, test_y_pred, sample_weight=test_w)\nprint(\"Weighted test Classification Accuracy: %f\" % weighted_score)\n"
  },
  {
    "path": "ch6/convolutional.py",
    "content": "# Copyright 2015 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Simple, end-to-end, LeNet-5-like convolutional MNIST model example.\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport argparse\nimport gzip\nimport os\nimport sys\nimport time\n\nimport numpy\nfrom six.moves import urllib\nfrom six.moves import xrange  # pylint: disable=redefined-builtin\nimport tensorflow as tf\n\nSOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'\nWORK_DIRECTORY = 'data'\nIMAGE_SIZE = 28\nNUM_CHANNELS = 1\nPIXEL_DEPTH = 255\nNUM_LABELS = 10\nVALIDATION_SIZE = 5000  # Size of the validation set.\nSEED = 66478  # Set to None for random seed.\nBATCH_SIZE = 64\nNUM_EPOCHS = 10\nEVAL_BATCH_SIZE = 64\nEVAL_FREQUENCY = 100  # Number of steps between evaluations.\n\n\ndef download(filename):\n  \"\"\"Download the data from Yann's website, unless it's already here.\"\"\"\n  if not os.path.exists(WORK_DIRECTORY):\n    os.makedirs(WORK_DIRECTORY)\n  filepath = os.path.join(WORK_DIRECTORY, filename)\n  if not os.path.exists(filepath):\n    filepath, _ = urllib.request.urlretrieve(SOURCE_URL + filename,\n                                             filepath)\n    size = os.stat(filepath).st_size\n    print('Successfully downloaded', filename, size, 'bytes.')\n  
return filepath\n\n\ndef extract_data(filename, num_images):\n  \"\"\"Extract the images into a 4D tensor [image index, y, x, channels].\n\n  Values are rescaled from [0, 255] down to [-0.5, 0.5].\n  \"\"\"\n  print('Extracting', filename)\n  with gzip.open(filename) as bytestream:\n    bytestream.read(16)\n    buf = bytestream.read(\n        IMAGE_SIZE * IMAGE_SIZE * num_images * NUM_CHANNELS)\n    data = numpy.frombuffer(buf, dtype=numpy.uint8).astype(\n        numpy.float32)\n    # The original data consists of pixels ranging from 0-255.\n    # Center the data to have mean zero, and unit range.\n    data = (data - (255/2.0))/255 \n    data = data.reshape(num_images, IMAGE_SIZE, IMAGE_SIZE,\n                        NUM_CHANNELS)\n    return data\n\n\ndef extract_labels(filename, num_images):\n  \"\"\"Extract the labels into a vector of int64 label IDs.\"\"\"\n  print('Extracting', filename)\n  with gzip.open(filename) as bytestream:\n    # Discard header.\n    bytestream.read(8)\n    # Read bytes for labels.\n    buf = bytestream.read(num_images)\n    labels = numpy.frombuffer(buf, dtype=numpy.uint8).astype(\n        numpy.int64)\n  return labels\n\n\ndef error_rate(predictions, labels):\n  \"\"\"Return the error rate based on dense predictions and sparse labels.\"\"\"\n  return 100.0 - (\n      100.0 *\n      numpy.sum(numpy.argmax(predictions, 1) == labels) /\n      predictions.shape[0])\n\n# We will replicate the model structure for the training subgraph, as\n# well as the evaluation subgraphs, while sharing the trainable\n# parameters.\ndef model(data, train=False):\n  \"\"\"The Model definition.\"\"\"\n  # 2D convolution, with 'SAME' padding (i.e. the output feature map\n  # has the same size as the input). 
Note that {strides} is a 4D array\n  # whose shape matches the data layout: [image index, y, x, depth].\n  conv = tf.nn.conv2d(data,\n                      conv1_weights,\n                      strides=[1, 1, 1, 1],\n                      padding='SAME')\n  # Bias and rectified linear non-linearity.\n  relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))\n  # Max pooling. The kernel size spec {ksize} also follows the layout\n  # of the data. Here we have a pooling window of 2, and a stride of\n  # 2.\n  pool = tf.nn.max_pool(relu,\n                        ksize=[1, 2, 2, 1],\n                        strides=[1, 2, 2, 1],\n                        padding='SAME')\n  conv = tf.nn.conv2d(pool,\n                      conv2_weights,\n                      strides=[1, 1, 1, 1],\n                      padding='SAME')\n  relu = tf.nn.relu(tf.nn.bias_add(conv, conv2_biases))\n  pool = tf.nn.max_pool(relu,\n                        ksize=[1, 2, 2, 1],\n                        strides=[1, 2, 2, 1],\n                        padding='SAME')\n  # Reshape the feature map cuboid into a 2D matrix to feed it to the\n  # fully connected layers.\n  pool_shape = pool.get_shape().as_list()\n  reshape = tf.reshape(\n      pool,\n      [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])\n  # Fully connected layer. Note that the '+' operation automatically\n  # broadcasts the biases.\n  hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)\n  # Add a 50% dropout during training only. 
Dropout also scales\n  # activations such that no rescaling is needed at evaluation time.\n  if train:\n    hidden = tf.nn.dropout(hidden, 0.5, seed=SEED)\n  return tf.matmul(hidden, fc2_weights) + fc2_biases\n\n\n\n# Get the data.\ntrain_data_filename = download('train-images-idx3-ubyte.gz')\ntrain_labels_filename = download('train-labels-idx1-ubyte.gz')\ntest_data_filename = download('t10k-images-idx3-ubyte.gz')\ntest_labels_filename = download('t10k-labels-idx1-ubyte.gz')\n\n# Extract it into numpy arrays.\ntrain_data = extract_data(train_data_filename, 60000)\ntrain_labels = extract_labels(train_labels_filename, 60000)\ntest_data = extract_data(test_data_filename, 10000)\ntest_labels = extract_labels(test_labels_filename, 10000)\n\n# Generate a validation set.\nvalidation_data = train_data[:VALIDATION_SIZE, ...]\nvalidation_labels = train_labels[:VALIDATION_SIZE]\ntrain_data = train_data[VALIDATION_SIZE:, ...]\ntrain_labels = train_labels[VALIDATION_SIZE:]\n\nnum_epochs = NUM_EPOCHS\ntrain_size = train_labels.shape[0]\n\n# This is where training samples and labels are fed to the graph.\n# These placeholder nodes will be fed a batch of training data at each\n# training step using the {feed_dict} argument to the Run() call below.\ntrain_data_node = tf.placeholder(\n    tf.float32,\n    shape=(BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))\ntrain_labels_node = tf.placeholder(tf.int64, shape=(BATCH_SIZE,))\neval_data = tf.placeholder(\n    tf.float32,\n    shape=(EVAL_BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))\n\n# The variables below hold all the trainable weights. 
They are passed\n# an initial value which will be assigned when we call:\n# {tf.global_variables_initializer().run()}\nconv1_weights = tf.Variable(\n    # 5x5 filter, depth 32.\n    tf.truncated_normal([5, 5, NUM_CHANNELS, 32],  \n                        stddev=0.1,\n                        seed=SEED, dtype=tf.float32))\nconv1_biases = tf.Variable(tf.zeros([32], dtype=tf.float32))\nconv2_weights = tf.Variable(tf.truncated_normal(\n    [5, 5, 32, 64], stddev=0.1,\n    seed=SEED, dtype=tf.float32))\nconv2_biases = tf.Variable(tf.constant(0.1, shape=[64],\n                           dtype=tf.float32))\nfc1_weights = tf.Variable(  # fully connected, depth 512.\n    tf.truncated_normal([IMAGE_SIZE // 4 * IMAGE_SIZE // 4 * 64, 512],\n                        stddev=0.1,\n                        seed=SEED,\n                        dtype=tf.float32))\nfc1_biases = tf.Variable(tf.constant(0.1, shape=[512],\n                         dtype=tf.float32))\nfc2_weights = tf.Variable(tf.truncated_normal([512, NUM_LABELS],\n                                              stddev=0.1,\n                                              seed=SEED,\n                                              dtype=tf.float32))\nfc2_biases = tf.Variable(tf.constant(\n    0.1, shape=[NUM_LABELS], dtype=tf.float32))\n\n# Training computation: logits + cross-entropy loss.\nlogits = model(train_data_node, True)\nloss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\n    labels=train_labels_node, logits=logits))\n\n# L2 regularization for the fully connected parameters.\nregularizers = (tf.nn.l2_loss(fc1_weights)\n                + tf.nn.l2_loss(fc1_biases)\n                + tf.nn.l2_loss(fc2_weights)\n                + tf.nn.l2_loss(fc2_biases))\n# Add the regularization term to the loss.\nloss += 5e-4 * regularizers\n\n# Optimizer: set up a variable that's incremented once per batch and\n# controls the learning rate decay.\nbatch = tf.Variable(0, dtype=tf.float32)\n# Decay once per epoch, using 
an exponential schedule starting at 0.01.\nlearning_rate = tf.train.exponential_decay(\n    0.01,                # Base learning rate.\n    batch * BATCH_SIZE,  # Current index into the dataset.\n    train_size,          # Decay step.\n    0.95,                # Decay rate.\n    staircase=True)\n# Use simple momentum for the optimization.\noptimizer = tf.train.MomentumOptimizer(learning_rate,\n                                       0.9).minimize(loss,\n                                                     global_step=batch)\n\n# Predictions for the current training minibatch.\ntrain_prediction = tf.nn.softmax(logits)\n\n# Predictions for the test and validation, which we'll compute less\n# often.\neval_prediction = tf.nn.softmax(model(eval_data))\n\n# Small utility function to evaluate a dataset by feeding batches of\n# data to {eval_data} and pulling the results from {eval_predictions}.\n# Saves memory and enables this to run on smaller GPUs.\ndef eval_in_batches(data, sess):\n  \"\"\"Get predictions for a dataset by running it in small batches.\"\"\"\n  size = data.shape[0]\n  if size < EVAL_BATCH_SIZE:\n    raise ValueError(\"batch size for evals larger than dataset: %d\"\n                     % size)\n  predictions = numpy.ndarray(shape=(size, NUM_LABELS),\n                              dtype=numpy.float32)\n  for begin in xrange(0, size, EVAL_BATCH_SIZE):\n    end = begin + EVAL_BATCH_SIZE\n    if end <= size:\n      predictions[begin:end, :] = sess.run(\n          eval_prediction,\n          feed_dict={eval_data: data[begin:end, ...]})\n    else:\n      batch_predictions = sess.run(\n          eval_prediction,\n          feed_dict={eval_data: data[-EVAL_BATCH_SIZE:, ...]})\n      predictions[begin:, :] = batch_predictions[begin - size:, :]\n  return predictions\n\n# Create a local session to run the training.\nstart_time = time.time()\nwith tf.Session() as sess:\n  # Run all the initializers to prepare the trainable parameters.\n  
tf.global_variables_initializer().run()\n  # Loop through training steps.\n  for step in xrange(int(num_epochs * train_size) // BATCH_SIZE):\n    # Compute the offset of the current minibatch in the data.\n    # Note that we could use better randomization across epochs.\n    offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)\n    batch_data = train_data[offset:(offset + BATCH_SIZE), ...]\n    batch_labels = train_labels[offset:(offset + BATCH_SIZE)]\n    # This dictionary maps the batch data (as a numpy array) to the\n    # node in the graph it should be fed to.\n    feed_dict = {train_data_node: batch_data,\n                 train_labels_node: batch_labels}\n    # Run the optimizer to update weights.\n    sess.run(optimizer, feed_dict=feed_dict)\n    # print some extra information once we reach the evaluation frequency\n    if step % EVAL_FREQUENCY == 0:\n      # fetch some extra nodes' data\n      l, lr, predictions = sess.run([loss, learning_rate,\n                                     train_prediction],\n                                    feed_dict=feed_dict)\n      elapsed_time = time.time() - start_time\n      start_time = time.time()\n      print('Step %d (epoch %.2f), %.1f ms' %\n            (step, float(step) * BATCH_SIZE / train_size,\n             1000 * elapsed_time / EVAL_FREQUENCY))\n      print('Minibatch loss: %.3f, learning rate: %.6f' % (l, lr))\n      print('Minibatch error: %.1f%%'\n            % error_rate(predictions, batch_labels))\n      print('Validation error: %.1f%%' % error_rate(\n          eval_in_batches(validation_data, sess), validation_labels))\n      sys.stdout.flush()\n  # Finally print the result!\n  test_error = error_rate(eval_in_batches(test_data, sess),\n                          test_labels)\n  print('Test error: %.1f%%' % test_error)\n"
  },
  {
    "path": "ch7/ptb_word_lm.py",
    "content": "# Copyright 2015 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Example / benchmark for building a PTB LSTM model.\n\nTrains the model described in:\n(Zaremba, et. al.) Recurrent Neural Network Regularization\nhttp://arxiv.org/abs/1409.2329\n\nThere are 3 supported model configurations:\n===========================================\n| config | epochs | train | valid  | test\n===========================================\n| small  | 13     | 37.99 | 121.39 | 115.91\n| medium | 39     | 48.45 |  86.16 |  82.07\n| large  | 55     | 37.87 |  82.62 |  78.29\nThe exact results may vary depending on the random initialization.\n\nThe hyperparameters used in the model:\n- init_scale - the initial scale of the weights\n- learning_rate - the initial value of the learning rate\n- max_grad_norm - the maximum permissible norm of the gradient\n- num_layers - the number of LSTM layers\n- num_steps - the number of unrolled steps of LSTM\n- hidden_size - the number of LSTM units\n- max_epoch - the number of epochs trained with the initial learning rate\n- max_max_epoch - the total number of epochs for training\n- keep_prob - the probability of keeping weights in the dropout layer\n- lr_decay - the decay of the learning rate for each epoch after \"max_epoch\"\n- batch_size - the batch size\n\nThe data required for this example is in 
the data/ dir of the\nPTB dataset from Tomas Mikolov's webpage:\n\n$ wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz\n$ tar xvf simple-examples.tgz\n\nTo run:\n\n$ python ptb_word_lm.py\n\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport inspect\nimport time\n\nimport numpy as np\nimport tensorflow as tf\n\nimport reader\n\nflags = tf.flags\nlogging = tf.logging\n\nflags.DEFINE_string(\"save_path\", None,\n                    \"Model output directory.\")\nFLAGS = flags.FLAGS\n\n\nclass PTBInput(object):\n  \"\"\"The input data.\"\"\"\n\n  def __init__(self, config, data, name=None):\n    self.batch_size = batch_size = config.batch_size\n    self.num_steps = num_steps = config.num_steps\n    self.epoch_size = ((len(data) // batch_size) - 1) // num_steps\n    self.input_data, self.targets = reader.ptb_producer(\n        data, batch_size, num_steps, name=name)\n\n\nclass PTBModel(object):\n  \"\"\"The PTB model.\"\"\"\n\n  def __init__(self, is_training, config, input_):\n    self.input = input_\n\n    batch_size = input_.batch_size\n    num_steps = input_.num_steps\n    size = config.hidden_size\n    vocab_size = config.vocab_size\n\n    # Slightly better results can be obtained with forget gate biases\n    # initialized to 1 but the hyperparameters of the model would\n    # need to be different than reported in the paper.\n    def lstm_cell():\n      # With the latest TensorFlow source code (as of Mar 27, 2017),\n      # the BasicLSTMCell will need a reuse parameter which is\n      # unfortunately not defined in TensorFlow 1.0. 
To maintain\n      # backwards compatibility, we add an argument check here:\n      if 'reuse' in inspect.getargspec(\n          tf.contrib.rnn.BasicLSTMCell.__init__).args:\n        return tf.contrib.rnn.BasicLSTMCell(\n            size, forget_bias=0.0, state_is_tuple=True,\n            reuse=tf.get_variable_scope().reuse)\n      else:\n        return tf.contrib.rnn.BasicLSTMCell(\n            size, forget_bias=0.0, state_is_tuple=True)\n    attn_cell = lstm_cell\n    if is_training and config.keep_prob < 1:\n      def attn_cell():\n        return tf.contrib.rnn.DropoutWrapper(\n            lstm_cell(), output_keep_prob=config.keep_prob)\n    cell = tf.contrib.rnn.MultiRNNCell(\n        [attn_cell() for _ in range(config.num_layers)],\n                                    state_is_tuple=True)\n\n    self.initial_state = cell.zero_state(batch_size, tf.float32)\n\n    with tf.device(\"/cpu:0\"):\n      embedding = tf.get_variable(\n          \"embedding\", [vocab_size, size], dtype=tf.float32)\n      inputs = tf.nn.embedding_lookup(embedding, input_.input_data)\n\n    if is_training and config.keep_prob < 1:\n      inputs = tf.nn.dropout(inputs, config.keep_prob)\n\n    outputs = []\n    state = self.initial_state\n    with tf.variable_scope(\"RNN\"):\n      for time_step in range(num_steps):\n        if time_step > 0: tf.get_variable_scope().reuse_variables()\n        (cell_output, state) = cell(inputs[:, time_step, :], state)\n        outputs.append(cell_output)\n\n    output = tf.reshape(tf.stack(axis=1, values=outputs), [-1, size])\n    softmax_w = tf.get_variable(\n        \"softmax_w\", [size, vocab_size], dtype=tf.float32)\n    softmax_b = tf.get_variable(\n        \"softmax_b\", [vocab_size], dtype=tf.float32)\n    logits = tf.matmul(output, softmax_w) + softmax_b\n\n    # Reshape logits to be 3-D tensor for sequence loss\n    logits = tf.reshape(logits, [batch_size, num_steps, vocab_size])\n\n    # use the contrib sequence loss and average over the 
batches\n    loss = tf.contrib.seq2seq.sequence_loss(\n        logits,\n        input_.targets,\n        tf.ones([batch_size, num_steps], dtype=tf.float32),\n        average_across_timesteps=False,\n        average_across_batch=True\n    )\n\n    # update the cost variables\n    self.cost = cost = tf.reduce_sum(loss)\n    self.final_state = state\n\n    if not is_training:\n      return\n\n    self.lr = tf.Variable(0.0, trainable=False)\n    tvars = tf.trainable_variables()\n    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),\n                                      config.max_grad_norm)\n    optimizer = tf.train.GradientDescentOptimizer(self.lr)\n    self.train_op = optimizer.apply_gradients(\n        zip(grads, tvars),\n        global_step=tf.contrib.framework.get_or_create_global_step())\n\n    self.new_lr = tf.placeholder(\n        tf.float32, shape=[], name=\"new_learning_rate\")\n    self.lr_update = tf.assign(self.lr, self.new_lr)\n\n  def assign_lr(self, session, lr_value):\n    session.run(self.lr_update, feed_dict={self.new_lr: lr_value})\n\n\nclass SmallConfig(object):\n  \"\"\"Small config.\"\"\"\n  init_scale = 0.1\n  learning_rate = 1.0\n  max_grad_norm = 5\n  num_layers = 2\n  num_steps = 20\n  hidden_size = 200\n  max_epoch = 4\n  max_max_epoch = 13\n  keep_prob = 1.0\n  lr_decay = 0.5\n  batch_size = 20\n  vocab_size = 10000\n\n\ndef run_epoch(session, model, eval_op=None, verbose=False):\n  \"\"\"Runs the model on the given data.\"\"\"\n  start_time = time.time()\n  costs = 0.0\n  iters = 0\n  state = session.run(model.initial_state)\n\n  fetches = {\n      \"cost\": model.cost,\n      \"final_state\": model.final_state,\n  }\n  if eval_op is not None:\n    fetches[\"eval_op\"] = eval_op\n\n  for step in range(model.input.epoch_size):\n    feed_dict = {}\n    for i, (c, h) in enumerate(model.initial_state):\n      feed_dict[c] = state[i].c\n      feed_dict[h] = state[i].h\n\n    vals = session.run(fetches, feed_dict)\n    cost = 
vals[\"cost\"]\n    state = vals[\"final_state\"]\n\n    costs += cost\n    iters += model.input.num_steps\n\n    if verbose and step % (model.input.epoch_size // 10) == 10:\n      print(\"%.3f perplexity: %.3f speed: %.0f wps\" %\n            (step * 1.0 / model.input.epoch_size,\n             np.exp(costs / iters),\n             (iters\n              * model.input.batch_size/(time.time() - start_time))))\n\n  return np.exp(costs / iters)\n\nraw_data = reader.ptb_raw_data(\"./simple-examples/data\")\ntrain_data, valid_data, test_data, _ = raw_data\n\nconfig = SmallConfig()\neval_config = SmallConfig()\neval_config.batch_size = 1\neval_config.num_steps = 1\n\nwith tf.Graph().as_default():\n  initializer = tf.random_uniform_initializer(-config.init_scale,\n                                              config.init_scale)\n\n  with tf.name_scope(\"Train\"):\n    train_input = PTBInput(config=config, data=train_data,\n                           name=\"TrainInput\")\n    with tf.variable_scope(\"Model\", reuse=None,\n                           initializer=initializer):\n      m = PTBModel(is_training=True, config=config,\n                   input_=train_input)\n    tf.summary.scalar(\"Training Loss\", m.cost)\n    tf.summary.scalar(\"Learning Rate\", m.lr)\n\n  with tf.name_scope(\"Valid\"):\n    valid_input = PTBInput(config=config, data=valid_data,\n                           name=\"ValidInput\")\n    with tf.variable_scope(\"Model\", reuse=True,\n                           initializer=initializer):\n      mvalid = PTBModel(is_training=False, config=config,\n                        input_=valid_input)\n    tf.summary.scalar(\"Validation Loss\", mvalid.cost)\n\n  with tf.name_scope(\"Test\"):\n    test_input = PTBInput(config=eval_config, data=test_data,\n                          name=\"TestInput\")\n    with tf.variable_scope(\"Model\", reuse=True,\n                           initializer=initializer):\n      mtest = PTBModel(is_training=False, config=eval_config,\n   
                    input_=test_input)\n\n  sv = tf.train.Supervisor()\n  with sv.managed_session() as session:\n    for i in range(config.max_max_epoch):\n      lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)\n      m.assign_lr(session, config.learning_rate * lr_decay)\n\n      print(\"Epoch: %d Learning rate: %.3f\"\n            % (i + 1, session.run(m.lr)))\n      train_perplexity = run_epoch(session, m, eval_op=m.train_op,\n                                   verbose=True)\n      print(\"Epoch: %d Train Perplexity: %.3f\"\n            % (i + 1, train_perplexity))\n      valid_perplexity = run_epoch(session, mvalid)\n      print(\"Epoch: %d Valid Perplexity: %.3f\"\n            % (i + 1, valid_perplexity))\n\n    test_perplexity = run_epoch(session, mtest)\n    print(\"Test Perplexity: %.3f\" % test_perplexity)\n"
  },
  {
    "path": "ch7/reader.py",
    "content": "# Copyright 2015 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\n\"\"\"Utilities for parsing PTB text files.\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport collections\nimport os\nimport sys\n\nimport tensorflow as tf\n\n\ndef _read_words(filename):\n  with tf.gfile.GFile(filename, \"r\") as f:\n    if sys.version_info[0] >= 3:\n      return f.read().replace(\"\\n\", \"<eos>\").split()\n    else:\n      return f.read().decode(\"utf-8\").replace(\"\\n\", \"<eos>\").split()\n\n\ndef _build_vocab(filename):\n  data = _read_words(filename)\n\n  counter = collections.Counter(data)\n  count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0]))\n\n  words, _ = list(zip(*count_pairs))\n  word_to_id = dict(zip(words, range(len(words))))\n\n  return word_to_id\n\n\ndef _file_to_word_ids(filename, word_to_id):\n  data = _read_words(filename)\n  return [word_to_id[word] for word in data if word in word_to_id]\n\n\ndef ptb_raw_data(data_path=None):\n  \"\"\"Load PTB raw data from data directory \"data_path\".\n\n  Reads PTB text files, converts strings to integer ids,\n  and performs mini-batching of the inputs.\n\n  The PTB dataset comes from Tomas Mikolov's webpage:\n\n  http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz\n\n  
Args:\n    data_path: string path to the directory where simple-examples.tgz\n               has been extracted.\n\n  Returns:\n    tuple (train_data, valid_data, test_data, vocabulary)\n    where each of the data objects can be passed to PTBIterator.\n  \"\"\"\n\n  train_path = os.path.join(data_path, \"ptb.train.txt\")\n  valid_path = os.path.join(data_path, \"ptb.valid.txt\")\n  test_path = os.path.join(data_path, \"ptb.test.txt\")\n\n  word_to_id = _build_vocab(train_path)\n  train_data = _file_to_word_ids(train_path, word_to_id)\n  valid_data = _file_to_word_ids(valid_path, word_to_id)\n  test_data = _file_to_word_ids(test_path, word_to_id)\n  vocabulary = len(word_to_id)\n  return train_data, valid_data, test_data, vocabulary\n\n\ndef ptb_producer(raw_data, batch_size, num_steps, name=None):\n  \"\"\"Iterate on the raw PTB data.\n\n  This chunks up raw_data into batches of examples and returns\n  Tensors that are drawn from these batches.\n\n  Args:\n    raw_data: one of the raw data outputs from ptb_raw_data.\n    batch_size: int, the batch size.\n    num_steps: int, the number of unrolls.\n    name: the name of this operation (optional).\n\n  Returns:\n    A pair of Tensors, each shaped [batch_size, num_steps]. 
The\n    second element of the tuple is the same data time-shifted to the\n    right by one.\n\n  Raises:\n    tf.errors.InvalidArgumentError: if batch_size or num_steps are\n    too high.\n  \"\"\"\n  with tf.name_scope(name, \"PTBProducer\",\n                     [raw_data, batch_size, num_steps]):\n    raw_data = tf.convert_to_tensor(raw_data, name=\"raw_data\",\n                                    dtype=tf.int32)\n\n    data_len = tf.size(raw_data)\n    batch_len = data_len // batch_size\n    data = tf.reshape(raw_data[0 : batch_size * batch_len],\n                      [batch_size, batch_len])\n\n    epoch_size = (batch_len - 1) // num_steps\n    assertion = tf.assert_positive(\n        epoch_size,\n        message=\"epoch_size == 0, decrease batch_size or num_steps\")\n    with tf.control_dependencies([assertion]):\n      epoch_size = tf.identity(epoch_size, name=\"epoch_size\")\n\n    i = tf.train.range_input_producer(epoch_size,\n                                      shuffle=False).dequeue()\n    x = tf.strided_slice(data, [0, i * num_steps],\n                         [batch_size, (i + 1) * num_steps])\n    x.set_shape([batch_size, num_steps])\n    y = tf.strided_slice(data, [0, i * num_steps + 1],\n                         [batch_size, (i + 1) * num_steps + 1])\n    y.set_shape([batch_size, num_steps])\n    return x, y\n"
  },
  {
    "path": "ch7/setup.sh",
    "content": "wget http://www.fit.vutbr.cz/~imikolov/rnnlm/simple-examples.tgz\ntar xvf simple-examples.tgz\n"
  },
  {
    "path": "ch8/a3c.py",
    "content": "\"\"\"Asynchronous Advantage Actor-Critic (A3C) algorithm for reinforcement learning.\"\"\"\n\nimport numpy as np\nimport tensorflow as tf\nimport copy\nimport multiprocessing\nimport os\nimport re\nimport threading\nfrom collections import Sequence\nimport pickle\nimport threading\nimport time\nimport numpy as np\nimport os\nimport six\nimport tensorflow as tf\nimport tempfile\n\nfrom tensorgraph import TensorGraph\nfrom tensorgraph import Layer\nfrom tensorgraph import Dense\nfrom tensorgraph import Squeeze\nfrom tensorgraph import Flatten\nfrom tensorgraph import BatchNorm\nfrom tensorgraph import SoftMax\nfrom tensorgraph import Input\n\n\nclass A3CLoss(Layer):\n  \"\"\"This layer computes the loss function for A3C.\"\"\"\n\n  def __init__(self, value_weight, entropy_weight, **kwargs):\n    super(A3CLoss, self).__init__(**kwargs)\n    self.value_weight = value_weight\n    self.entropy_weight = entropy_weight\n\n  def create_tensor(self, **kwargs):\n    reward, action, prob, value, advantage = [\n        layer.out_tensor for layer in self.in_layers\n    ]\n    prob = prob + np.finfo(np.float32).eps\n    log_prob = tf.log(prob)\n    policy_loss = -tf.reduce_mean(\n        advantage * tf.reduce_sum(action * log_prob, axis=1))\n    value_loss = tf.reduce_mean(tf.square(reward - value))\n    entropy = -tf.reduce_mean(tf.reduce_sum(prob * log_prob, axis=1))\n    self.out_tensor = policy_loss + self.value_weight * value_loss - self.entropy_weight * entropy\n    return self.out_tensor\n\n\nclass A3C(object):\n  \"\"\"\n  Implements the Asynchronous Advantage Actor-Critic (A3C) algorithm for reinforcement learning.\n\n  The algorithm is described in Mnih et al, \"Asynchronous Methods for Deep\n  Reinforcement Learning\" (https://arxiv.org/abs/1602.01783).  This class\n  requires the policy to output two quantities: a vector giving the probability\n  of taking each action, and an estimate of the value function for the current\n  state.  
It optimizes both outputs at once using a loss that is the sum of three\n  terms:\n\n  1. The policy loss, which seeks to maximize the discounted reward for each action.\n  2. The value loss, which tries to make the value estimate match the actual\n     discounted reward that was attained at each step.\n  3. An entropy term to encourage exploration.\n\n  This class only supports environments with discrete action spaces, not\n  continuous ones.  The \"action\" argument passed to the environment is an\n  integer, giving the index of the action to perform.\n\n  This class supports Generalized Advantage Estimation as described in Schulman\n  et al., \"High-Dimensional Continuous Control Using Generalized Advantage\n  Estimation\" (https://arxiv.org/abs/1506.02438).  This is a method of trading\n  off bias and variance in the advantage estimate, which can sometimes improve\n  the rate of convergance.  Use the advantage_lambda parameter to adjust the\n  tradeoff.\n  \"\"\"\n\n  def __init__(self,\n               env,\n               max_rollout_length=20,\n               discount_factor=0.99,\n               advantage_lambda=0.98,\n               value_weight=1.0,\n               entropy_weight=0.01,\n               optimizer=None,\n               model_dir=None):\n    \"\"\"Create an object for optimizing a policy.\n\n    Parameters\n    ----------\n    env: Environment\n      the Environment to interact with\n    max_rollout_length: int\n      the maximum length of rollouts to generate\n    discount_factor: float\n      the discount factor to use when computing rewards\n    advantage_lambda: float\n      the parameter for trading bias vs. variance in Generalized Advantage Estimation\n    value_weight: float\n      a scale factor for the value loss term in the loss function\n    entropy_weight: float\n      a scale factor for the entropy term in the loss function\n    optimizer: Optimizer\n      the optimizer to use.  
If None, a default optimizer is used.\n    model_dir: str\n      the directory in which the model will be saved.  If None, a temporary\n      directory will be created.\n    \"\"\"\n    self._env = env\n    self.max_rollout_length = max_rollout_length\n    self.discount_factor = discount_factor\n    self.advantage_lambda = advantage_lambda\n    self.value_weight = value_weight\n    self.entropy_weight = entropy_weight\n    self._optimizer = None\n    (self._graph, self._features, self._rewards, self._actions,\n     self._action_prob, self._value, self._advantages) = self.build_graph(\n         None, \"global\", model_dir)\n    with self._graph._get_tf(\"Graph\").as_default():\n      self._session = tf.Session()\n\n  def build_graph(self, tf_graph, scope, model_dir):\n    \"\"\"Construct a TensorGraph containing the policy and loss calculations.\"\"\"\n    state_shape = self._env.state_shape\n    features = []\n    for s in state_shape:\n      features.append(Input(shape=[None] + list(s), dtype=tf.float32))\n    d1 = Flatten(in_layers=features)\n    d2 = Dense(\n        in_layers=[d1],\n        activation_fn=tf.nn.relu,\n        normalizer_fn=tf.nn.l2_normalize,\n        normalizer_params={\"dim\": 1},\n        out_channels=64)\n    d3 = Dense(\n        in_layers=[d2],\n        activation_fn=tf.nn.relu,\n        normalizer_fn=tf.nn.l2_normalize,\n        normalizer_params={\"dim\": 1},\n        out_channels=32)\n    d4 = Dense(\n        in_layers=[d3],\n        activation_fn=tf.nn.relu,\n        normalizer_fn=tf.nn.l2_normalize,\n        normalizer_params={\"dim\": 1},\n        out_channels=16)\n    d4 = BatchNorm(in_layers=[d4])\n    d5 = Dense(in_layers=[d4], activation_fn=None, out_channels=9)\n    value = Dense(in_layers=[d4], activation_fn=None, out_channels=1)\n    value = Squeeze(squeeze_dims=1, in_layers=[value])\n    action_prob = SoftMax(in_layers=[d5])\n\n    rewards = Input(shape=(None,))\n    advantages = Input(shape=(None,))\n    actions = 
Input(shape=(None, self._env.n_actions))\n    loss = A3CLoss(\n        self.value_weight,\n        self.entropy_weight,\n        in_layers=[rewards, actions, action_prob, value, advantages])\n    graph = TensorGraph(\n        batch_size=self.max_rollout_length,\n        graph=tf_graph,\n        model_dir=model_dir)\n    for f in features:\n      graph._add_layer(f)\n    graph.add_output(action_prob)\n    graph.add_output(value)\n    graph.set_loss(loss)\n    graph.set_optimizer(self._optimizer)\n    with graph._get_tf(\"Graph\").as_default():\n      with tf.variable_scope(scope):\n        graph.build()\n    return graph, features, rewards, actions, action_prob, value, advantages\n\n  def fit(self,\n          total_steps,\n          max_checkpoints_to_keep=5,\n          checkpoint_interval=600,\n          restore=False):\n    \"\"\"Train the policy.\n\n    Parameters\n    ----------\n    total_steps: int\n      the total number of time steps to perform on the environment, across all\n      rollouts on all threads\n    max_checkpoints_to_keep: int\n      the maximum number of checkpoint files to keep.  When this number is\n      reached, older files are deleted.\n    checkpoint_interval: float\n      the time interval at which to save checkpoints, measured in seconds\n    restore: bool\n      if True, restore the model from the most recent checkpoint and continue\n      training from there.  
If False, retrain the model from scratch.\n    \"\"\"\n    with self._graph._get_tf(\"Graph\").as_default():\n      step_count = [0]\n      workers = []\n      threads = []\n      for i in range(multiprocessing.cpu_count()):\n        workers.append(Worker(self, i))\n      self._session.run(tf.global_variables_initializer())\n      if restore:\n        self.restore()\n      for worker in workers:\n        thread = threading.Thread(\n            name=worker.scope,\n            target=lambda: worker.run(step_count, total_steps))\n        threads.append(thread)\n        thread.start()\n      variables = tf.get_collection(\n          tf.GraphKeys.GLOBAL_VARIABLES, scope=\"global\")\n      saver = tf.train.Saver(variables, max_to_keep=max_checkpoints_to_keep)\n      checkpoint_index = 0\n      while True:\n        threads = [t for t in threads if t.isAlive()]\n        if len(threads) > 0:\n          threads[0].join(checkpoint_interval)\n        checkpoint_index += 1\n        saver.save(\n            self._session, self._graph.save_file, global_step=checkpoint_index)\n        if len(threads) == 0:\n          break\n\n  def predict(self, state):\n    \"\"\"Compute the policy's output predictions for a state.\n\n    Parameters\n    ----------\n    state: array\n      the state of the environment for which to generate predictions\n\n    Returns\n    -------\n    the array of action probabilities, and the estimated value function\n    \"\"\"\n    with self._graph._get_tf(\"Graph\").as_default():\n      feed_dict = self.create_feed_dict(state)\n      tensors = [self._action_prob.out_tensor, self._value.out_tensor]\n      results = self._session.run(tensors, feed_dict=feed_dict)\n      return results[:2]\n\n  def select_action(self,\n                    state,\n                    deterministic=False):\n    \"\"\"Select an action to perform based on the environment's state.\n\n    Parameters\n    ----------\n    state: array\n      the state of the environment for which to 
select an action\n    deterministic: bool\n      if True, always return the best action (that is, the one with highest\n      probability).  If False, randomly select an action based on the computed\n      probabilities.\n    Returns\n    -------\n    the index of the selected action\n    \"\"\"\n    with self._graph._get_tf(\"Graph\").as_default():\n      feed_dict = self.create_feed_dict(state)\n      tensors = [self._action_prob.out_tensor]\n      results = self._session.run(tensors, feed_dict=feed_dict)\n      probabilities = results[0]\n      if deterministic:\n        return probabilities.argmax()\n      else:\n        return np.random.choice(\n            np.arange(self._env.n_actions), p=probabilities[0])\n\n  def restore(self):\n    \"\"\"Reload the model parameters from the most recent checkpoint file.\"\"\"\n    last_checkpoint = tf.train.latest_checkpoint(self._graph.model_dir)\n    if last_checkpoint is None:\n      raise ValueError(\"No checkpoint found\")\n    with self._graph._get_tf(\"Graph\").as_default():\n      variables = tf.get_collection(\n          tf.GraphKeys.GLOBAL_VARIABLES, scope=\"global\")\n      saver = tf.train.Saver(variables)\n      saver.restore(self._session, last_checkpoint)\n\n  def create_feed_dict(self, state):\n    \"\"\"Create a feed dict for use by predict() or select_action().\"\"\"\n    feed_dict = dict((f.out_tensor, np.expand_dims(s, axis=0))\n                     for f, s in zip(self._features, state))\n    return feed_dict\n\n\nclass Worker(object):\n  \"\"\"A Worker object is created for each training thread.\"\"\"\n\n  def __init__(self, a3c, index):\n    self.a3c = a3c\n    self.index = index\n    self.scope = \"worker%d\" % index\n    self.env = copy.deepcopy(a3c._env)\n    self.env.reset()\n    (self.graph, self.features, self.rewards, self.actions, self.action_prob,\n     self.value, self.advantages) = a3c.build_graph(\n        a3c._graph._get_tf(\"Graph\"), self.scope, None)\n    with 
a3c._graph._get_tf(\"Graph\").as_default():\n      local_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,\n                                     self.scope)\n      global_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,\n                                      \"global\")\n      gradients = tf.gradients(self.graph.loss.out_tensor, local_vars)\n      grads_and_vars = list(zip(gradients, global_vars))\n      self.train_op = a3c._graph._get_tf(\"Optimizer\").apply_gradients(\n          grads_and_vars)\n      self.update_local_variables = tf.group(\n          * [tf.assign(v1, v2) for v1, v2 in zip(local_vars, global_vars)])\n      self.global_step = self.graph.get_global_step()\n\n  def run(self, step_count, total_steps):\n    with self.graph._get_tf(\"Graph\").as_default():\n      while step_count[0] < total_steps:\n        self.a3c._session.run(self.update_local_variables)\n        states, actions, rewards, values = self.create_rollout()\n        self.process_rollout(states, actions, rewards, values, step_count[0])\n        step_count[0] += len(actions)\n\n  def create_rollout(self):\n    \"\"\"Generate a rollout.\"\"\"\n    n_actions = self.env.n_actions\n    session = self.a3c._session\n    states = []\n    actions = []\n    rewards = []\n    values = []\n\n    # Generate the rollout.\n    for i in range(self.a3c.max_rollout_length):\n      if self.env.terminated:\n        break\n      state = self.env.state\n      states.append(state)\n      feed_dict = self.create_feed_dict(state)\n      results = session.run(\n          [self.action_prob.out_tensor, self.value.out_tensor],\n          feed_dict=feed_dict)\n      probabilities, value = results[:2]\n      action = np.random.choice(np.arange(n_actions), p=probabilities[0])\n      actions.append(action)\n      values.append(float(value))\n      rewards.append(self.env.step(action))\n\n    # Compute an estimate of the reward for the rest of the episode.\n    if not self.env.terminated:\n      
feed_dict = self.create_feed_dict(self.env.state)\n      final_value = self.a3c.discount_factor * float(\n          session.run(self.value.out_tensor, feed_dict))\n    else:\n      final_value = 0.0\n    values.append(final_value)\n    if self.env.terminated:\n      self.env.reset()\n    return states, actions, np.array(rewards), np.array(values)\n\n  def process_rollout(self, states, actions, rewards, values, step_count):\n    \"\"\"Train the network based on a rollout.\"\"\"\n\n    # Compute the discounted rewards and advantages.\n    if len(states) == 0:\n      # Rollout creation sometimes fails in multithreaded environment.\n      # Don't process if malformed\n      print(\"Rollout creation failed. Skipping\")    \n      return\n\n    discounted_rewards = rewards.copy()\n    discounted_rewards[-1] += values[-1]\n    advantages = rewards - values[:-1] + self.a3c.discount_factor * np.array(\n        values[1:])\n    for j in range(len(rewards) - 1, 0, -1):\n      discounted_rewards[j-1] += self.a3c.discount_factor * discounted_rewards[j]\n      advantages[j-1] += (\n          self.a3c.discount_factor * self.a3c.advantage_lambda * advantages[j])\n\n    # Convert the actions to one-hot.\n    n_actions = self.env.n_actions\n    actions_matrix = []\n    for action in actions:\n      a = np.zeros(n_actions)\n      a[action] = 1.0\n      actions_matrix.append(a)\n\n    # Rearrange the states into the proper set of arrays.\n    state_arrays = [[] for i in range(len(self.features))]\n    for state in states:\n      for j in range(len(state)):\n        state_arrays[j].append(state[j])\n    \n    # Build the feed dict and apply gradients.\n    feed_dict = {}\n    for f, s in zip(self.features, state_arrays):\n      feed_dict[f.out_tensor] = s\n    feed_dict[self.rewards.out_tensor] = discounted_rewards\n    feed_dict[self.actions.out_tensor] = actions_matrix\n    feed_dict[self.advantages.out_tensor] = advantages\n    feed_dict[self.global_step] = step_count\n    
self.a3c._session.run(self.train_op, feed_dict=feed_dict)\n\n  def create_feed_dict(self, state):\n    \"\"\"Create a feed dict for use during a rollout.\"\"\"\n    feed_dict = dict((f.out_tensor, np.expand_dims(s, axis=0))\n                     for f, s in zip(self.features, state))\n    return feed_dict\n"
  },
  {
    "path": "ch8/environment.py",
    "content": "import copy\nimport random\nimport shutil\nimport numpy as np\nimport tensorflow as tf\nimport deepchem as dc\nimport collections\n\nclass Environment(object):\n  \"\"\"An environment in which an actor performs actions to accomplish a task.\n\n  An environment has a current state, which is represented as either a single NumPy\n  array, or optionally a list of NumPy arrays.  When an action is taken, that causes\n  the state to be updated.  Exactly what is meant by an \"action\" is defined by each\n  subclass.  As far as this interface is concerned, it is simply an arbitrary object.\n  The environment also computes a reward for each action, and reports when the task\n  has been terminated (meaning that no more actions may be taken).\n  \"\"\"\n\n  def __init__(self, state_shape, n_actions, state_dtype=None):\n    \"\"\"Subclasses should call the superclass constructor in addition to doing their own initialization.\"\"\"\n    self.state_shape = state_shape\n    self.n_actions = n_actions\n    if state_dtype is None:\n      # Assume all arrays are float32.\n      if isinstance(state_shape[0], collections.Sequence):\n        self.state_dtype = [np.float32] * len(state_shape)\n      else:\n        self.state_dtype = np.float32\n    else:\n      self.state_dtype = state_dtype\n\n\nclass TicTacToeEnvironment(Environment):\n  \"\"\"\n  Play tictactoe against a randomly acting opponent\n  \"\"\"\n  X = np.array([1.0, 0.0])\n  O = np.array([0.0, 1.0])\n  EMPTY = np.array([0.0, 0.0])\n\n  ILLEGAL_MOVE_PENALTY = -3.0\n  LOSS_PENALTY = -3.0\n  NOT_LOSS = 0.1\n  DRAW_REWARD = 5.0\n  WIN_REWARD = 10.0\n\n  def __init__(self):\n    super(TicTacToeEnvironment, self).__init__([(3, 3, 2)], 9)\n    self.state = None\n    self.terminated = None\n    self.reset()\n\n  def reset(self):\n    self.terminated = False\n    self.state = [np.zeros(shape=(3, 3, 2), dtype=np.float32)]\n\n    # Randomize who goes first\n    if random.randint(0, 1) == 1:\n      move = 
self.get_O_move()\n      self.state[0][move[0]][move[1]] = TicTacToeEnvironment.O\n\n  def step(self, action):\n    self.state = copy.deepcopy(self.state)\n    row = action // 3\n    col = action % 3\n\n    # Illegal move -- the square is not empty\n    if not np.all(self.state[0][row][col] == TicTacToeEnvironment.EMPTY):\n      self.terminated = True\n      return TicTacToeEnvironment.ILLEGAL_MOVE_PENALTY\n\n    # Move X\n    self.state[0][row][col] = TicTacToeEnvironment.X\n\n    # Did X Win\n    if self.check_winner(TicTacToeEnvironment.X):\n      self.terminated = True\n      return TicTacToeEnvironment.WIN_REWARD\n\n    if self.game_over():\n      self.terminated = True\n      return TicTacToeEnvironment.DRAW_REWARD\n\n    move = self.get_O_move()\n    self.state[0][move[0]][move[1]] = TicTacToeEnvironment.O\n\n    # Did O Win\n    if self.check_winner(TicTacToeEnvironment.O):\n      self.terminated = True\n      return TicTacToeEnvironment.LOSS_PENALTY\n\n    if self.game_over():\n      self.terminated = True\n      return TicTacToeEnvironment.DRAW_REWARD\n\n    return TicTacToeEnvironment.NOT_LOSS\n\n  def get_O_move(self):\n    empty_squares = []\n    for row in range(3):\n      for col in range(3):\n        if np.all(self.state[0][row][col] == TicTacToeEnvironment.EMPTY):\n          empty_squares.append((row, col))\n    return random.choice(empty_squares)\n\n  def check_winner(self, player):\n    for i in range(3):\n      row = np.sum(self.state[0][i][:], axis=0)\n      if np.all(row == player * 3):\n        return True\n      col = np.sum(self.state[0][:][i], axis=0)\n      if np.all(col == player * 3):\n        return True\n\n    diag1 = self.state[0][0][0] + self.state[0][1][1] + self.state[0][2][2]\n    if np.all(diag1 == player * 3):\n      return True\n    diag2 = self.state[0][0][2] + self.state[0][1][1] + self.state[0][2][0]\n    if np.all(diag2 == player * 3):\n      return True\n    return False\n\n  def game_over(self):\n    for i in range(3):\n 
     for j in range(3):\n        if np.all(self.state[0][i][j] == TicTacToeEnvironment.EMPTY):\n          return False\n    return True\n\n  def display(self):\n    state = self.state[0]\n    s = \"\"\n    for row in range(3):\n      for col in range(3):\n        if np.all(state[row][col] == TicTacToeEnvironment.EMPTY):\n          s += \"_\"\n        if np.all(state[row][col] == TicTacToeEnvironment.X):\n          s += \"X\"\n        if np.all(state[row][col] == TicTacToeEnvironment.O):\n          s += \"O\"\n      s += \"\\n\"\n    return s\n"
  },
  {
    "path": "ch8/tensorgraph.py",
    "content": "\"\"\"TensorGraph OOP Framework.\"\"\"\n\nimport numpy as np\nimport tensorflow as tf\nimport copy\nimport multiprocessing\nimport os\nimport re\nimport threading\nfrom collections import Sequence\n\nimport pickle\nimport threading\nimport time\n\nimport numpy as np\nimport os\nimport six\nimport tensorflow as tf\nimport tempfile\n\nclass TensorGraph(object):\n\n  def __init__(self,\n               batch_size=100,\n               random_seed=None,\n               graph=None,\n               learning_rate=0.001,\n               model_dir=None,\n               **kwargs):\n    \"\"\"\n    Parameters\n    ----------\n    batch_size: int\n      default batch size for training and evaluating\n    graph: tensorflow.Graph\n      the Graph in which to create Tensorflow objects.  If None, a new Graph\n      is created.\n    learning_rate: float or LearningRateSchedule\n      the learning rate to use for optimization\n    kwargs\n    \"\"\"\n\n    # Layer Management\n    self.layers = dict()\n    self.features = list()\n    self.labels = list()\n    self.outputs = list()\n    self.task_weights = list()\n    self.loss = None\n    self.built = False\n    self.optimizer = None\n    self.learning_rate = learning_rate\n\n    # Singular place to hold Tensor objects which don't serialize\n    # See TensorGraph._get_tf() for more details on lazy construction\n    self.tensor_objects = {\n        \"Graph\": graph,\n        #\"train_op\": None,\n    }\n    self.global_step = 0\n\n    self.batch_size = batch_size\n    self.random_seed = random_seed\n    if model_dir is not None:\n      if not os.path.exists(model_dir):\n        os.makedirs(model_dir)\n    else:\n      model_dir = tempfile.mkdtemp()\n      self.model_dir_is_temp = True\n    self.model_dir = model_dir\n    self.save_file = \"%s/%s\" % (self.model_dir, \"model\")\n    self.model_class = None\n\n  def _add_layer(self, layer):\n    if layer.name is None:\n      layer.name = \"%s_%s\" % 
(layer.__class__.__name__, len(self.layers) + 1)\n    if layer.name in self.layers:\n      return\n    if isinstance(layer, Input):\n      self.features.append(layer)\n    self.layers[layer.name] = layer\n    for in_layer in layer.in_layers:\n      self._add_layer(in_layer)\n\n  def topsort(self):\n\n    def add_layers_to_list(layer, sorted_layers):\n      if layer in sorted_layers:\n        return\n      for in_layer in layer.in_layers:\n        add_layers_to_list(in_layer, sorted_layers)\n      sorted_layers.append(layer)\n\n    sorted_layers = []\n    for l in self.features + self.labels + self.task_weights + self.outputs:\n      add_layers_to_list(l, sorted_layers)\n    add_layers_to_list(self.loss, sorted_layers)\n    return sorted_layers\n\n  def build(self):\n    if self.built:\n      return\n    with self._get_tf(\"Graph\").as_default():\n      self._training_placeholder = tf.placeholder(dtype=tf.float32, shape=())\n      if self.random_seed is not None:\n        tf.set_random_seed(self.random_seed)\n      for layer in self.topsort():\n        with tf.name_scope(layer.name):\n          layer.create_tensor(training=self._training_placeholder)\n      self.session = tf.Session()\n\n      self.built = True\n\n  def set_loss(self, layer):\n    self._add_layer(layer)\n    self.loss = layer\n\n  def add_output(self, layer):\n    self._add_layer(layer)\n    self.outputs.append(layer)\n\n  def set_optimizer(self, optimizer):\n    \"\"\"Set the optimizer to use for fitting.\"\"\"\n    self.optimizer = optimizer\n\n  def get_layer_variables(self, layer):\n    \"\"\"Get the list of trainable variables in a layer of the graph.\"\"\"\n    if not self.built:\n      self.build()\n    with self._get_tf(\"Graph\").as_default():\n      if layer.variable_scope == \"\":\n        return []\n      return tf.get_collection(\n          tf.GraphKeys.TRAINABLE_VARIABLES, scope=layer.variable_scope)\n\n  def get_global_step(self):\n    return self._get_tf(\"GlobalStep\")\n\n  def 
_get_tf(self, obj):\n    \"\"\"Fetches underlying TensorFlow primitives.\n\n    Parameters\n    ----------\n    obj: str\n      If \"Graph\", returns tf.Graph instance. If \"Optimizer\", returns the\n      optimizer. If \"train_op\", returns the train operation. If \"GlobalStep\" returns\n      the global step.\n    Returns\n    -------\n    TensorFlow Object\n\n    \"\"\"\n\n    if obj in self.tensor_objects and self.tensor_objects[obj] is not None:\n      return self.tensor_objects[obj]\n    if obj == \"Graph\":\n      self.tensor_objects[\"Graph\"] = tf.Graph()\n    elif obj == \"Optimizer\":\n      self.tensor_objects[\"Optimizer\"] = tf.train.AdamOptimizer(\n          learning_rate=self.learning_rate,\n          beta1=0.9,\n          beta2=0.999,\n          epsilon=1e-7)\n    elif obj == \"GlobalStep\":\n      with self._get_tf(\"Graph\").as_default():\n        self.tensor_objects[\"GlobalStep\"] = tf.Variable(0, trainable=False)\n    return self._get_tf(obj)\n\n  def restore(self):\n    \"\"\"Reload the values of all variables from the most recent checkpoint file.\"\"\"\n    if not self.built:\n      self.build()\n    last_checkpoint = tf.train.latest_checkpoint(self.model_dir)\n    if last_checkpoint is None:\n      raise ValueError(\"No checkpoint found\")\n    with self._get_tf(\"Graph\").as_default():\n      saver = tf.train.Saver()\n      saver.restore(self.session, last_checkpoint)\n\n  def __del__(self):\n    pass\n\nclass Layer(object):\n\n  def __init__(self, in_layers=None, **kwargs):\n    if \"name\" in kwargs:\n      self.name = kwargs[\"name\"]\n    else:\n      self.name = None\n    if in_layers is None:\n      in_layers = list()\n    if not isinstance(in_layers, Sequence):\n      in_layers = [in_layers]\n    self.in_layers = in_layers\n    self.variable_scope = \"\"\n    self.tb_input = None\n\n  def create_tensor(self, in_layers=None, **kwargs):\n    raise NotImplementedError(\"Subclasses must implement for themselves\")\n\n  def 
_get_input_tensors(self, in_layers):\n    \"\"\"Get the input tensors to this layer.\n\n    Parameters\n    ----------\n    in_layers: list of Layers or tensors\n      the inputs passed to create_tensor().  If None, this layer's inputs will\n      be used instead.\n    \"\"\"\n    if in_layers is None:\n      in_layers = self.in_layers\n    if not isinstance(in_layers, Sequence):\n      in_layers = [in_layers]\n    tensors = []\n    for input in in_layers:\n      tensors.append(tf.convert_to_tensor(input))\n    return tensors\n\ndef _convert_layer_to_tensor(value, dtype=None, name=None, as_ref=False):\n  return tf.convert_to_tensor(value.out_tensor, dtype=dtype, name=name)\n\n\ntf.register_tensor_conversion_function(Layer, _convert_layer_to_tensor)\n\nclass Dense(Layer):\n\n  def __init__(\n      self,\n      out_channels,\n      activation_fn=None,\n      biases_initializer=tf.zeros_initializer,\n      weights_initializer=tf.contrib.layers.variance_scaling_initializer,\n      **kwargs):\n    \"\"\"Create a dense layer.\n\n    The weight and bias initializers are specified by callable objects that construct\n    and return a Tensorflow initializer when invoked with no arguments.  This will typically\n    be either the initializer class itself (if the constructor does not require arguments),\n    or a TFWrapper (if it does).\n\n    Parameters\n    ----------\n    out_channels: int\n      the number of output values\n    activation_fn: object\n      the Tensorflow activation function to apply to the output\n    biases_initializer: callable object\n      the initializer for bias values.  
This may be None, in which case the layer\n      will not include biases.\n    weights_initializer: callable object\n      the initializer for weight values\n    \"\"\"\n    super(Dense, self).__init__(**kwargs)\n    self.out_channels = out_channels\n    self.out_tensor = None\n    self.activation_fn = activation_fn\n    self.biases_initializer = biases_initializer\n    self.weights_initializer = weights_initializer\n\n  def create_tensor(self, in_layers=None, **kwargs):\n    inputs = self._get_input_tensors(in_layers)\n    if len(inputs) != 1:\n      raise ValueError(\"Dense layer can only have one input\")\n    parent = inputs[0]\n    if self.biases_initializer is None:\n      biases_initializer = None\n    else:\n      biases_initializer = self.biases_initializer()\n    out_tensor = tf.contrib.layers.fully_connected(parent,\n                                                   num_outputs=self.out_channels,\n                                                   activation_fn=self.activation_fn,\n                                                   biases_initializer=biases_initializer,\n                                                   weights_initializer=self.weights_initializer(),\n                                                   reuse=False,\n                                                   trainable=True)\n    self.out_tensor = out_tensor\n    return out_tensor\n\nclass Squeeze(Layer):\n\n  def __init__(self, in_layers=None, squeeze_dims=None, **kwargs):\n    self.squeeze_dims = squeeze_dims\n    super(Squeeze, self).__init__(in_layers, **kwargs)\n\n  def create_tensor(self, in_layers=None, **kwargs):\n    inputs = self._get_input_tensors(in_layers)\n    parent_tensor = inputs[0]\n    out_tensor = tf.squeeze(parent_tensor, squeeze_dims=self.squeeze_dims)\n    self.out_tensor = out_tensor\n    return out_tensor\n\nclass BatchNorm(Layer):\n\n  def __init__(self, in_layers=None, **kwargs):\n    super(BatchNorm, self).__init__(in_layers, **kwargs)\n\n  def 
create_tensor(self, in_layers=None, **kwargs):\n    inputs = self._get_input_tensors(in_layers)\n    parent_tensor = inputs[0]\n    out_tensor = tf.layers.batch_normalization(parent_tensor)\n    self.out_tensor = out_tensor\n    return out_tensor\n\nclass Flatten(Layer):\n  \"\"\"Flatten every dimension except the first\"\"\"\n\n  def __init__(self, in_layers=None, **kwargs):\n    super(Flatten, self).__init__(in_layers, **kwargs)\n\n  def create_tensor(self, in_layers=None, **kwargs):\n    inputs = self._get_input_tensors(in_layers)\n    if len(inputs) != 1:\n      raise ValueError(\"Only One Parent to Flatten\")\n    parent = inputs[0]\n    parent_shape = parent.get_shape()\n    vector_size = 1\n    for i in range(1, len(parent_shape)):\n      vector_size *= parent_shape[i].value\n    parent_tensor = parent\n    out_tensor = tf.reshape(parent_tensor, shape=(-1, vector_size))\n    self.out_tensor = out_tensor\n    return out_tensor\n\nclass SoftMax(Layer):\n\n  def __init__(self, in_layers=None, **kwargs):\n    super(SoftMax, self).__init__(in_layers, **kwargs)\n\n  def create_tensor(self, in_layers=None, **kwargs):\n    inputs = self._get_input_tensors(in_layers)\n    if len(inputs) != 1:\n      raise ValueError(\"Must only Softmax single parent\")\n    parent = inputs[0]\n    out_tensor = tf.contrib.layers.softmax(parent)\n    self.out_tensor = out_tensor\n    return out_tensor\n\nclass Input(Layer):\n\n  def __init__(self, shape, dtype=tf.float32, **kwargs):\n    self._shape = tuple(shape)\n    self.dtype = dtype\n    super(Input, self).__init__(**kwargs)\n\n  def create_tensor(self, in_layers=None, **kwargs):\n    if in_layers is None:\n      in_layers = self.in_layers\n    out_tensor = tf.placeholder(dtype=self.dtype, shape=self._shape)\n    self.out_tensor = out_tensor\n    return out_tensor\n"
  },
  {
    "path": "ch8/tictactoe.py",
    "content": "\"\"\"Adapted from DeepChem Examples by Peter Eastman and Karl Leswing.\"\"\"\n\nimport copy\nimport random\nimport shutil\nimport numpy as np\nimport tensorflow as tf\nimport deepchem as dc\nfrom environment import TicTacToeEnvironment\nfrom a3c import A3C\n\n\ndef eval_tic_tac_toe(value_weight,\n                     num_epoch_rounds=1,\n                     games=10**4,\n                     rollouts=10**5,\n                     advantage_lambda=0.98):\n  \"\"\"\n  Returns the average reward over 10k games after 100k rollouts\n  \n  Parameters\n  ----------\n  value_weight: float\n\n  Returns\n  ------- \n  avg_rewards\n  \"\"\"\n  env = TicTacToeEnvironment()\n  model_dir = \"/tmp/tictactoe\"\n  try:\n    shutil.rmtree(model_dir)\n  except:\n    pass\n\n  avg_rewards = []\n  for j in range(num_epoch_rounds):\n    print(\"Epoch round: %d\" % j)\n    a3c_engine = A3C(\n        env,\n        entropy_weight=0.01,\n        value_weight=value_weight,\n        model_dir=model_dir,\n        advantage_lambda=advantage_lambda)\n    try:\n      a3c_engine.restore()\n    except:\n      print(\"unable to restore\")\n      pass\n    a3c_engine.fit(rollouts)\n    rewards = []\n    for i in range(games):\n      env.reset()\n      reward = -float('inf')\n      while not env.terminated:\n        action = a3c_engine.select_action(env.state)\n        reward = env.step(action)\n      rewards.append(reward)\n    print(\"Mean reward at round %d is %f\" % (j+1, np.mean(rewards)))\n    avg_rewards.append({(j + 1) * rollouts: np.mean(rewards)})\n  return avg_rewards\n\n\ndef main():\n  value_weight = 6.0\n  score = eval_tic_tac_toe(value_weight=0.2, num_epoch_rounds=20,\n                           advantage_lambda=0.,\n                           games=10**4, rollouts=5*10**4)\n  print(score)\n\n\nif __name__ == \"__main__\":\n  main()\n"
  },
  {
    "path": "ch9/cifar10.py",
    "content": "# Copyright 2015 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"Builds the CIFAR-10 network.\n\nSummary of available functions:\n\n # Compute input images and labels for training. If you would like to run\n # evaluations, use inputs() instead.\n inputs, labels = distorted_inputs()\n\n # Compute inference on the model inputs to make a prediction.\n predictions = inference(inputs)\n\n # Compute the total loss of the prediction with respect to the labels.\n loss = loss(predictions, labels)\n\n # Create a graph to run one step of training with respect to the loss.\n train_op = train(loss, global_step)\n\"\"\"\n# pylint: disable=missing-docstring\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os\nimport re\nimport sys\nimport tarfile\n\nfrom six.moves import urllib, xrange # pylint: disable=redefined-builtin\nimport tensorflow as tf\n\nimport cifar10_input\n\nFLAGS = tf.app.flags.FLAGS\n\n# Basic model parameters.\ntf.app.flags.DEFINE_integer('batch_size', 128,\n                            \"\"\"Number of images to process in a batch.\"\"\")\ntf.app.flags.DEFINE_string('data_dir', '/tmp/cifar10_data',\n                           \"\"\"Path to the CIFAR-10 data directory.\"\"\")\n\n# Process images of this size. 
Note that this differs from the original CIFAR\n# image size of 32 x 32. If one alters this number, then the entire model\n# architecture will change and any model would need to be retrained.\nIMAGE_SIZE = 24\nNUM_CLASSES = 10\n\n# Global constants describing the CIFAR-10 data set.\nNUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000\nNUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000\n\n\n# Constants describing the training process.\nMOVING_AVERAGE_DECAY = 0.9999     # The decay to use for the moving average.\nNUM_EPOCHS_PER_DECAY = 350.0      # Epochs after which learning rate decays.\nLEARNING_RATE_DECAY_FACTOR = 0.1  # Learning rate decay factor.\nINITIAL_LEARNING_RATE = 0.1       # Initial learning rate.\n\n# If a model is trained with multiple GPUs, prefix all Op names with tower_name\n# to differentiate the operations. Note that this prefix is removed from the\n# names of the summaries when visualizing a model.\nTOWER_NAME = 'tower'\n\nDATA_URL = 'http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'\n\n\ndef distorted_inputs():\n  \"\"\"Construct distorted input for CIFAR training using the Reader ops.\n\n  Returns:\n    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.\n    labels: Labels. 1D tensor of [batch_size] size.\n\n  Raises:\n    ValueError: If no data_dir\n  \"\"\"\n  if not FLAGS.data_dir:\n    raise ValueError('Please supply a data_dir')\n  data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')\n  images, labels = _distorted_inputs(data_dir=data_dir,\n                                     batch_size=FLAGS.batch_size)\n  return images, labels\n\ndef read_cifar10(filename_queue):\n  \"\"\"Reads and parses examples from CIFAR10 data files.\n\n  Recommendation: if you want N-way read parallelism, call this function\n  N times.  
This will give you N independent Readers reading different\n  files & positions within those files, which will give better mixing of\n  examples.\n\n  Args:\n    filename_queue: A queue of strings with the filenames to read from.\n\n  Returns:\n    An object representing a single example, with the following fields:\n      height: number of rows in the result (32)\n      width: number of columns in the result (32)\n      depth: number of color channels in the result (3)\n      key: a scalar string Tensor describing the filename & record number\n        for this example.\n      label: an int32 Tensor with the label in the range 0..9.\n      uint8image: a [height, width, depth] uint8 Tensor with the image data\n  \"\"\"\n\n  class CIFAR10Record(object):\n    pass\n  result = CIFAR10Record()\n\n  # Dimensions of the images in the CIFAR-10 dataset.\n  # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the\n  # input format.\n  label_bytes = 1  # 2 for CIFAR-100\n  result.height = 32\n  result.width = 32\n  result.depth = 3\n  image_bytes = result.height * result.width * result.depth\n  # Every record consists of a label followed by the image, with a\n  # fixed number of bytes for each.\n  record_bytes = label_bytes + image_bytes\n\n  # Read a record, getting filenames from the filename_queue.  
No\n  # header or footer in the CIFAR-10 format, so we leave header_bytes\n  # and footer_bytes at their default of 0.\n  reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)\n  result.key, value = reader.read(filename_queue)\n\n  # Convert from a string to a vector of uint8 that is record_bytes long.\n  record_bytes = tf.decode_raw(value, tf.uint8)\n\n  # The first bytes represent the label, which we convert from uint8->int32.\n  result.label = tf.cast(\n      tf.strided_slice(record_bytes, [0], [label_bytes]), tf.int32)\n\n  # The remaining bytes after the label represent the image, which we reshape\n  # from [depth * height * width] to [depth, height, width].\n  depth_major = tf.reshape(\n      tf.strided_slice(record_bytes, [label_bytes],\n                       [label_bytes + image_bytes]),\n      [result.depth, result.height, result.width])\n  # Convert from [depth, height, width] to [height, width, depth].\n  result.uint8image = tf.transpose(depth_major, [1, 2, 0])\n\n  return result\n\ndef _distorted_inputs(data_dir, batch_size):\n  \"\"\"Construct distorted input for CIFAR training using the Reader ops.\n\n  Args:\n    data_dir: Path to the CIFAR-10 data directory.\n    batch_size: Number of images per batch.\n\n  Returns:\n    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.\n    labels: Labels. 1D tensor of [batch_size] size.\n  \"\"\"\n  filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)\n               for i in xrange(1, 6)]\n  for f in filenames:\n    if not tf.gfile.Exists(f):\n      raise ValueError('Failed to find file: ' + f)\n\n  # Create a queue that produces the filenames to read.\n  filename_queue = tf.train.string_input_producer(filenames)\n\n  # Read examples from files in the filename queue.\n  read_input = read_cifar10(filename_queue)\n  reshaped_image = tf.cast(read_input.uint8image, tf.float32)\n\n  height = IMAGE_SIZE\n  width = IMAGE_SIZE\n\n  # Image processing for training the network. 
Note the many random\n  # distortions applied to the image.\n\n  # Randomly crop a [height, width] section of the image.\n  distorted_image = tf.random_crop(reshaped_image, [height, width, 3])\n\n  # Randomly flip the image horizontally.\n  distorted_image = tf.image.random_flip_left_right(distorted_image)\n\n  # Because these operations are not commutative, consider randomizing\n  # the order their operation.\n  # NOTE: since per_image_standardization zeros the mean and makes\n  # the stddev unit, this likely has no effect see tensorflow#1458.\n  distorted_image = tf.image.random_brightness(distorted_image,\n                                               max_delta=63)\n  distorted_image = tf.image.random_contrast(distorted_image,\n                                             lower=0.2, upper=1.8)\n\n  # Subtract off the mean and divide by the variance of the pixels.\n  float_image = tf.image.per_image_standardization(distorted_image)\n\n  # Set the shapes of tensors.\n  float_image.set_shape([height, width, 3])\n  read_input.label.set_shape([1])\n\n  # Ensure that the random shuffling has good mixing properties.\n  min_fraction_of_examples_in_queue = 0.4\n  min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *\n                           min_fraction_of_examples_in_queue)\n  print ('Filling queue with %d CIFAR images before starting to train. '\n         'This will take a few minutes.' % min_queue_examples)\n\n  # Generate a batch of images and labels by building up a queue of examples.\n  return _generate_image_and_label_batch(float_image, read_input.label,\n                                         min_queue_examples, batch_size,\n                                         shuffle=True)\n\ndef inputs(eval_data):\n  \"\"\"Construct input for CIFAR evaluation using the Reader ops.\n\n  Args:\n    eval_data: bool, indicating if one should use the train or eval data set.\n\n  Returns:\n    images: Images. 
4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.\n    labels: Labels. 1D tensor of [batch_size] size.\n\n  Raises:\n    ValueError: If no data_dir is provided.\n  \"\"\"\n  if not FLAGS.data_dir:\n    raise ValueError('Please supply a data_dir')\n  data_dir = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')\n  images, labels = _inputs(eval_data=eval_data,\n                           data_dir=data_dir,\n                           batch_size=FLAGS.batch_size)\n  return images, labels\n\ndef _inputs(eval_data, data_dir, batch_size):\n  \"\"\"Construct input for CIFAR evaluation using the Reader ops.\n\n  Args:\n    eval_data: bool, indicating if one should use the train or eval data set.\n    data_dir: Path to the CIFAR-10 data directory.\n    batch_size: Number of images per batch.\n\n  Returns:\n    images: Images. 4D tensor of [batch_size, IMAGE_SIZE, IMAGE_SIZE, 3] size.\n    labels: Labels. 1D tensor of [batch_size] size.\n  \"\"\"\n  if not eval_data:\n    filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i)\n                 for i in xrange(1, 6)]\n    num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN\n  else:\n    filenames = [os.path.join(data_dir, 'test_batch.bin')]\n    num_examples_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_EVAL\n\n  for f in filenames:\n    if not tf.gfile.Exists(f):\n      raise ValueError('Failed to find file: ' + f)\n\n  # Create a queue that produces the filenames to read.\n  filename_queue = tf.train.string_input_producer(filenames)\n\n  # Read examples from files in the filename queue.\n  read_input = read_cifar10(filename_queue)\n  reshaped_image = tf.cast(read_input.uint8image, tf.float32)\n\n  height = IMAGE_SIZE\n  width = IMAGE_SIZE\n\n  # Image processing for evaluation.\n  # Crop the central [height, width] of the image.\n  resized_image = tf.image.resize_image_with_crop_or_pad(reshaped_image,\n                                                         height, width)\n\n  # Subtract off the mean and divide by the 
variance of the pixels.\n  float_image = tf.image.per_image_standardization(resized_image)\n\n  # Set the shapes of tensors.\n  float_image.set_shape([height, width, 3])\n  read_input.label.set_shape([1])\n\n  # Ensure that the random shuffling has good mixing properties.\n  min_fraction_of_examples_in_queue = 0.4\n  min_queue_examples = int(num_examples_per_epoch *\n                           min_fraction_of_examples_in_queue)\n\n  # Generate a batch of images and labels by building up a queue of examples.\n  return _generate_image_and_label_batch(float_image, read_input.label,\n                                         min_queue_examples, batch_size,\n                                         shuffle=False)\n\ndef maybe_download_and_extract():\n  \"\"\"Download and extract the tarball from Alex's website.\"\"\"\n  dest_directory = FLAGS.data_dir\n  if not os.path.exists(dest_directory):\n    os.makedirs(dest_directory)\n  filename = DATA_URL.split('/')[-1]\n  filepath = os.path.join(dest_directory, filename)\n  if not os.path.exists(filepath):\n    def _progress(count, block_size, total_size):\n      sys.stdout.write('\\r>> Downloading %s %.1f%%' % (filename,\n          float(count * block_size) / float(total_size) * 100.0))\n      sys.stdout.flush()\n    filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)\n    print()\n    statinfo = os.stat(filepath)\n    print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')\n  extracted_dir_path = os.path.join(dest_directory, 'cifar-10-batches-bin')\n  if not os.path.exists(extracted_dir_path):\n    tarfile.open(filepath, 'r:gz').extractall(dest_directory)\n\ndef _generate_image_and_label_batch(image, label, min_queue_examples,\n                                    batch_size, shuffle):\n  \"\"\"Construct a queued batch of images and labels.\n\n  Args:\n    image: 3-D Tensor of [height, width, 3] of type.float32.\n    label: 1-D Tensor of type.int32\n    min_queue_examples: int32, minimum 
number of samples to retain\n      in the queue that provides batches of examples.\n    batch_size: Number of images per batch.\n    shuffle: boolean indicating whether to use a shuffling queue.\n\n  Returns:\n    images: Images. 4D tensor of [batch_size, height, width, 3] size.\n    labels: Labels. 1D tensor of [batch_size] size.\n  \"\"\"\n  # Create a queue that shuffles the examples, and then\n  # read 'batch_size' images + labels from the example queue.\n  num_preprocess_threads = 16\n  if shuffle:\n    images, label_batch = tf.train.shuffle_batch(\n        [image, label],\n        batch_size=batch_size,\n        num_threads=num_preprocess_threads,\n        capacity=min_queue_examples + 3 * batch_size,\n        min_after_dequeue=min_queue_examples)\n  else:\n    images, label_batch = tf.train.batch(\n        [image, label],\n        batch_size=batch_size,\n        num_threads=num_preprocess_threads,\n        capacity=min_queue_examples + 3 * batch_size)\n\n  # Display the training images in the visualizer.\n  tf.summary.image('images', images)\n\n  return images, tf.reshape(label_batch, [batch_size])\n"
  },
  {
    "path": "ch9/cifar10_multi_gpu_train.py",
    "content": "# Copyright 2015 The TensorFlow Authors. All Rights Reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"License\");\n# you may not use this file except in compliance with the License.\n# You may obtain a copy of the License at\n#\n#     http://www.apache.org/licenses/LICENSE-2.0\n#\n# Unless required by applicable law or agreed to in writing, software\n# distributed under the License is distributed on an \"AS IS\" BASIS,\n# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n# See the License for the specific language governing permissions and\n# limitations under the License.\n# ==============================================================================\n\n\"\"\"A binary to train CIFAR-10 using multiple GPUs with synchronous updates.\n\nAccuracy:\ncifar10_multi_gpu_train.py achieves ~86% accuracy after 100K steps\n(256 epochs of data) as judged by cifar10_eval.py.\n\nSpeed: With batch_size 128.\n\nSystem        | Step Time (sec/batch)  |     Accuracy\n--------------------------------------------------------------------\n1 Tesla K20m  | 0.35-0.60              | ~86% at 60K steps  (5 hours)\n1 Tesla K40m  | 0.25-0.35              | ~86% at 100K steps (4 hours)\n2 Tesla K20m  | 0.13-0.20              | ~84% at 30K steps  (2.5 hours)\n3 Tesla K20m  | 0.13-0.18              | ~84% at 30K steps\n4 Tesla K20m  | ~0.10                  | ~84% at 30K steps\n\nUsage:\nPlease see the tutorial and website for how to download the CIFAR-10\ndata set, compile the program and train the model.\n\nhttp://tensorflow.org/tutorials/deep_cnn/\n\"\"\"\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nfrom datetime import datetime\nimport os.path\nimport shutil\nimport re\nimport time\n\nimport numpy as np\nfrom six.moves import xrange  # pylint: disable=redefined-builtin\nimport tensorflow as tf\nimport cifar10\n\nFLAGS = 
tf.app.flags.FLAGS\n\ntf.app.flags.DEFINE_string('train_dir', '/tmp/cifar10_train',\n                           \"\"\"Directory where to write event logs \"\"\"\n                           \"\"\"and checkpoint.\"\"\")\ntf.app.flags.DEFINE_integer('max_steps', 1000000,\n                            \"\"\"Number of batches to run.\"\"\")\ntf.app.flags.DEFINE_integer('num_gpus', 1,\n                            \"\"\"How many GPUs to use.\"\"\")\ntf.app.flags.DEFINE_boolean('log_device_placement', False,\n                            \"\"\"Whether to log device placement.\"\"\")\n\ndef _activation_summary(x):\n  \"\"\"Helper to create summaries for activations.\n\n  Creates a summary that provides a histogram of activations.\n  Creates a summary that measures the sparsity of activations.\n\n  Args:\n    x: Tensor\n  Returns:\n    nothing\n  \"\"\"\n  # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training\n  # session. This helps the clarity of presentation on tensorboard.\n  tensor_name = re.sub('%s_[0-9]*/' % cifar10.TOWER_NAME, '', x.op.name)\n  tf.summary.histogram(tensor_name + '/activations', x)\n  tf.summary.scalar(tensor_name + '/sparsity',\n                                       tf.nn.zero_fraction(x))\n\n\ndef _variable_on_cpu(name, shape, initializer):\n  \"\"\"Helper to create a Variable stored on CPU memory.\n\n  Args:\n    name: name of the variable\n    shape: list of ints\n    initializer: initializer for Variable\n\n  Returns:\n    Variable Tensor\n  \"\"\"\n  with tf.device('/cpu:0'):\n    var = tf.get_variable(name, shape, initializer=initializer, dtype=tf.float32)\n  return var\n\n\ndef _variable_with_weight_decay(name, shape, stddev, wd):\n  \"\"\"Helper to create an initialized Variable with weight decay.\n\n  Note that the Variable is initialized with a truncated normal distribution.\n  A weight decay is added only if one is specified.\n\n  Args:\n    name: name of the variable\n    shape: list of ints\n    stddev: standard 
deviation of a truncated Gaussian\n    wd: add L2Loss weight decay multiplied by this float. If None, weight\n        decay is not added for this Variable.\n\n  Returns:\n    Variable Tensor\n  \"\"\"\n  var = _variable_on_cpu(\n      name,\n      shape,\n      tf.truncated_normal_initializer(stddev=stddev, dtype=tf.float32))\n  if wd is not None:\n    weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')\n    tf.add_to_collection('losses', weight_decay)\n  return var\n\ndef inference(images):\n  \"\"\"Build the CIFAR-10 model.\n\n  Args:\n    images: Images returned from distorted_inputs() or inputs().\n\n  Returns:\n    Logits.\n  \"\"\"\n  # We instantiate all variables using tf.get_variable() instead of\n  # tf.Variable() in order to share variables across multiple GPU training runs.\n  # If we only ran this model on a single GPU, we could simplify this function\n  # by replacing all instances of tf.get_variable() with tf.Variable().\n  #\n  # conv1\n  with tf.variable_scope('conv1') as scope:\n    kernel = _variable_with_weight_decay('weights',\n                                         shape=[5, 5, 3, 64],\n                                         stddev=5e-2,\n                                         wd=0.0)\n    conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')\n    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))\n    pre_activation = tf.nn.bias_add(conv, biases)\n    conv1 = tf.nn.relu(pre_activation, name=scope.name)\n    _activation_summary(conv1)\n\n  # pool1\n  pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],\n                         padding='SAME', name='pool1')\n  # norm1\n  norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,\n                    name='norm1')\n\n  # conv2\n  with tf.variable_scope('conv2') as scope:\n    kernel = _variable_with_weight_decay('weights',\n                                         shape=[5, 5, 64, 64],\n                        
                 stddev=5e-2,\n                                         wd=0.0)\n    conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')\n    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))\n    pre_activation = tf.nn.bias_add(conv, biases)\n    conv2 = tf.nn.relu(pre_activation, name=scope.name)\n    _activation_summary(conv2)\n\n  # norm2\n  norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,\n                    name='norm2')\n  # pool2\n  pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1],\n                         strides=[1, 2, 2, 1], padding='SAME', name='pool2')\n\n  # local3\n  with tf.variable_scope('local3') as scope:\n    # Move everything into depth so we can perform a single matrix multiply.\n    reshape = tf.reshape(pool2, [FLAGS.batch_size, -1])\n    dim = reshape.get_shape()[1].value\n    weights = _variable_with_weight_decay('weights', shape=[dim, 384],\n                                          stddev=0.04, wd=0.004)\n    biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))\n    local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)\n    _activation_summary(local3)\n\n  # local4\n  with tf.variable_scope('local4') as scope:\n    weights = _variable_with_weight_decay('weights', shape=[384, 192],\n                                          stddev=0.04, wd=0.004)\n    biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))\n    local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name=scope.name)\n    _activation_summary(local4)\n\n  # linear layer(WX + b),\n  # We don't apply softmax here because\n  # tf.nn.sparse_softmax_cross_entropy_with_logits accepts the unscaled logits\n  # and performs the softmax internally for efficiency.\n  with tf.variable_scope('softmax_linear') as scope:\n    weights = _variable_with_weight_decay('weights', [192, cifar10.NUM_CLASSES],\n                                          stddev=1/192.0, wd=0.0)\n    
biases = _variable_on_cpu('biases', [cifar10.NUM_CLASSES],\n                              tf.constant_initializer(0.0))\n    softmax_linear = tf.add(tf.matmul(local4, weights), biases, name=scope.name)\n    _activation_summary(softmax_linear)\n\n  return softmax_linear\n\ndef loss(logits, labels):\n  \"\"\"Add L2Loss to all the trainable variables.\n\n  Add summary for \"Loss\" and \"Loss/avg\".\n  Args:\n    logits: Logits from inference().\n    labels: Labels from distorted_inputs or inputs(). 1-D tensor\n            of shape [batch_size]\n\n  Returns:\n    Loss tensor of type float.\n  \"\"\"\n  # Calculate the average cross entropy loss across the batch.\n  labels = tf.cast(labels, tf.int64)\n  cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(\n      labels=labels, logits=logits, name='cross_entropy_per_example')\n  cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')\n  tf.add_to_collection('losses', cross_entropy_mean)\n\n  # The total loss is defined as the cross entropy loss plus all of the weight\n  # decay terms (L2 loss).\n  return tf.add_n(tf.get_collection('losses'), name='total_loss')\n\ndef tower_loss(scope, images, labels):\n  \"\"\"Calculate the total loss on a single tower running the CIFAR model.\n\n  Args:\n    scope: unique prefix string identifying the CIFAR tower, e.g. 'tower_0'\n    images: Images. 4D tensor of shape [batch_size, height, width, 3].\n    labels: Labels. 1D tensor of shape [batch_size].\n\n  Returns:\n     Tensor of shape [] containing the total loss for a batch of data\n  \"\"\"\n\n  # Build inference Graph.\n  logits = inference(images)\n\n  # Build the portion of the Graph calculating the losses. 
Note that we will\n  # assemble the total_loss using a custom function below.\n  _ = loss(logits, labels)\n\n  # Assemble all of the losses for the current tower only.\n  losses = tf.get_collection('losses', scope)\n\n  # Calculate the total loss for the current tower.\n  total_loss = tf.add_n(losses, name='total_loss')\n\n  # Attach a scalar summary to all individual losses and the total\n  # loss; do the same for the averaged version of the losses.\n  for l in losses + [total_loss]:\n    # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU\n    # training session. This helps the clarity of presentation on\n    # tensorboard.\n    loss_name = re.sub('%s_[0-9]*/' % cifar10.TOWER_NAME, '', l.op.name)\n    tf.summary.scalar(loss_name, l)\n\n  return total_loss\n\n\ndef average_gradients(tower_grads):\n  \"\"\"Calculate the average gradient for each shared variable across all towers.\n\n  Note that this function provides a synchronization point across all towers.\n\n  Args:\n    tower_grads: List of lists of (gradient, variable) tuples. The\n    outer list is over individual gradients. The inner list is over\n    the gradient calculation for each tower.\n  Returns:\n     List of pairs of (gradient, variable) where the gradient has been averaged\n     across all towers.\n  \"\"\"\n  average_grads = []\n  for grad_and_vars in zip(*tower_grads):\n    # Note that each grad_and_vars looks like the following:\n    #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))\n    grads = []\n    for g, _ in grad_and_vars:\n      # Add 0 dimension to the gradients to represent the tower.\n      expanded_g = tf.expand_dims(g, 0)\n\n      # Append on a 'tower' dimension which we will average over below.\n      grads.append(expanded_g)\n\n    # Average over the 'tower' dimension.\n    grad = tf.concat(axis=0, values=grads)\n    grad = tf.reduce_mean(grad, 0)\n\n    # Keep in mind that the Variables are redundant because they are shared\n    # across towers. So .. 
we will just return the first tower's pointer to\n    # the Variable.\n    v = grad_and_vars[0][1]\n    grad_and_var = (grad, v)\n    average_grads.append(grad_and_var)\n  return average_grads\n\n\ndef train():\n  \"\"\"Train CIFAR-10 for a number of steps.\"\"\"\n  with tf.Graph().as_default(), tf.device('/cpu:0'):\n    # Create a variable to count the number of train() calls. This equals the\n    # number of batches processed * FLAGS.num_gpus.\n    global_step = tf.get_variable(\n        'global_step', [],\n        initializer=tf.constant_initializer(0), trainable=False)\n\n    # Calculate the learning rate schedule.\n    num_batches_per_epoch = (cifar10.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /\n                             FLAGS.batch_size)\n    decay_steps = int(num_batches_per_epoch * cifar10.NUM_EPOCHS_PER_DECAY)\n\n    # Decay the learning rate exponentially based on the number of steps.\n    lr = tf.train.exponential_decay(cifar10.INITIAL_LEARNING_RATE,\n                                    global_step,\n                                    decay_steps,\n                                    cifar10.LEARNING_RATE_DECAY_FACTOR,\n                                    staircase=True)\n\n    # Create an optimizer that performs gradient descent.\n    opt = tf.train.GradientDescentOptimizer(lr)\n\n    # Get images and labels for CIFAR-10.\n    images, labels = cifar10.distorted_inputs()\n    batch_queue = tf.contrib.slim.prefetch_queue.prefetch_queue(\n          [images, labels], capacity=2 * FLAGS.num_gpus)\n    # Calculate the gradients for each model tower.\n    tower_grads = []\n    with tf.variable_scope(tf.get_variable_scope()):\n      for i in xrange(FLAGS.num_gpus):\n        with tf.device('/gpu:%d' % i):\n          with tf.name_scope('%s_%d' % (cifar10.TOWER_NAME, i)) as scope:\n            # Dequeues one batch for the GPU\n            image_batch, label_batch = batch_queue.dequeue()\n            # Calculate the loss for one tower of the CIFAR model. 
This function\n            # constructs the entire CIFAR model but shares the variables across\n            # all towers.\n            loss = tower_loss(scope, image_batch, label_batch)\n\n            # Reuse variables for the next tower.\n            tf.get_variable_scope().reuse_variables()\n\n            # Retain the summaries from the final tower.\n            summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)\n\n            # Calculate the gradients for the batch of data on this CIFAR tower.\n            grads = opt.compute_gradients(loss)\n\n            # Keep track of the gradients across all towers.\n            tower_grads.append(grads)\n\n    # We must calculate the mean of each gradient. Note that this is the\n    # synchronization point across all towers.\n    grads = average_gradients(tower_grads)\n\n    # Add a summary to track the learning rate.\n    summaries.append(tf.summary.scalar('learning_rate', lr))\n\n    # Add histograms for gradients.\n    for grad, var in grads:\n      if grad is not None:\n        summaries.append(tf.summary.histogram(var.op.name + '/gradients', grad))\n\n    # Apply the gradients to adjust the shared variables.\n    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)\n\n    # Add histograms for trainable variables.\n    for var in tf.trainable_variables():\n      summaries.append(tf.summary.histogram(var.op.name, var))\n\n    # Track the moving averages of all trainable variables.\n    variable_averages = tf.train.ExponentialMovingAverage(\n        cifar10.MOVING_AVERAGE_DECAY, global_step)\n    variables_averages_op = variable_averages.apply(tf.trainable_variables())\n\n    # Group all updates into a single train op.\n    train_op = tf.group(apply_gradient_op, variables_averages_op)\n\n    # Create a saver.\n    saver = tf.train.Saver(tf.global_variables())\n\n    # Build the summary operation from the last tower summaries.\n    summary_op = tf.summary.merge(summaries)\n\n    # Build an 
initialization operation to run below.\n    init = tf.global_variables_initializer()\n\n    # Start running operations on the Graph. allow_soft_placement must be set to\n    # True to build towers on GPU, as some of the ops do not have GPU\n    # implementations.\n    sess = tf.Session(config=tf.ConfigProto(\n        allow_soft_placement=True,\n        log_device_placement=FLAGS.log_device_placement))\n    sess.run(init)\n\n    # Start the queue runners.\n    tf.train.start_queue_runners(sess=sess)\n\n    summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)\n\n    for step in xrange(FLAGS.max_steps):\n      start_time = time.time()\n      _, loss_value = sess.run([train_op, loss])\n      duration = time.time() - start_time\n\n      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'\n\n      if step % 10 == 0:\n        num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus\n        examples_per_sec = num_examples_per_step / duration\n        sec_per_batch = duration / FLAGS.num_gpus\n\n        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '\n                      'sec/batch)')\n        print (format_str % (datetime.now(), step, loss_value,\n                             examples_per_sec, sec_per_batch))\n\n      if step % 100 == 0:\n        summary_str = sess.run(summary_op)\n        summary_writer.add_summary(summary_str, step)\n\n      # Save the model checkpoint periodically.\n      if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:\n        checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')\n        saver.save(sess, checkpoint_path, global_step=step)\n\n\ndef main(argv=None):  # pylint: disable=unused-argument\n  cifar10.maybe_download_and_extract()\n  if os.path.exists(FLAGS.train_dir):\n    shutil.rmtree(FLAGS.train_dir)\n  os.makedirs(FLAGS.train_dir)\n  train()\n\n\nif __name__ == '__main__':\n  main()\n"
  }
]