Repository: wiseodd/generative-models Branch: master Commit: b930d5fa9e2f Files: 56 Total size: 175.3 KB Directory structure: gitextract_9grku3me/ ├── .gitignore ├── GAN/ │ ├── ali_bigan/ │ │ ├── ali_bigan_pytorch.py │ │ └── ali_bigan_tensorflow.py │ ├── auxiliary_classifier_gan/ │ │ ├── ac_gan_pytorch.py │ │ └── ac_gan_tensorflow.py │ ├── boundary_equilibrium_gan/ │ │ ├── began_pytorch.py │ │ └── began_tensorflow.py │ ├── boundary_seeking_gan/ │ │ ├── bgan_pytorch.py │ │ └── bgan_tensorflow.py │ ├── conditional_gan/ │ │ ├── cgan_pytorch.py │ │ └── cgan_tensorflow.py │ ├── coupled_gan/ │ │ ├── cogan_pytorch.py │ │ └── cogan_tensorflow.py │ ├── disco_gan/ │ │ ├── discogan_pytorch.py │ │ └── discogan_tensorflow.py │ ├── dual_gan/ │ │ ├── dualgan_pytorch.py │ │ └── dualgan_tensorflow.py │ ├── ebgan/ │ │ ├── ebgan_pytorch.py │ │ └── ebgan_tensorflow.py │ ├── f_gan/ │ │ ├── f_gan_pytorch.py │ │ └── f_gan_tensorflow.py │ ├── generative_adversarial_parallelization/ │ │ └── gap_pytorch.py │ ├── gibbsnet/ │ │ └── gibbsnet_pytorch.py │ ├── improved_wasserstein_gan/ │ │ └── wgan_gp_tensorflow.py │ ├── infogan/ │ │ ├── infogan_pytorch.py │ │ └── infogan_tensorflow.py │ ├── least_squares_gan/ │ │ ├── lsgan_pytorch.py │ │ └── lsgan_tensorflow.py │ ├── magan/ │ │ ├── magan_pytorch.py │ │ └── magan_tensorflow.py │ ├── mode_regularized_gan/ │ │ ├── mode_reg_gan_pytorch.py │ │ └── mode_reg_gan_tensorflow.py │ ├── softmax_gan/ │ │ ├── softmax_gan_pytorch.py │ │ └── softmax_gan_tensorflow.py │ ├── vanilla_gan/ │ │ ├── gan_pytorch.py │ │ └── gan_tensorflow.py │ └── wasserstein_gan/ │ ├── wgan_pytorch.py │ └── wgan_tensorflow.py ├── HelmholtzMachine/ │ ├── README.md │ └── vanilla_HM/ │ └── helmholtz.py ├── LICENSE ├── RBM/ │ ├── README.md │ ├── rbm_binary_cd.py │ └── rbm_binary_pcd.py ├── README.md ├── VAE/ │ ├── adversarial_autoencoder/ │ │ ├── aae_pytorch.py │ │ └── aae_tensorflow.py │ ├── adversarial_vb/ │ │ ├── avb_pytorch.py │ │ └── avb_tensorflow.py │ ├── conditional_vae/ │ │ ├── 
cvae_pytorch.py │ │ └── cvae_tensorflow.py │ ├── denoising_vae/ │ │ ├── dvae_pytorch.py │ │ └── dvae_tensorflow.py │ └── vanilla_vae/ │ ├── vae_pytorch.py │ └── vae_tensorflow.py └── environment.yml ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # IPython Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # dotenv .env # virtualenv venv/ ENV/ # Spyder project settings .spyderproject # Rope project settings .ropeproject *.sublime* MNIST_data/ GAN/out/ VAE/out/ out/ # Unreleased GAN/unrolled_gan/ GAN/loss_sensitive_gan/ GAN/generative_adversarial_parallelization/gap_tensorflow.py .vscode ================================================ FILE: GAN/ali_bigan/ali_bigan_pytorch.py ================================================ import torch import torch.nn import torch.nn.functional as nn import torch.autograd as autograd import torch.optim as optim import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from 
# Training loop: for every minibatch run one discriminator update followed by
# one encoder/generator (Q, P) update, and every 1000 iterations log the
# losses and save a 4x4 grid of generated digits to out/.
for it in range(1000000):
    # Sample data
    z = Variable(torch.randn(mb_size, z_dim))
    X, _ = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))

    # Discriminator
    z_hat = Q(X)
    X_hat = P(z)

    D_enc = D(X, z_hat)
    D_gen = D(X_hat, z)

    D_loss = -torch.mean(log(D_enc) + log(1 - D_gen))

    D_loss.backward()
    # Only the discriminator is stepped on D_loss. backward() also fills the
    # gradients of Q and P (z_hat and X_hat depend on them); stepping G_solver
    # here would move the encoder/generator towards *lowering* the
    # discriminator loss. Those gradients are discarded by reset_grad().
    D_solver.step()
    reset_grad()

    # Autoencoder Q, P
    z_hat = Q(X)
    X_hat = P(z)

    D_enc = D(X, z_hat)
    D_gen = D(X_hat, z)

    # Same terms as D_loss with the roles of D_enc and D_gen swapped.
    G_loss = -torch.mean(log(D_gen) + log(1 - D_enc))

    G_loss.backward()
    G_solver.step()
    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        # NOTE(review): `.data[0]` assumes the loss is stored as a 1-element
        # tensor; confirm against the PyTorch version pinned for this repo.
        print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss.data[0], G_loss.data[0]))

        # Decode the current noise batch and keep the first 16 images.
        samples = P(z).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        # Files are numbered by the running counter `cnt`, zero-padded to 3 digits.
        plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)
def sample_z(m, n):
    """Draw an m-by-n array of noise values uniformly from [-1, 1)."""
    noise_shape = [m, n]
    return np.random.uniform(low=-1., high=1., size=noise_shape)
def G(z, c):
    """Generate samples from noise `z` conditioned on `c`.

    `z` and `c` are concatenated along dimension 1 and the result is fed
    through the generator network `G_`.
    """
    return G_(torch.cat([z, c], 1))
= Variable(torch.randn(mb_size, z_dim)) """ Discriminator """ G_sample = G(z, c) D_real, C_real = D(X) D_fake, C_fake = D(G_sample) # GAN's D loss D_loss = torch.mean(torch.log(D_real + eps) + torch.log(1 - D_fake + eps)) # Cross entropy aux loss C_loss = -nn.cross_entropy(C_real, y_true) - nn.cross_entropy(C_fake, y_true) # Maximize DC_loss = -(D_loss + C_loss) DC_loss.backward() D_solver.step() reset_grad() """ Generator """ G_sample = G(z, c) D_fake, C_fake = D(G_sample) _, C_real = D(X) # GAN's G loss G_loss = torch.mean(torch.log(D_fake + eps)) # Cross entropy aux loss C_loss = -nn.cross_entropy(C_real, y_true) - nn.cross_entropy(C_fake, y_true) # Maximize GC_loss = -(G_loss + C_loss) GC_loss.backward() G_solver.step() reset_grad() # Print and plot every now and then if it % 1000 == 0: idx = np.random.randint(0, 10) c = np.zeros([16, y_dim]) c[range(16), idx] = 1 c = Variable(torch.from_numpy(c.astype('float32'))) z = Variable(torch.randn(16, z_dim)) samples = G(z, c).data.numpy() print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}; Idx: {}' .format(it, -D_loss.data[0], -G_loss.data[0], idx)) fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') if not os.path.exists('out/'): os.makedirs('out/') plt.savefig('out/{}.png' .format(str(cnt).zfill(3)), bbox_inches='tight') cnt += 1 plt.close(fig) ================================================ FILE: GAN/auxiliary_classifier_gan/ac_gan_tensorflow.py ================================================ import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 32 X_dim = 
def cross_entropy(logit, y):
    """Return the NEGATED mean softmax cross-entropy of `logit` against `y`.

    Despite the name, the sign is flipped: the result is minus the mean of
    ``tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=y)``.

    Args:
        logit: unnormalised class scores, passed as ``logits``.
        y: target distribution, passed as ``labels``.

    Returns:
        A scalar tensor holding the negated mean cross-entropy.
    """
    return -tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=y))
# Running index used to number the saved sample images.
i = 0

# Training loop: one discriminator update and one generator update per
# minibatch; every 1000 iterations render 16 samples of one random class.
for it in range(1000000):
    X_mb, y_mb = mnist.train.next_batch(mb_size)
    z_mb = sample_z(mb_size, z_dim)

    _, DC_loss_curr = sess.run(
        [D_solver, DC_loss],
        feed_dict={X: X_mb, y: y_mb, z: z_mb}
    )

    _, GC_loss_curr = sess.run(
        [G_solver, GC_loss],
        feed_dict={X: X_mb, y: y_mb, z: z_mb}
    )

    if it % 1000 == 0:
        # Pick one class and build 16 identical one-hot condition rows.
        # The upper bound is y_dim (the width of `c`) rather than a literal
        # 10, so the index is always a valid column.
        idx = np.random.randint(0, y_dim)
        c = np.zeros([16, y_dim])
        c[range(16), idx] = 1

        samples = sess.run(G_sample,
                           feed_dict={z: sample_z(16, z_dim), y: c})

        print('Iter: {}; DC_loss: {:.4}; GC_loss: {:.4}; Idx: {}'
              .format(it, DC_loss_curr, GC_loss_curr, idx))

        fig = plot(samples)
        plt.savefig('out/{}.png'
                    .format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)
# D is an autoencoder; its output is the reconstruction error of its input
def D(X):
    """Return the mean absolute reconstruction error of `X` under `D_`.

    `X` is passed through the autoencoder network `D_`; the absolute
    difference between `X` and its reconstruction is summed over dimension 1
    and then averaged, giving a single scalar.
    """
    X_recon = D_(X)
    # Use Laplace MLE as in the paper, i.e. an absolute (L1) error, not a
    # squared (Gaussian) one
    return torch.mean(torch.sum(torch.abs(X - X_recon), 1))
def plot(samples):
    """Render `samples` as 28x28 grayscale tiles on a 4x4 grid.

    Each sample is reshaped to 28x28 and drawn in its own axis-free cell.
    Returns the matplotlib figure; saving and closing it is left to the caller.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for cell_index, image in enumerate(samples):
        axes = plt.subplot(grid[cell_index])
        plt.axis('off')
        # Strip tick labels and force square cells so digits are not stretched.
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        axes.set_aspect('equal')
        tile = image.reshape(28, 28)
        plt.imshow(tile, cmap='Greys_r')

    return figure
def D(X):
    """Return the mean squared reconstruction error of `X`.

    `X` goes through a one-hidden-layer ReLU network (parameters `D_W1`,
    `D_b1`, `D_W2`, `D_b2`) that produces a reconstruction of `X`. The
    squared difference is summed over axis 1 and the result is averaged into
    a single scalar tensor.
    """
    D_h1 = tf.nn.relu(tf.matmul(X, D_W1) + D_b1)
    # Linear output layer: no activation is applied to the reconstruction.
    X_recon = tf.matmul(D_h1, D_W2) + D_b2
    return tf.reduce_mean(tf.reduce_sum((X - X_recon)**2, 1))
def log(x):
    """Natural log of `x` after adding 1e-8, so that log(0) stays finite."""
    eps = 1e-8
    shifted = x + eps
    return torch.log(shifted)
ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') if not os.path.exists('out/'): os.makedirs('out/') plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight') cnt += 1 plt.close(fig) ================================================ FILE: GAN/boundary_seeking_gan/bgan_tensorflow.py ================================================ import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os mb_size = 32 X_dim = 784 z_dim = 64 h_dim = 128 lr = 1e-3 d_steps = 3 mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) def plot(samples): fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') return fig def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / tf.sqrt(in_dim / 2.) 
def discriminator(x):
    """Map `x` to a sigmoid output via a one-hidden-layer ReLU network.

    Uses the module-level parameters `D_W1`, `D_b1`, `D_W2` and `D_b2`.
    """
    hidden = tf.nn.relu(tf.matmul(x, D_W1) + D_b1)
    logit = tf.matmul(hidden, D_W2) + D_b2
    return tf.nn.sigmoid(logit)
def xavier_init(size):
    """Create a trainable weight of shape `size` with fan-in-scaled noise.

    Values are standard-normal samples multiplied by 1 / sqrt(size[0] / 2).
    The result is wrapped in a `Variable` with `requires_grad=True`.
    """
    fan_in = size[0]
    stddev = 1. / np.sqrt(fan_in / 2.)
    weights = torch.randn(*size) * stddev
    return Variable(weights, requires_grad=True)
lr=1e-3) D_solver = optim.Adam(D_params, lr=1e-3) ones_label = Variable(torch.ones(mb_size, 1)) zeros_label = Variable(torch.zeros(mb_size, 1)) for it in range(100000): # Sample data z = Variable(torch.randn(mb_size, Z_dim)) X, c = mnist.train.next_batch(mb_size) X = Variable(torch.from_numpy(X)) c = Variable(torch.from_numpy(c.astype('float32'))) # Dicriminator forward-loss-backward-update G_sample = G(z, c) D_real = D(X, c) D_fake = D(G_sample, c) D_loss_real = nn.binary_cross_entropy(D_real, ones_label) D_loss_fake = nn.binary_cross_entropy(D_fake, zeros_label) D_loss = D_loss_real + D_loss_fake D_loss.backward() D_solver.step() # Housekeeping - reset gradient reset_grad() # Generator forward-loss-backward-update z = Variable(torch.randn(mb_size, Z_dim)) G_sample = G(z, c) D_fake = D(G_sample, c) G_loss = nn.binary_cross_entropy(D_fake, ones_label) G_loss.backward() G_solver.step() # Housekeeping - reset gradient reset_grad() # Print and plot every now and then if it % 1000 == 0: print('Iter-{}; D_loss: {}; G_loss: {}'.format(it, D_loss.data.numpy(), G_loss.data.numpy())) c = np.zeros(shape=[mb_size, y_dim], dtype='float32') c[:, np.random.randint(0, 10)] = 1. 
c = Variable(torch.from_numpy(c)) samples = G(z, c).data.numpy()[:16] fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') if not os.path.exists('out/'): os.makedirs('out/') plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight') cnt += 1 plt.close(fig) ================================================ FILE: GAN/conditional_gan/cgan_tensorflow.py ================================================ import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 64 Z_dim = 100 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 128 def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / tf.sqrt(in_dim / 2.) 
def sample_Z(m, n):
    """Return an (m, n) array of generator noise drawn uniformly from [-1, 1)."""
    lower, upper = -1., 1.
    return np.random.uniform(lower, upper, size=[m, n])
tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.ones_like(D_logit_fake))) D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D) G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G) sess = tf.Session() sess.run(tf.global_variables_initializer()) if not os.path.exists('out/'): os.makedirs('out/') i = 0 for it in range(1000000): if it % 1000 == 0: n_sample = 16 Z_sample = sample_Z(n_sample, Z_dim) y_sample = np.zeros(shape=[n_sample, y_dim]) y_sample[:, 7] = 1 samples = sess.run(G_sample, feed_dict={Z: Z_sample, y:y_sample}) fig = plot(samples) plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight') i += 1 plt.close(fig) X_mb, y_mb = mnist.train.next_batch(mb_size) Z_sample = sample_Z(mb_size, Z_dim) _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, Z: Z_sample, y:y_mb}) _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={Z: Z_sample, y:y_mb}) if it % 1000 == 0: print('Iter: {}'.format(it)) print('D loss: {:.4}'. 
def G1(z):
    """Generate a first-domain sample from the latent code ``z``.

    ``z`` is passed through the shared generator layers ``G_shared`` and the
    result is fed to the first generator's own output layers ``G1_``.
    """
    return G1_(G_shared(z))
def sample_x(X, size):
    """Return a random contiguous mini-batch of ``size`` rows from ``X``.

    Args:
        X: NumPy array whose first axis indexes the samples.
        size: Number of consecutive rows to draw; must not exceed
            ``X.shape[0]``.

    Returns:
        A ``Variable`` wrapping ``X[start:start + size]`` for a start offset
        chosen uniformly at random.

    Raises:
        ValueError: If ``size`` is larger than the number of rows in ``X``.
    """
    # ``np.random.randint`` excludes its upper bound, so add 1: this makes the
    # final window reachable and keeps ``size == X.shape[0]`` from raising.
    start_idx = np.random.randint(0, X.shape[0] - size + 1)
    return Variable(torch.from_numpy(X[start_idx:start_idx + size]))
def plot(samples):
    """Render ``samples`` as greyscale 28x28 tiles on a 4x4 grid.

    Each entry of ``samples`` is reshaped to 28x28 and drawn in the grid cell
    matching its position, with axis decorations hidden. The figure is
    returned so the caller can save and close it.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for cell, flat_image in enumerate(samples):
        tile = plt.subplot(grid[cell])
        plt.axis('off')
        tile.set_xticklabels([])
        tile.set_yticklabels([])
        tile.set_aspect('equal')
        pixels = flat_image.reshape(28, 28)
        plt.imshow(pixels, cmap='Greys_r')

    return figure
def D(X1, X2):
    """Score a pair of inputs with the coupled discriminators.

    ``X1`` and ``X2`` each go through their own ReLU hidden layer
    (``D1_W1``/``D1_b1`` and ``D2_W1``/``D2_b1``); both hidden activations are
    then scored by the same sigmoid output layer (``D_W2``/``D_b2``).

    Returns:
        A tuple ``(D1_out, D2_out)`` of sigmoid outputs, one per input.
    """
    def shared_head(hidden):
        # Output layer whose weights are shared by both discriminators.
        return tf.nn.sigmoid(tf.matmul(hidden, D_W2) + D_b2)

    hidden_1 = tf.nn.relu(tf.matmul(X1, D1_W1) + D1_b1)
    hidden_2 = tf.nn.relu(tf.matmul(X2, D2_W1) + D2_b1)
    return shared_head(hidden_1), shared_head(hidden_2)
def sample_X(X, size):
    """Return a random contiguous mini-batch of ``size`` rows from ``X``.

    Args:
        X: NumPy array whose first axis indexes the samples.
        size: Number of consecutive rows to draw; must not exceed
            ``X.shape[0]``.

    Returns:
        The slice ``X[start:start + size]`` for a start offset chosen
        uniformly at random.

    Raises:
        ValueError: If ``size`` is larger than the number of rows in ``X``.
    """
    # ``np.random.randint`` excludes its upper bound, so add 1: this makes the
    # final window reachable and keeps ``size == X.shape[0]`` from raising.
    start_idx = np.random.randint(0, X.shape[0] - size + 1)
    return X[start_idx:start_idx + size]
def log(x):
    """Natural log of ``x`` offset by 1e-8, so that ``log(0)`` stays finite."""
    return (x + 1e-8).log()
def sample_x(X, size):
    """Return a random contiguous mini-batch of ``size`` rows from ``X``.

    Args:
        X: NumPy array whose first axis indexes the samples.
        size: Number of consecutive rows to draw; must not exceed
            ``X.shape[0]``.

    Returns:
        A ``Variable`` wrapping ``X[start:start + size]`` for a start offset
        chosen uniformly at random.

    Raises:
        ValueError: If ``size`` is larger than the number of rows in ``X``.
    """
    # The upper bound of ``np.random.randint`` is exclusive; without the +1
    # the last window is never drawn and a full-length request raises.
    start_idx = np.random.randint(0, X.shape[0] - size + 1)
    return Variable(torch.from_numpy(X[start_idx:start_idx + size]))
def xavier_init(size):
    """Sample initial weights of shape ``size`` from a zero-mean normal.

    The standard deviation is ``1 / sqrt(size[0] / 2)``, i.e. it depends only
    on the first entry of ``size``.
    """
    return tf.random_normal(shape=size, stddev=1. / tf.sqrt(size[0] / 2.))
def sample_X(X, size):
    """Return a random contiguous mini-batch of ``size`` rows from ``X``.

    Args:
        X: NumPy array whose first axis indexes the samples.
        size: Number of consecutive rows to draw; must not exceed
            ``X.shape[0]``.

    Returns:
        The slice ``X[start:start + size]`` for a start offset chosen
        uniformly at random.

    Raises:
        ValueError: If ``size`` is larger than the number of rows in ``X``.
    """
    # The upper bound of ``np.random.randint`` is exclusive; without the +1
    # the last window is never drawn and a full-length request raises.
    start_idx = np.random.randint(0, X.shape[0] - size + 1)
    return X[start_idx:start_idx + size]
def log(x):
    """Return ``log(x + 1e-8)``; the small offset guards against ``log(0)``."""
    eps = 1e-8
    return torch.log(x + eps)
def sample_x(X, size):
    """Return a random contiguous mini-batch of ``size`` rows from ``X``.

    Args:
        X: NumPy array whose first axis indexes the samples.
        size: Number of consecutive rows to draw; must not exceed
            ``X.shape[0]``.

    Returns:
        A ``Variable`` wrapping ``X[start:start + size]`` for a start offset
        chosen uniformly at random.

    Raises:
        ValueError: If ``size`` is larger than the number of rows in ``X``.
    """
    # ``high`` is exclusive in ``np.random.randint``: +1 lets the final window
    # be selected and makes a request for every row valid.
    start_idx = np.random.randint(0, X.shape[0] - size + 1)
    return Variable(torch.from_numpy(X[start_idx:start_idx + size]))
1)) D1_fake = D1(X1_sample) D2_fake = D2(X2_sample) G_loss = -torch.mean(D1_fake) - torch.mean(D2_fake) reg1 = lam1 * torch.mean(torch.sum(torch.abs(X1_recon - X1), 1)) reg2 = lam2 * torch.mean(torch.sum(torch.abs(X2_recon - X2), 1)) G_loss += reg1 + reg2 G_loss.backward() G_solver.step() reset_grad() # Print and plot every now and then if it % 1000 == 0: print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}' .format(it, D1_loss.data[0] + D2_loss.data[0], G_loss.data[0])) real1 = X1.data.numpy()[:4] real2 = X2.data.numpy()[:4] samples1 = X1_sample.data.numpy()[:4] samples2 = X2_sample.data.numpy()[:4] samples = np.vstack([real2, samples1, real1, samples2]) fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') if not os.path.exists('out/'): os.makedirs('out/') plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight') cnt += 1 plt.close(fig) ================================================ FILE: GAN/dual_gan/dualgan_tensorflow.py ================================================ import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os import scipy.ndimage.interpolation mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 32 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] z_dim = 10 h_dim = 128 eps = 1e-8 lr = 1e-3 d_steps = 3 lam1, lam2 = 1000, 1000 def plot(samples): fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), 
def D2(X):
    """Second critic: a ReLU hidden layer followed by a linear output.

    The output is an unbounded score (no sigmoid is applied).

    Args:
        X: Batch of flattened inputs with ``X_dim`` features.

    Returns:
        Tensor of critic scores, one per input row.
    """
    # Use D2's own first layer. The previous version read D1_W1/D1_b1 here,
    # so D2_W1/D2_b1 (listed in theta_D2) never received a gradient and the
    # two critics silently shared their hidden layer.
    h = tf.nn.relu(tf.matmul(X, D2_W1) + D2_b1)
    return tf.matmul(h, D2_W2) + D2_b2


# Trainable parameters of generator G1. G1_b1 was previously missing (G1_b2
# was listed twice), which left the first-layer bias untrained.
theta_G1 = [G1_W1, G1_b1, G1_W2, G1_b2]
def sample_X(X, size):
    """Return a random contiguous mini-batch of ``size`` rows from ``X``.

    Args:
        X: NumPy array whose first axis indexes the samples.
        size: Number of consecutive rows to draw; must not exceed
            ``X.shape[0]``.

    Returns:
        The slice ``X[start:start + size]`` for a start offset chosen
        uniformly at random.

    Raises:
        ValueError: If ``size`` is larger than the number of rows in ``X``.
    """
    # ``high`` is exclusive in ``np.random.randint``: +1 lets the final window
    # be selected and makes a request for every row valid.
    start_idx = np.random.randint(0, X.shape[0] - size + 1)
    return X[start_idx:start_idx + size]
def D(X):
    """Energy of ``X`` under the autoencoder ``D_``.

    Computes the squared reconstruction error summed over dimension 1 and
    averaged over the remaining (batch) dimension, yielding a single value.
    """
    residual = X - D_(X)
    per_sample_error = torch.sum(residual ** 2, 1)
    return torch.mean(per_sample_error)
def plot(samples):
    """Lay out ``samples`` as 28x28 greyscale images on a 4x4 grid.

    Returns the created figure; saving and closing it is left to the caller.
    """
    def draw_tile(slot, image):
        # One grid cell: bare axes (no ticks or frame) showing a single image.
        axes = plt.subplot(slot)
        plt.axis('off')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        axes.set_aspect('equal')
        plt.imshow(image.reshape(28, 28), cmap='Greys_r')

    fig = plt.figure(figsize=(4, 4))
    layout = gridspec.GridSpec(4, 4)
    layout.update(wspace=0.05, hspace=0.05)

    for position, sample in enumerate(samples):
        draw_tile(layout[position], sample)

    return fig
def sample_z(m, n):
    """Return an ``m`` x ``n`` array of latent noise drawn uniformly from [-1, 1)."""
    lower, upper = -1., 1.
    return np.random.uniform(lower, upper, size=[m, n])
================================================
FILE: GAN/f_gan/f_gan_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32
z_dim = 10
X_dim = mnist.train.images.shape[1]
y_dim = mnist.train.labels.shape[1]
h_dim = 128
cnt = 0  # index of the next sample image written to out/
lr = 1e-3


def log(x):
    """Return log(x + 1e-8); the offset keeps the argument away from zero."""
    return torch.log(x + 1e-8)


# Generator: z -> hidden (ReLU) -> X_dim outputs squashed by a sigmoid
G = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)

# Discriminator: X -> hidden (ReLU) -> one unbounded output (no sigmoid)
D = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1),
)


def reset_grad():
    """Zero the accumulated gradients of both G and D."""
    G.zero_grad()
    D.zero_grad()


G_solver = optim.Adam(G.parameters(), lr=lr)
D_solver = optim.Adam(D.parameters(), lr=lr)


for it in range(1000000):
    # Sample data
    z = Variable(torch.randn(mb_size, z_dim))
    X, _ = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))

    # Discriminator
    G_sample = G(z)
    D_real = D(X)
    D_fake = D(G_sample)

    # Uncomment D_loss and its respective G_loss of your choice
    # ---------------------------------------------------------

    """ Total Variation """
    # D_loss = -(torch.mean(0.5 * torch.tanh(D_real)) -
    #            torch.mean(0.5 * torch.tanh(D_fake)))
    """ Forward KL """
    # D_loss = -(torch.mean(D_real) - torch.mean(torch.exp(D_fake - 1)))
    """ Reverse KL """
    D_loss = -(torch.mean(-torch.exp(D_real)) - torch.mean(-1 - D_fake))
    """ Pearson Chi-squared """
    # D_loss = -(torch.mean(D_real) - torch.mean(0.25*D_fake**2 + D_fake))
    """ Squared Hellinger """
    # D_loss = -(torch.mean(1 - torch.exp(D_real)) -
    #            torch.mean((1 - torch.exp(D_fake)) / (torch.exp(D_fake))))

    D_loss.backward()
    D_solver.step()
    reset_grad()

    # Generator (fresh forward pass, reusing the same noise batch z)
    G_sample = G(z)
    D_fake = D(G_sample)

    """ Total Variation """
    # G_loss = -torch.mean(0.5 * torch.tanh(D_fake))
    """ Forward KL """
    # G_loss = -torch.mean(torch.exp(D_fake - 1))
    """ Reverse KL """
    G_loss = -torch.mean(-1 - D_fake)
    """ Pearson Chi-squared """
    # G_loss = -torch.mean(0.25*D_fake**2 + D_fake)
    """ Squared Hellinger """
    # G_loss = -torch.mean((1 - torch.exp(D_fake)) / (torch.exp(D_fake)))

    G_loss.backward()
    G_solver.step()
    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss.data[0], G_loss.data[0]))

        samples = G(z).data.numpy()[:16]

        # 4x4 grid of 28x28 grayscale images
        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/f_gan/f_gan_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os


mb_size = 32
X_dim = 784
z_dim = 64
h_dim = 128
lr = 1e-3
d_steps = 3  # NOTE(review): not referenced anywhere in the visible script

mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)


def plot(samples):
    """Draw `samples` as 28x28 grayscale images on a 4x4 grid; return the figure."""
    fig = plt.figure(figsize=(4, 4))
    gs = gridspec.GridSpec(4, 4)
    gs.update(wspace=0.05, hspace=0.05)

    for i, sample in enumerate(samples):
        ax = plt.subplot(gs[i])
        plt.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

    return fig


def xavier_init(size):
    """Return a random-normal tensor of shape `size` with stddev 1/sqrt(size[0]/2)."""
    in_dim = size[0]
    xavier_stddev = 1. / tf.sqrt(in_dim / 2.)
    return tf.random_normal(shape=size, stddev=xavier_stddev)


X = tf.placeholder(tf.float32, shape=[None, X_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])

D_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

G_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
G_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

theta_G = [G_W1, G_W2, G_b1, G_b2]
theta_D = [D_W1, D_W2, D_b1, D_b2]


def sample_z(m, n):
    """Return an (m, n) array of noise drawn uniformly from [-1, 1)."""
    return np.random.uniform(-1., 1., size=[m, n])


def generator(z):
    """Map noise `z` through one ReLU hidden layer to sigmoid outputs of width X_dim."""
    G_h1 = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    G_log_prob = tf.matmul(G_h1, G_W2) + G_b2
    G_prob = tf.nn.sigmoid(G_log_prob)
    return G_prob


def discriminator(x):
    """Map `x` through one ReLU hidden layer to a single unbounded output."""
    D_h1 = tf.nn.relu(tf.matmul(x, D_W1) + D_b1)
    out = tf.matmul(D_h1, D_W2) + D_b2
    return out


G_sample = generator(z)

D_real = discriminator(X)
D_fake = discriminator(G_sample)

# Uncomment D_loss and its respective G_loss of your choice
# ---------------------------------------------------------

""" Total Variation """
# D_loss = -(tf.reduce_mean(0.5 * tf.nn.tanh(D_real)) -
#            tf.reduce_mean(0.5 * tf.nn.tanh(D_fake)))
# G_loss = -tf.reduce_mean(0.5 * tf.nn.tanh(D_fake))

""" Forward KL """
# D_loss = -(tf.reduce_mean(D_real) - tf.reduce_mean(tf.exp(D_fake - 1)))
# G_loss = -tf.reduce_mean(tf.exp(D_fake - 1))

""" Reverse KL """
# D_loss = -(tf.reduce_mean(-tf.exp(D_real)) - tf.reduce_mean(-1 - D_fake))
# G_loss = -tf.reduce_mean(-1 - D_fake)

""" Pearson Chi-squared """
D_loss = -(tf.reduce_mean(D_real) - tf.reduce_mean(0.25*D_fake**2 + D_fake))
G_loss = -tf.reduce_mean(0.25*D_fake**2 + D_fake)

""" Squared Hellinger """
# D_loss = -(tf.reduce_mean(1 - tf.exp(D_real)) -
#            tf.reduce_mean((1 - tf.exp(D_fake)) / (tf.exp(D_fake))))
# G_loss = -tf.reduce_mean((1 - tf.exp(D_fake)) / (tf.exp(D_fake)))

# Each optimizer only updates its own network's variables
D_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(D_loss, var_list=theta_D))
G_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(G_loss, var_list=theta_G))

sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out/'):
    os.makedirs('out/')

i = 0  # index of the next sample image written to out/

for it in range(1000000):
    X_mb, _ = mnist.train.next_batch(mb_size)
    z_mb = sample_z(mb_size, z_dim)

    _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, z: z_mb})
    # The generator step reuses the noise batch from the discriminator step
    _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={z: z_mb})

    if it % 1000 == 0:
        print('Iter: {}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss_curr, G_loss_curr))

        samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)})

        fig = plot(samples)
        plt.savefig('out/{}.png'
                    .format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)


================================================
FILE: GAN/generative_adversarial_parallelization/gap_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
import random
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32
z_dim = 10
X_dim = mnist.train.images.shape[1]
y_dim = mnist.train.labels.shape[1]
h_dim = 128
cnt = 0  # index of the next sample image written to out/
lr = 1e-3
K = 100  # discriminators are exchanged between the pairs every K iterations


def log(x):
    """Return log(x + 1e-8); the offset keeps the argument away from zero."""
    return torch.log(x + 1e-8)


# Two independent GANs with identical architectures
G1_ = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)

D1_ = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1),
    torch.nn.Sigmoid()
)

G2_ = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)

D2_ = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1),
    torch.nn.Sigmoid()
)

nets = [G1_, D1_, G2_, D2_]


def reset_grad():
    """Zero the accumulated gradients of all four networks."""
    for net in nets:
        net.zero_grad()


G1_solver = optim.Adam(G1_.parameters(), lr=lr)
D1_solver = optim.Adam(D1_.parameters(), lr=lr)
G2_solver = optim.Adam(G2_.parameters(), lr=lr)
D2_solver = optim.Adam(D2_.parameters(), lr=lr)

# Bundle each model with its optimizer so they move together when swapped
D1 = {'model': D1_, 'solver': D1_solver}
G1 = {'model': G1_, 'solver': G1_solver}
D2 = {'model': D2_, 'solver': D2_solver}
G2 = {'model': G2_, 'solver': G2_solver}

GAN_pairs = [(D1, G1), (D2, G2)]

for it in range(1000000):
    # Sample data
    z = Variable(torch.randn(mb_size, z_dim))
    X, _ = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))

    # Both pairs are trained on the same data and noise batch
    for D, G in GAN_pairs:
        # Discriminator
        G_sample = G['model'](z)
        D_real = D['model'](X)
        D_fake = D['model'](G_sample)

        D_loss = -torch.mean(log(D_real) + log(1 - D_fake))

        D_loss.backward()
        D['solver'].step()
        reset_grad()

        # Generator
        G_sample = G['model'](z)
        D_fake = D['model'](G_sample)

        G_loss = -torch.mean(log(D_fake))

        G_loss.backward()
        G['solver'].step()
        reset_grad()

    if it != 0 and it % K == 0:
        # Swap (D, G) pairs
        new_D1, new_D2 = GAN_pairs[1][0], GAN_pairs[0][0]
        GAN_pairs = [(new_D1, G1), (new_D2, G2)]

    # Print and plot every now and then
    if it % 1000 == 0:
        # D_loss / G_loss hold the values from the last pair trained above
        print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss.data[0], G_loss.data[0]))

        # Pick G randomly
        G_rand = random.choice([G1_, G2_])
        samples = G_rand(z).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/gibbsnet/gibbsnet_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn import torch.autograd as autograd import torch.optim as optim import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from torch.autograd import Variable from tensorflow.examples.tutorials.mnist import input_data from itertools import * mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 16 z_dim = 100 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 256 cnt = 0 lr = 1e-4 N = 10 def log(x): return torch.log(x + 1e-8) # Inference net (Encoder) Q(z|X) Q = torch.nn.Sequential( torch.nn.Linear(X_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, z_dim) ) # Generator net (Decoder) P(X|z) P = torch.nn.Sequential( torch.nn.Linear(z_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, X_dim), torch.nn.Sigmoid() ) D_ = torch.nn.Sequential( torch.nn.Linear(X_dim + z_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, 1), torch.nn.Sigmoid() ) def D(X, z): return D_(torch.cat([X, z], 1)) def reset_grad(): Q.zero_grad() P.zero_grad() D_.zero_grad() G_solver = optim.Adam(chain(Q.parameters(), P.parameters()), lr=lr) D_solver = optim.Adam(D_.parameters(), lr=lr) for it in range(1000000): # Sample data X, _ = mnist.train.next_batch(mb_size) X = Variable(torch.from_numpy(X)) # Discriminator z_hat = Q(X) # Do N step Gibbs sampling z = Variable(torch.randn(mb_size, z_dim)) for _ in range(N): z_n = z.clone() X_hat = P(z_n) z = Q(X_hat) p_data = D(X, z_hat) p_model = D(X_hat, z_n) D_loss = -torch.mean(log(p_data) + log(1 - p_model)) D_loss.backward(retain_graph=True) D_solver.step() G_solver.step() reset_grad() G_loss = -torch.mean(log(p_model) + log(1 - p_data)) G_loss.backward() G_solver.step() reset_grad() # Print and plot every now and then if it % 100 == 0: print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}' .format(it, D_loss.data[0], G_loss.data[0])) z = Variable(torch.randn(mb_size, z_dim)) for _ in range(N): z_n = z.clone() X_hat = P(z_n) z = 
Q(X_hat) samples = X_hat.data.numpy()[:16] fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') if not os.path.exists('out/'): os.makedirs('out/') plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight') cnt += 1 plt.close(fig) ================================================ FILE: GAN/improved_wasserstein_gan/wgan_gp_tensorflow.py ================================================ import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os mb_size = 32 X_dim = 784 z_dim = 10 h_dim = 128 lam = 10 n_disc = 5 lr = 1e-4 mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) def plot(samples): fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') return fig def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / tf.sqrt(in_dim / 2.) 
return tf.random_normal(shape=size, stddev=xavier_stddev) X = tf.placeholder(tf.float32, shape=[None, X_dim]) D_W1 = tf.Variable(xavier_init([X_dim, h_dim])) D_b1 = tf.Variable(tf.zeros(shape=[h_dim])) D_W2 = tf.Variable(xavier_init([h_dim, 1])) D_b2 = tf.Variable(tf.zeros(shape=[1])) theta_D = [D_W1, D_W2, D_b1, D_b2] z = tf.placeholder(tf.float32, shape=[None, z_dim]) G_W1 = tf.Variable(xavier_init([z_dim, h_dim])) G_b1 = tf.Variable(tf.zeros(shape=[h_dim])) G_W2 = tf.Variable(xavier_init([h_dim, X_dim])) G_b2 = tf.Variable(tf.zeros(shape=[X_dim])) theta_G = [G_W1, G_W2, G_b1, G_b2] def sample_z(m, n): return np.random.uniform(-1., 1., size=[m, n]) def G(z): G_h1 = tf.nn.relu(tf.matmul(z, G_W1) + G_b1) G_log_prob = tf.matmul(G_h1, G_W2) + G_b2 G_prob = tf.nn.sigmoid(G_log_prob) return G_prob def D(X): D_h1 = tf.nn.relu(tf.matmul(X, D_W1) + D_b1) out = tf.matmul(D_h1, D_W2) + D_b2 return out G_sample = G(z) D_real = D(X) D_fake = D(G_sample) eps = tf.random_uniform([mb_size, 1], minval=0., maxval=1.) X_inter = eps*X + (1. 
- eps)*G_sample grad = tf.gradients(D(X_inter), [X_inter])[0] grad_norm = tf.sqrt(tf.reduce_sum((grad)**2, axis=1)) grad_pen = lam * tf.reduce_mean((grad_norm - 1)**2) D_loss = tf.reduce_mean(D_fake) - tf.reduce_mean(D_real) + grad_pen G_loss = -tf.reduce_mean(D_fake) D_solver = (tf.train.AdamOptimizer(learning_rate=lr, beta1=0.5) .minimize(D_loss, var_list=theta_D)) G_solver = (tf.train.AdamOptimizer(learning_rate=lr, beta1=0.5) .minimize(G_loss, var_list=theta_G)) sess = tf.Session() sess.run(tf.global_variables_initializer()) if not os.path.exists('out/'): os.makedirs('out/') i = 0 for it in range(1000000): for _ in range(n_disc): X_mb, _ = mnist.train.next_batch(mb_size) _, D_loss_curr = sess.run( [D_solver, D_loss], feed_dict={X: X_mb, z: sample_z(mb_size, z_dim)} ) _, G_loss_curr = sess.run( [G_solver, G_loss], feed_dict={z: sample_z(mb_size, z_dim)} ) if it % 1000 == 0: print('Iter: {}; D loss: {:.4}; G_loss: {:.4}' .format(it, D_loss_curr, G_loss_curr)) if it % 1000 == 0: samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)}) fig = plot(samples) plt.savefig('out/{}.png' .format(str(i).zfill(3)), bbox_inches='tight') i += 1 plt.close(fig) ================================================ FILE: GAN/infogan/infogan_pytorch.py ================================================ import torch import torch.nn.functional as nn import torch.autograd as autograd import torch.optim as optim import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from torch.autograd import Variable from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 32 Z_dim = 16 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 128 cnt = 0 lr = 1e-3 def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / np.sqrt(in_dim / 2.) 
return Variable(torch.randn(*size) * xavier_stddev, requires_grad=True) """ ==================== GENERATOR ======================== """ Wzh = xavier_init(size=[Z_dim + 10, h_dim]) bzh = Variable(torch.zeros(h_dim), requires_grad=True) Whx = xavier_init(size=[h_dim, X_dim]) bhx = Variable(torch.zeros(X_dim), requires_grad=True) def G(z, c): inputs = torch.cat([z, c], 1) h = nn.relu(inputs @ Wzh + bzh.repeat(inputs.size(0), 1)) X = nn.sigmoid(h @ Whx + bhx.repeat(h.size(0), 1)) return X """ ==================== DISCRIMINATOR ======================== """ Wxh = xavier_init(size=[X_dim, h_dim]) bxh = Variable(torch.zeros(h_dim), requires_grad=True) Why = xavier_init(size=[h_dim, 1]) bhy = Variable(torch.zeros(1), requires_grad=True) def D(X): h = nn.relu(X @ Wxh + bxh.repeat(X.size(0), 1)) y = nn.sigmoid(h @ Why + bhy.repeat(h.size(0), 1)) return y """ ====================== Q(c|X) ========================== """ Wqxh = xavier_init(size=[X_dim, h_dim]) bqxh = Variable(torch.zeros(h_dim), requires_grad=True) Whc = xavier_init(size=[h_dim, 10]) bhc = Variable(torch.zeros(10), requires_grad=True) def Q(X): h = nn.relu(X @ Wqxh + bqxh.repeat(X.size(0), 1)) c = nn.softmax(h @ Whc + bhc.repeat(h.size(0), 1)) return c G_params = [Wzh, bzh, Whx, bhx] D_params = [Wxh, bxh, Why, bhy] Q_params = [Wqxh, bqxh, Whc, bhc] params = G_params + D_params + Q_params """ ===================== TRAINING ======================== """ def reset_grad(): for p in params: if p.grad is not None: data = p.grad.data p.grad = Variable(data.new().resize_as_(data).zero_()) G_solver = optim.Adam(G_params, lr=1e-3) D_solver = optim.Adam(D_params, lr=1e-3) Q_solver = optim.Adam(G_params + Q_params, lr=1e-3) def sample_c(size): c = np.random.multinomial(1, 10*[0.1], size=size) c = Variable(torch.from_numpy(c.astype('float32'))) return c for it in range(100000): # Sample data X, _ = mnist.train.next_batch(mb_size) X = Variable(torch.from_numpy(X)) z = Variable(torch.randn(mb_size, Z_dim)) c = sample_c(mb_size) 
# Dicriminator forward-loss-backward-update G_sample = G(z, c) D_real = D(X) D_fake = D(G_sample) D_loss = -torch.mean(torch.log(D_real + 1e-8) + torch.log(1 - D_fake + 1e-8)) D_loss.backward() D_solver.step() # Housekeeping - reset gradient reset_grad() # Generator forward-loss-backward-update G_sample = G(z, c) D_fake = D(G_sample) G_loss = -torch.mean(torch.log(D_fake + 1e-8)) G_loss.backward() G_solver.step() # Housekeeping - reset gradient reset_grad() # Q forward-loss-backward-update G_sample = G(z, c) Q_c_given_x = Q(G_sample) crossent_loss = torch.mean(-torch.sum(c * torch.log(Q_c_given_x + 1e-8), dim=1)) mi_loss = crossent_loss mi_loss.backward() Q_solver.step() # Housekeeping - reset gradient reset_grad() # Print and plot every now and then if it % 1000 == 0: idx = np.random.randint(0, 10) c = np.zeros([mb_size, 10]) c[range(mb_size), idx] = 1 c = Variable(torch.from_numpy(c.astype('float32'))) samples = G(z, c).data.numpy()[:16] print('Iter-{}; D_loss: {}; G_loss: {}; Idx: {}' .format(it, D_loss.data.numpy(), G_loss.data.numpy(), idx)) fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') if not os.path.exists('out/'): os.makedirs('out/') plt.savefig('out/{}.png' .format(str(cnt).zfill(3)), bbox_inches='tight') cnt += 1 plt.close(fig) ================================================ FILE: GAN/infogan/infogan_tensorflow.py ================================================ import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / tf.sqrt(in_dim / 2.) 
return tf.random_normal(shape=size, stddev=xavier_stddev) X = tf.placeholder(tf.float32, shape=[None, 784]) D_W1 = tf.Variable(xavier_init([784, 128])) D_b1 = tf.Variable(tf.zeros(shape=[128])) D_W2 = tf.Variable(xavier_init([128, 1])) D_b2 = tf.Variable(tf.zeros(shape=[1])) theta_D = [D_W1, D_W2, D_b1, D_b2] Z = tf.placeholder(tf.float32, shape=[None, 16]) c = tf.placeholder(tf.float32, shape=[None, 10]) G_W1 = tf.Variable(xavier_init([26, 256])) G_b1 = tf.Variable(tf.zeros(shape=[256])) G_W2 = tf.Variable(xavier_init([256, 784])) G_b2 = tf.Variable(tf.zeros(shape=[784])) theta_G = [G_W1, G_W2, G_b1, G_b2] Q_W1 = tf.Variable(xavier_init([784, 128])) Q_b1 = tf.Variable(tf.zeros(shape=[128])) Q_W2 = tf.Variable(xavier_init([128, 10])) Q_b2 = tf.Variable(tf.zeros(shape=[10])) theta_Q = [Q_W1, Q_W2, Q_b1, Q_b2] def sample_Z(m, n): return np.random.uniform(-1., 1., size=[m, n]) def sample_c(m): return np.random.multinomial(1, 10*[0.1], size=m) def generator(z, c): inputs = tf.concat(axis=1, values=[z, c]) G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1) G_log_prob = tf.matmul(G_h1, G_W2) + G_b2 G_prob = tf.nn.sigmoid(G_log_prob) return G_prob def discriminator(x): D_h1 = tf.nn.relu(tf.matmul(x, D_W1) + D_b1) D_logit = tf.matmul(D_h1, D_W2) + D_b2 D_prob = tf.nn.sigmoid(D_logit) return D_prob def Q(x): Q_h1 = tf.nn.relu(tf.matmul(x, Q_W1) + Q_b1) Q_prob = tf.nn.softmax(tf.matmul(Q_h1, Q_W2) + Q_b2) return Q_prob def plot(samples): fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') return fig G_sample = generator(Z, c) D_real = discriminator(X) D_fake = discriminator(G_sample) Q_c_given_x = Q(G_sample) D_loss = -tf.reduce_mean(tf.log(D_real + 1e-8) + tf.log(1 - D_fake + 1e-8)) G_loss = -tf.reduce_mean(tf.log(D_fake + 1e-8)) 
cross_ent = tf.reduce_mean(-tf.reduce_sum(tf.log(Q_c_given_x + 1e-8) * c, 1)) Q_loss = cross_ent D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D) G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G) Q_solver = tf.train.AdamOptimizer().minimize(Q_loss, var_list=theta_G + theta_Q) mb_size = 32 Z_dim = 16 mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) sess = tf.Session() sess.run(tf.global_variables_initializer()) if not os.path.exists('out/'): os.makedirs('out/') i = 0 for it in range(1000000): if it % 1000 == 0: Z_noise = sample_Z(16, Z_dim) idx = np.random.randint(0, 10) c_noise = np.zeros([16, 10]) c_noise[range(16), idx] = 1 samples = sess.run(G_sample, feed_dict={Z: Z_noise, c: c_noise}) fig = plot(samples) plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight') i += 1 plt.close(fig) X_mb, _ = mnist.train.next_batch(mb_size) Z_noise = sample_Z(mb_size, Z_dim) c_noise = sample_c(mb_size) _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, Z: Z_noise, c: c_noise}) _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={Z: Z_noise, c: c_noise}) sess.run([Q_solver], feed_dict={Z: Z_noise, c: c_noise}) if it % 1000 == 0: print('Iter: {}'.format(it)) print('D loss: {:.4}'. 
format(D_loss_curr)) print('G_loss: {:.4}'.format(G_loss_curr)) print() ================================================ FILE: GAN/least_squares_gan/lsgan_pytorch.py ================================================ import torch import torch.nn import torch.nn.functional as nn import torch.autograd as autograd import torch.optim as optim import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from torch.autograd import Variable from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 32 z_dim = 10 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 128 cnt = 0 d_step = 3 lr = 1e-3 G = torch.nn.Sequential( torch.nn.Linear(z_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, X_dim), torch.nn.Sigmoid() ) D = torch.nn.Sequential( torch.nn.Linear(X_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, 1), ) def reset_grad(): G.zero_grad() D.zero_grad() G_solver = optim.Adam(G.parameters(), lr=lr) D_solver = optim.Adam(D.parameters(), lr=lr) for it in range(1000000): for _ in range(d_step): # Sample data z = Variable(torch.randn(mb_size, z_dim)) X, _ = mnist.train.next_batch(mb_size) X = Variable(torch.from_numpy(X)) # Dicriminator G_sample = G(z) D_real = D(X) D_fake = D(G_sample) D_loss = 0.5 * (torch.mean((D_real - 1)**2) + torch.mean(D_fake**2)) D_loss.backward() D_solver.step() reset_grad() # Generator z = Variable(torch.randn(mb_size, z_dim)) G_sample = G(z) D_fake = D(G_sample) G_loss = 0.5 * torch.mean((D_fake - 1)**2) G_loss.backward() G_solver.step() reset_grad() # Print and plot every now and then if it % 1000 == 0: print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}' .format(it, D_loss.data[0], G_loss.data[0])) samples = G(z).data.numpy()[:16] fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') 
ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') if not os.path.exists('out/'): os.makedirs('out/') plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight') cnt += 1 plt.close(fig) ================================================ FILE: GAN/least_squares_gan/lsgan_tensorflow.py ================================================ import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os mb_size = 32 X_dim = 784 z_dim = 64 h_dim = 128 lr = 1e-3 d_steps = 3 mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) def plot(samples): fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') return fig def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / tf.sqrt(in_dim / 2.) 
return tf.random_normal(shape=size, stddev=xavier_stddev) X = tf.placeholder(tf.float32, shape=[None, X_dim]) z = tf.placeholder(tf.float32, shape=[None, z_dim]) D_W1 = tf.Variable(xavier_init([X_dim, h_dim])) D_b1 = tf.Variable(tf.zeros(shape=[h_dim])) D_W2 = tf.Variable(xavier_init([h_dim, 1])) D_b2 = tf.Variable(tf.zeros(shape=[1])) G_W1 = tf.Variable(xavier_init([z_dim, h_dim])) G_b1 = tf.Variable(tf.zeros(shape=[h_dim])) G_W2 = tf.Variable(xavier_init([h_dim, X_dim])) G_b2 = tf.Variable(tf.zeros(shape=[X_dim])) theta_G = [G_W1, G_W2, G_b1, G_b2] theta_D = [D_W1, D_W2, D_b1, D_b2] def sample_z(m, n): return np.random.uniform(-1., 1., size=[m, n]) def generator(z): G_h1 = tf.nn.relu(tf.matmul(z, G_W1) + G_b1) G_log_prob = tf.matmul(G_h1, G_W2) + G_b2 G_prob = tf.nn.sigmoid(G_log_prob) return G_prob def discriminator(x): D_h1 = tf.nn.relu(tf.matmul(x, D_W1) + D_b1) out = tf.matmul(D_h1, D_W2) + D_b2 return out G_sample = generator(z) D_real = discriminator(X) D_fake = discriminator(G_sample) D_loss = 0.5 * (tf.reduce_mean((D_real - 1)**2) + tf.reduce_mean(D_fake**2)) G_loss = 0.5 * tf.reduce_mean((D_fake - 1)**2) D_solver = (tf.train.AdamOptimizer(learning_rate=lr) .minimize(D_loss, var_list=theta_D)) G_solver = (tf.train.AdamOptimizer(learning_rate=lr) .minimize(G_loss, var_list=theta_G)) sess = tf.Session() sess.run(tf.global_variables_initializer()) if not os.path.exists('out/'): os.makedirs('out/') i = 0 for it in range(1000000): for _ in range(d_steps): X_mb, _ = mnist.train.next_batch(mb_size) z_mb = sample_z(mb_size, z_dim) _, D_loss_curr = sess.run( [D_solver, D_loss], feed_dict={X: X_mb, z: z_mb} ) X_mb, _ = mnist.train.next_batch(mb_size) z_mb = sample_z(mb_size, z_dim) _, G_loss_curr = sess.run( [G_solver, G_loss], feed_dict={X: X_mb, z: sample_z(mb_size, z_dim)} ) if it % 1000 == 0: print('Iter: {}; D_loss: {:.4}; G_loss: {:.4}' .format(it, D_loss_curr, G_loss_curr)) samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)}) fig = plot(samples) 
plt.savefig('out/{}.png' .format(str(i).zfill(3)), bbox_inches='tight') i += 1 plt.close(fig) ================================================ FILE: GAN/magan/magan_pytorch.py ================================================ import torch import torch.nn import torch.nn.functional as nn import torch.autograd as autograd import torch.optim as optim import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from torch.autograd import Variable from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 32 z_dim = 10 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 128 cnt = 0 d_step = 3 lr = 5e-4 m = 5 n_iter = 1000 n_epoch = 1000 N = n_iter * mb_size # N data per epoch G = torch.nn.Sequential( torch.nn.Linear(z_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, X_dim), torch.nn.Sigmoid() ) # D is an autoencoder D_ = torch.nn.Sequential( torch.nn.Linear(X_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, X_dim), ) # Energy is the MSE of autoencoder def D(X): X_recon = D_(X) return torch.sum((X - X_recon)**2, 1) def reset_grad(): G.zero_grad() D_.zero_grad() G_solver = optim.Adamax(G.parameters(), lr=lr) D_solver = optim.Adamax(D_.parameters(), lr=lr) # Pretrain discriminator for it in range(2*n_iter): X, _ = mnist.train.next_batch(mb_size) X = Variable(torch.from_numpy(X)) loss = torch.mean(D(X)) # Minimize real samples energy loss.backward() D_solver.step() reset_grad() if it % 1000 == 0: print('Iter-{}; Pretrained D loss: {:.4}'.format(it, loss.data[0])) # Initial margin, expected energy of real data m = torch.mean(D(Variable(torch.from_numpy(mnist.train.images)))).data[0] s_z_before = torch.from_numpy(np.array([np.inf], dtype='float32')) # GAN training for t in range(n_epoch): s_x, s_z = torch.zeros(1), torch.zeros(1) for it in range(n_iter): # Sample data z = Variable(torch.randn(mb_size, z_dim)) X, _ = 
mnist.train.next_batch(mb_size) X = Variable(torch.from_numpy(X)) # Dicriminator G_sample = G(z) D_real = D(X) D_fake = D(G_sample) D_loss = torch.mean(D_real) + nn.relu(m - torch.mean(D_fake)) D_loss.backward() D_solver.step() # Update real samples statistics s_x += torch.sum(D_real.data) reset_grad() # Generator z = Variable(torch.randn(mb_size, z_dim)) G_sample = G(z) D_fake = D(G_sample) G_loss = torch.mean(D_fake) G_loss.backward() G_solver.step() # Update fake samples statistics s_z += torch.sum(D_fake.data) reset_grad() # Update margin if (((s_x[0] / N) < m) and (s_x[0] < s_z[0]) and (s_z_before[0] < s_z[0])): m = s_x[0] / N s_z_before = s_z # Convergence measure Ex = s_x[0] / N Ez = s_z[0] / N L = Ex + np.abs(Ex - Ez) # Visualize print('Epoch-{}; m = {:.4}; L = {:.4}' .format(t, m, L)) samples = G(z).data.numpy()[:16] fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') if not os.path.exists('out/'): os.makedirs('out/') plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight') cnt += 1 plt.close(fig) ================================================ FILE: GAN/magan/magan_tensorflow.py ================================================ import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os mb_size = 32 X_dim = 784 z_dim = 64 h_dim = 128 lr = 5e-4 n_iter = 1000 n_epoch = 1000 N = n_iter * mb_size # N data per epoch mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) def plot(samples): fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') 
ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') return fig def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / tf.sqrt(in_dim / 2.) return tf.random_normal(shape=size, stddev=xavier_stddev) X = tf.placeholder(tf.float32, shape=[None, X_dim]) z = tf.placeholder(tf.float32, shape=[None, z_dim]) m = tf.placeholder(tf.float32) D_W1 = tf.Variable(xavier_init([X_dim, h_dim])) D_b1 = tf.Variable(tf.zeros(shape=[h_dim])) D_W2 = tf.Variable(xavier_init([h_dim, X_dim])) D_b2 = tf.Variable(tf.zeros(shape=[X_dim])) G_W1 = tf.Variable(xavier_init([z_dim, h_dim])) G_b1 = tf.Variable(tf.zeros(shape=[h_dim])) G_W2 = tf.Variable(xavier_init([h_dim, X_dim])) G_b2 = tf.Variable(tf.zeros(shape=[X_dim])) theta_G = [G_W1, G_W2, G_b1, G_b2] theta_D = [D_W1, D_W2, D_b1, D_b2] def sample_z(m, n): return np.random.uniform(-1., 1., size=[m, n]) def G(z): G_h1 = tf.nn.relu(tf.matmul(z, G_W1) + G_b1) G_log_prob = tf.matmul(G_h1, G_W2) + G_b2 G_prob = tf.nn.sigmoid(G_log_prob) return G_prob def D(X): D_h1 = tf.nn.relu(tf.matmul(X, D_W1) + D_b1) X_recon = tf.matmul(D_h1, D_W2) + D_b2 return tf.reduce_sum((X - X_recon)**2, 1) G_sample = G(z) D_real = D(X) D_fake = D(G_sample) D_recon_loss = tf.reduce_mean(D_real) D_loss = tf.reduce_mean(D_real + tf.maximum(0., m - D_fake)) G_loss = tf.reduce_mean(D_fake) D_recon_solver = (tf.train.AdamOptimizer(learning_rate=lr) .minimize(D_recon_loss, var_list=theta_D)) D_solver = (tf.train.AdamOptimizer(learning_rate=lr) .minimize(D_loss, var_list=theta_D)) G_solver = (tf.train.AdamOptimizer(learning_rate=lr) .minimize(G_loss, var_list=theta_G)) sess = tf.Session() sess.run(tf.global_variables_initializer()) if not os.path.exists('out/'): os.makedirs('out/') # Pretrain for it in range(2*n_iter): X_mb, _ = mnist.train.next_batch(mb_size) _, D_recon_loss_curr = sess.run( [D_recon_solver, D_recon_loss], feed_dict={X: X_mb} ) if it % 1000 == 0: print('Iter-{}; Pretrained D loss: 
{:.4}'.format(it, D_recon_loss_curr)) i = 0 # Initial margin, expected energy of real data margin = sess.run(D_recon_loss, feed_dict={X: mnist.train.images}) s_z_before = np.inf # GAN training for t in range(n_epoch): s_x, s_z = 0., 0. for it in range(n_iter): X_mb, _ = mnist.train.next_batch(mb_size) z_mb = sample_z(mb_size, z_dim) _, D_loss_curr, D_real_curr = sess.run( [D_solver, D_loss, D_real], feed_dict={X: X_mb, z: z_mb, m: margin} ) # Update real samples statistics s_x += np.sum(D_real_curr) _, G_loss_curr, D_fake_curr = sess.run( [G_solver, G_loss, D_fake], feed_dict={X: X_mb, z: sample_z(mb_size, z_dim), m: margin} ) # Update fake samples statistics s_z += np.sum(D_fake_curr) # Update margin if (s_x / N < margin) and (s_x < s_z) and (s_z_before < s_z): margin = s_x / N s_z_before = s_z # Convergence measure Ex = s_x / N Ez = s_z / N L = Ex + np.abs(Ex - Ez) # Visualize print('Epoch: {}; m: {:.4}, L: {:.4}'.format(t, margin, L)) samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)}) fig = plot(samples) plt.savefig('out/{}.png' .format(str(i).zfill(3)), bbox_inches='tight') i += 1 plt.close(fig) ================================================ FILE: GAN/mode_regularized_gan/mode_reg_gan_pytorch.py ================================================ import torch import torch.nn import torch.nn.functional as nn import torch.autograd as autograd import torch.optim as optim import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from torch.autograd import Variable from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 32 z_dim = 128 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 128 cnt = 0 lr = 1e-4 lam1 = 1e-2 lam2 = 1e-2 def log(x): return torch.log(x + 1e-8) E = torch.nn.Sequential( torch.nn.Linear(X_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, z_dim) ) G = torch.nn.Sequential( 
torch.nn.Linear(z_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, X_dim), torch.nn.Sigmoid() ) D = torch.nn.Sequential( torch.nn.Linear(X_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, 1), torch.nn.Sigmoid() ) def reset_grad(): G.zero_grad() D.zero_grad() E.zero_grad() def sample_X(size, include_y=False): X, y = mnist.train.next_batch(size) X = Variable(torch.from_numpy(X)) if include_y: y = np.argmax(y, axis=1).astype(np.int) y = Variable(torch.from_numpy(y)) return X, y return X E_solver = optim.Adam(E.parameters(), lr=lr) G_solver = optim.Adam(G.parameters(), lr=lr) D_solver = optim.Adam(D.parameters(), lr=lr) for it in range(1000000): """ Discriminator """ # Sample data X = sample_X(mb_size) z = Variable(torch.randn(mb_size, z_dim)) # Dicriminator_1 forward-loss-backward-update G_sample = G(z) D_real = D(X) D_fake = D(G_sample) D_loss = -torch.mean(log(D_real) + log(1 - D_fake)) D_loss.backward() D_solver.step() # Housekeeping - reset gradient reset_grad() """ Generator """ # Sample data X = sample_X(mb_size) z = Variable(torch.randn(mb_size, z_dim)) # Generator forward-loss-backward-update G_sample = G(z) G_sample_reg = G(E(X)) D_fake = D(G_sample) D_reg = D(G_sample_reg) mse = torch.sum((X - G_sample_reg)**2, 1) reg = torch.mean(lam1 * mse + lam2 * log(D_reg)) G_loss = -torch.mean(log(D_fake)) + reg G_loss.backward() G_solver.step() # Housekeeping - reset gradient reset_grad() """ Encoder """ # Sample data X = sample_X(mb_size) z = Variable(torch.randn(mb_size, z_dim)) G_sample_reg = G(E(X)) D_reg = D(G_sample_reg) mse = torch.sum((X - G_sample_reg)**2, 1) E_loss = torch.mean(lam1 * mse + lam2 * log(D_reg)) E_loss.backward() E_solver.step() # Housekeeping - reset gradient reset_grad() # Print and plot every now and then if it % 1000 == 0: print('Iter-{}; D_loss: {}; E_loss: {}; G_loss: {}' .format(it, D_loss.data.numpy(), E_loss.data.numpy(), G_loss.data.numpy())) samples = G(z).data.numpy()[:16] fig = plt.figure(figsize=(4, 4)) gs = 
gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') if not os.path.exists('out/'): os.makedirs('out/') plt.savefig('out/{}.png' .format(str(cnt).zfill(3)), bbox_inches='tight') cnt += 1 plt.close(fig) ================================================ FILE: GAN/mode_regularized_gan/mode_reg_gan_tensorflow.py ================================================ import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os mb_size = 32 X_dim = 784 z_dim = 10 h_dim = 128 lam1 = 1e-2 lam2 = 1e-2 mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) def plot(samples): fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') return fig def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / tf.sqrt(in_dim / 2.) 
return tf.random_normal(shape=size, stddev=xavier_stddev) def log(x): return tf.log(x + 1e-8) X = tf.placeholder(tf.float32, shape=[None, X_dim]) z = tf.placeholder(tf.float32, shape=[None, z_dim]) E_W1 = tf.Variable(xavier_init([X_dim, h_dim])) E_b1 = tf.Variable(tf.zeros(shape=[h_dim])) E_W2 = tf.Variable(xavier_init([h_dim, z_dim])) E_b2 = tf.Variable(tf.zeros(shape=[z_dim])) D_W1 = tf.Variable(xavier_init([X_dim, h_dim])) D_b1 = tf.Variable(tf.zeros(shape=[h_dim])) D_W2 = tf.Variable(xavier_init([h_dim, 1])) D_b2 = tf.Variable(tf.zeros(shape=[1])) G_W1 = tf.Variable(xavier_init([z_dim, h_dim])) G_b1 = tf.Variable(tf.zeros(shape=[h_dim])) G_W2 = tf.Variable(xavier_init([h_dim, X_dim])) G_b2 = tf.Variable(tf.zeros(shape=[X_dim])) theta_E = [E_W1, E_W2, E_b1, E_b2] theta_G = [G_W1, G_W2, G_b1, G_b2] theta_D = [D_W1, D_W2, D_b1, D_b2] def sample_z(m, n): return np.random.uniform(-1., 1., size=[m, n]) def encoder(x): E_h1 = tf.nn.relu(tf.matmul(x, E_W1) + E_b1) out = tf.matmul(E_h1, E_W2) + E_b2 return out def generator(z): G_h1 = tf.nn.relu(tf.matmul(z, G_W1) + G_b1) G_log_prob = tf.matmul(G_h1, G_W2) + G_b2 G_prob = tf.nn.sigmoid(G_log_prob) return G_prob def discriminator(x): D_h1 = tf.nn.relu(tf.matmul(x, D_W1) + D_b1) D_log_prob = tf.matmul(D_h1, D_W2) + D_b2 D_prob = tf.nn.sigmoid(D_log_prob) return D_prob G_sample = generator(z) G_sample_reg = generator(encoder(X)) D_real = discriminator(X) D_fake = discriminator(G_sample) D_reg = discriminator(G_sample_reg) mse = tf.reduce_sum((X - G_sample_reg)**2, 1) D_loss = -tf.reduce_mean(log(D_real) + log(1 - D_fake)) E_loss = tf.reduce_mean(lam1 * mse + lam2 * log(D_reg)) G_loss = -tf.reduce_mean(log(D_fake)) + E_loss E_solver = (tf.train.AdamOptimizer(learning_rate=1e-3) .minimize(E_loss, var_list=theta_E)) D_solver = (tf.train.AdamOptimizer(learning_rate=1e-3) .minimize(D_loss, var_list=theta_D)) G_solver = (tf.train.AdamOptimizer(learning_rate=1e-3) .minimize(G_loss, var_list=theta_G)) sess = tf.Session() 
sess.run(tf.global_variables_initializer()) if not os.path.exists('out/'): os.makedirs('out/') i = 0 for it in range(1000000): X_mb, _ = mnist.train.next_batch(mb_size) _, D_loss_curr = sess.run( [D_solver, D_loss], feed_dict={X: X_mb, z: sample_z(mb_size, z_dim)} ) _, G_loss_curr = sess.run( [G_solver, G_loss], feed_dict={X: X_mb, z: sample_z(mb_size, z_dim)} ) _, E_loss_curr = sess.run( [E_solver, E_loss], feed_dict={X: X_mb, z: sample_z(mb_size, z_dim)} ) if it % 1000 == 0: print('Iter: {}; D_loss: {:.4}; G_loss: {:.4}; E_loss: {:.4}' .format(it, D_loss_curr, G_loss_curr, E_loss_curr)) samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)}) fig = plot(samples) plt.savefig('out/{}.png' .format(str(i).zfill(3)), bbox_inches='tight') i += 1 plt.close(fig) ================================================ FILE: GAN/softmax_gan/softmax_gan_pytorch.py ================================================ import torch import torch.nn import torch.nn.functional as nn import torch.autograd as autograd import torch.optim as optim import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from torch.autograd import Variable from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 16 z_dim = 10 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 128 cnt = 0 lr = 1e-3 def log(x): return torch.log(x + 1e-8) G = torch.nn.Sequential( torch.nn.Linear(z_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, X_dim), torch.nn.Sigmoid() ) D = torch.nn.Sequential( torch.nn.Linear(X_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, 1) ) def reset_grad(): G.zero_grad() D.zero_grad() G_solver = optim.Adam(G.parameters(), lr=lr) D_solver = optim.Adam(D.parameters(), lr=lr) D_target = 1./mb_size G_target = 1./(mb_size*2) for it in range(1000000): # Sample data z = Variable(torch.randn(mb_size, z_dim)) X, _ = 
mnist.train.next_batch(mb_size) X = Variable(torch.from_numpy(X)) G_sample = G(z) D_real = D(X) D_fake = D(G_sample) # Partition func. Z = torch.sum(torch.exp(-D_real)) + torch.sum(torch.exp(-D_fake)) # Dicriminator D_loss = torch.sum(D_target * D_real) + log(Z) D_loss.backward(retain_graph=True) D_solver.step() reset_grad() # Generator G_loss = torch.sum(G_target * D_real) + torch.sum(G_target * D_fake) + log(Z) G_loss.backward() G_solver.step() reset_grad() # Print and plot every now and then if it % 1000 == 0: print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}' .format(it, D_loss.data[0], G_loss.data[0])) samples = G(z).data.numpy()[:16] fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') if not os.path.exists('out/'): os.makedirs('out/') plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight') cnt += 1 plt.close(fig) ================================================ FILE: GAN/softmax_gan/softmax_gan_tensorflow.py ================================================ import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os mb_size = 32 X_dim = 784 z_dim = 64 h_dim = 128 lr = 1e-3 d_steps = 3 mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) def plot(samples): fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') return fig def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / tf.sqrt(in_dim / 2.) 
return tf.random_normal(shape=size, stddev=xavier_stddev) def log(x): return tf.log(x + 1e-8) X = tf.placeholder(tf.float32, shape=[None, X_dim]) z = tf.placeholder(tf.float32, shape=[None, z_dim]) D_W1 = tf.Variable(xavier_init([X_dim, h_dim])) D_b1 = tf.Variable(tf.zeros(shape=[h_dim])) D_W2 = tf.Variable(xavier_init([h_dim, 1])) D_b2 = tf.Variable(tf.zeros(shape=[1])) G_W1 = tf.Variable(xavier_init([z_dim, h_dim])) G_b1 = tf.Variable(tf.zeros(shape=[h_dim])) G_W2 = tf.Variable(xavier_init([h_dim, X_dim])) G_b2 = tf.Variable(tf.zeros(shape=[X_dim])) theta_G = [G_W1, G_W2, G_b1, G_b2] theta_D = [D_W1, D_W2, D_b1, D_b2] def sample_z(m, n): return np.random.uniform(-1., 1., size=[m, n]) def G(z): G_h1 = tf.nn.relu(tf.matmul(z, G_W1) + G_b1) G_log_prob = tf.matmul(G_h1, G_W2) + G_b2 G_prob = tf.nn.sigmoid(G_log_prob) return G_prob def D(X): D_h1 = tf.nn.relu(tf.matmul(X, D_W1) + D_b1) out = tf.matmul(D_h1, D_W2) + D_b2 return out G_sample = G(z) D_real = D(X) D_fake = D(G_sample) D_target = 1./mb_size G_target = 1./(mb_size*2) Z = tf.reduce_sum(tf.exp(-D_real)) + tf.reduce_sum(tf.exp(-D_fake)) D_loss = tf.reduce_sum(D_target * D_real) + log(Z) G_loss = tf.reduce_sum(G_target * D_real) + tf.reduce_sum(G_target * D_fake) + log(Z) D_solver = (tf.train.AdamOptimizer(learning_rate=lr) .minimize(D_loss, var_list=theta_D)) G_solver = (tf.train.AdamOptimizer(learning_rate=lr) .minimize(G_loss, var_list=theta_G)) sess = tf.Session() sess.run(tf.global_variables_initializer()) if not os.path.exists('out/'): os.makedirs('out/') i = 0 for it in range(1000000): X_mb, _ = mnist.train.next_batch(mb_size) z_mb = sample_z(mb_size, z_dim) _, D_loss_curr = sess.run( [D_solver, D_loss], feed_dict={X: X_mb, z: z_mb} ) _, G_loss_curr = sess.run( [G_solver, G_loss], feed_dict={X: X_mb, z: z_mb} ) if it % 1000 == 0: print('Iter: {}; D_loss: {:.4}; G_loss: {:.4}' .format(it, D_loss_curr, G_loss_curr)) samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)}) fig = plot(samples) 
plt.savefig('out/{}.png' .format(str(i).zfill(3)), bbox_inches='tight') i += 1 plt.close(fig) ================================================ FILE: GAN/vanilla_gan/gan_pytorch.py ================================================ import torch import torch.nn.functional as nn import torch.autograd as autograd import torch.optim as optim import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from torch.autograd import Variable from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 64 Z_dim = 100 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 128 c = 0 lr = 1e-3 def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / np.sqrt(in_dim / 2.) return Variable(torch.randn(*size) * xavier_stddev, requires_grad=True) """ ==================== GENERATOR ======================== """ Wzh = xavier_init(size=[Z_dim, h_dim]) bzh = Variable(torch.zeros(h_dim), requires_grad=True) Whx = xavier_init(size=[h_dim, X_dim]) bhx = Variable(torch.zeros(X_dim), requires_grad=True) def G(z): h = nn.relu(z @ Wzh + bzh.repeat(z.size(0), 1)) X = nn.sigmoid(h @ Whx + bhx.repeat(h.size(0), 1)) return X """ ==================== DISCRIMINATOR ======================== """ Wxh = xavier_init(size=[X_dim, h_dim]) bxh = Variable(torch.zeros(h_dim), requires_grad=True) Why = xavier_init(size=[h_dim, 1]) bhy = Variable(torch.zeros(1), requires_grad=True) def D(X): h = nn.relu(X @ Wxh + bxh.repeat(X.size(0), 1)) y = nn.sigmoid(h @ Why + bhy.repeat(h.size(0), 1)) return y G_params = [Wzh, bzh, Whx, bhx] D_params = [Wxh, bxh, Why, bhy] params = G_params + D_params """ ===================== TRAINING ======================== """ def reset_grad(): for p in params: if p.grad is not None: data = p.grad.data p.grad = Variable(data.new().resize_as_(data).zero_()) G_solver = optim.Adam(G_params, lr=1e-3) D_solver = optim.Adam(D_params, lr=1e-3) ones_label = 
Variable(torch.ones(mb_size, 1)) zeros_label = Variable(torch.zeros(mb_size, 1)) for it in range(100000): # Sample data z = Variable(torch.randn(mb_size, Z_dim)) X, _ = mnist.train.next_batch(mb_size) X = Variable(torch.from_numpy(X)) # Dicriminator forward-loss-backward-update G_sample = G(z) D_real = D(X) D_fake = D(G_sample) D_loss_real = nn.binary_cross_entropy(D_real, ones_label) D_loss_fake = nn.binary_cross_entropy(D_fake, zeros_label) D_loss = D_loss_real + D_loss_fake D_loss.backward() D_solver.step() # Housekeeping - reset gradient reset_grad() # Generator forward-loss-backward-update z = Variable(torch.randn(mb_size, Z_dim)) G_sample = G(z) D_fake = D(G_sample) G_loss = nn.binary_cross_entropy(D_fake, ones_label) G_loss.backward() G_solver.step() # Housekeeping - reset gradient reset_grad() # Print and plot every now and then if it % 1000 == 0: print('Iter-{}; D_loss: {}; G_loss: {}'.format(it, D_loss.data.numpy(), G_loss.data.numpy())) samples = G(z).data.numpy()[:16] fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') if not os.path.exists('out/'): os.makedirs('out/') plt.savefig('out/{}.png'.format(str(c).zfill(3)), bbox_inches='tight') c += 1 plt.close(fig) ================================================ FILE: GAN/vanilla_gan/gan_tensorflow.py ================================================ import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / tf.sqrt(in_dim / 2.) 
return tf.random_normal(shape=size, stddev=xavier_stddev) X = tf.placeholder(tf.float32, shape=[None, 784]) D_W1 = tf.Variable(xavier_init([784, 128])) D_b1 = tf.Variable(tf.zeros(shape=[128])) D_W2 = tf.Variable(xavier_init([128, 1])) D_b2 = tf.Variable(tf.zeros(shape=[1])) theta_D = [D_W1, D_W2, D_b1, D_b2] Z = tf.placeholder(tf.float32, shape=[None, 100]) G_W1 = tf.Variable(xavier_init([100, 128])) G_b1 = tf.Variable(tf.zeros(shape=[128])) G_W2 = tf.Variable(xavier_init([128, 784])) G_b2 = tf.Variable(tf.zeros(shape=[784])) theta_G = [G_W1, G_W2, G_b1, G_b2] def sample_Z(m, n): return np.random.uniform(-1., 1., size=[m, n]) def generator(z): G_h1 = tf.nn.relu(tf.matmul(z, G_W1) + G_b1) G_log_prob = tf.matmul(G_h1, G_W2) + G_b2 G_prob = tf.nn.sigmoid(G_log_prob) return G_prob def discriminator(x): D_h1 = tf.nn.relu(tf.matmul(x, D_W1) + D_b1) D_logit = tf.matmul(D_h1, D_W2) + D_b2 D_prob = tf.nn.sigmoid(D_logit) return D_prob, D_logit def plot(samples): fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') return fig G_sample = generator(Z) D_real, D_logit_real = discriminator(X) D_fake, D_logit_fake = discriminator(G_sample) # D_loss = -tf.reduce_mean(tf.log(D_real) + tf.log(1. 
- D_fake)) # G_loss = -tf.reduce_mean(tf.log(D_fake)) # Alternative losses: # ------------------- D_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_real, labels=tf.ones_like(D_logit_real))) D_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.zeros_like(D_logit_fake))) D_loss = D_loss_real + D_loss_fake G_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.ones_like(D_logit_fake))) D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D) G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G) mb_size = 128 Z_dim = 100 mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) sess = tf.Session() sess.run(tf.global_variables_initializer()) if not os.path.exists('out/'): os.makedirs('out/') i = 0 for it in range(1000000): if it % 1000 == 0: samples = sess.run(G_sample, feed_dict={Z: sample_Z(16, Z_dim)}) fig = plot(samples) plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight') i += 1 plt.close(fig) X_mb, _ = mnist.train.next_batch(mb_size) _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)}) _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={Z: sample_Z(mb_size, Z_dim)}) if it % 1000 == 0: print('Iter: {}'.format(it)) print('D loss: {:.4}'. 
format(D_loss_curr)) print('G_loss: {:.4}'.format(G_loss_curr)) print() ================================================ FILE: GAN/wasserstein_gan/wgan_pytorch.py ================================================ import torch import torch.nn import torch.nn.functional as nn import torch.autograd as autograd import torch.optim as optim import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from torch.autograd import Variable from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 32 z_dim = 10 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 128 cnt = 0 lr = 1e-4 G = torch.nn.Sequential( torch.nn.Linear(z_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, X_dim), torch.nn.Sigmoid() ) D = torch.nn.Sequential( torch.nn.Linear(X_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, 1), ) def reset_grad(): G.zero_grad() D.zero_grad() G_solver = optim.RMSprop(G.parameters(), lr=lr) D_solver = optim.RMSprop(D.parameters(), lr=lr) for it in range(1000000): for _ in range(5): # Sample data z = Variable(torch.randn(mb_size, z_dim)) X, _ = mnist.train.next_batch(mb_size) X = Variable(torch.from_numpy(X)) # Dicriminator forward-loss-backward-update G_sample = G(z) D_real = D(X) D_fake = D(G_sample) D_loss = -(torch.mean(D_real) - torch.mean(D_fake)) D_loss.backward() D_solver.step() # Weight clipping for p in D.parameters(): p.data.clamp_(-0.01, 0.01) # Housekeeping - reset gradient reset_grad() # Generator forward-loss-backward-update X, _ = mnist.train.next_batch(mb_size) X = Variable(torch.from_numpy(X)) z = Variable(torch.randn(mb_size, z_dim)) G_sample = G(z) D_fake = D(G_sample) G_loss = -torch.mean(D_fake) G_loss.backward() G_solver.step() # Housekeeping - reset gradient reset_grad() # Print and plot every now and then if it % 1000 == 0: print('Iter-{}; D_loss: {}; G_loss: {}' .format(it, D_loss.data.numpy(), 
G_loss.data.numpy())) samples = G(z).data.numpy()[:16] fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') if not os.path.exists('out/'): os.makedirs('out/') plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight') cnt += 1 plt.close(fig) ================================================ FILE: GAN/wasserstein_gan/wgan_tensorflow.py ================================================ import tensorflow as tf from tensorflow.examples.tutorials.mnist import input_data import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os mb_size = 32 X_dim = 784 z_dim = 10 h_dim = 128 mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) def plot(samples): fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') return fig def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / tf.sqrt(in_dim / 2.) 
return tf.random_normal(shape=size, stddev=xavier_stddev) X = tf.placeholder(tf.float32, shape=[None, X_dim]) D_W1 = tf.Variable(xavier_init([X_dim, h_dim])) D_b1 = tf.Variable(tf.zeros(shape=[h_dim])) D_W2 = tf.Variable(xavier_init([h_dim, 1])) D_b2 = tf.Variable(tf.zeros(shape=[1])) theta_D = [D_W1, D_W2, D_b1, D_b2] z = tf.placeholder(tf.float32, shape=[None, z_dim]) G_W1 = tf.Variable(xavier_init([z_dim, h_dim])) G_b1 = tf.Variable(tf.zeros(shape=[h_dim])) G_W2 = tf.Variable(xavier_init([h_dim, X_dim])) G_b2 = tf.Variable(tf.zeros(shape=[X_dim])) theta_G = [G_W1, G_W2, G_b1, G_b2] def sample_z(m, n): return np.random.uniform(-1., 1., size=[m, n]) def generator(z): G_h1 = tf.nn.relu(tf.matmul(z, G_W1) + G_b1) G_log_prob = tf.matmul(G_h1, G_W2) + G_b2 G_prob = tf.nn.sigmoid(G_log_prob) return G_prob def discriminator(x): D_h1 = tf.nn.relu(tf.matmul(x, D_W1) + D_b1) out = tf.matmul(D_h1, D_W2) + D_b2 return out G_sample = generator(z) D_real = discriminator(X) D_fake = discriminator(G_sample) D_loss = tf.reduce_mean(D_real) - tf.reduce_mean(D_fake) G_loss = -tf.reduce_mean(D_fake) D_solver = (tf.train.RMSPropOptimizer(learning_rate=1e-4) .minimize(-D_loss, var_list=theta_D)) G_solver = (tf.train.RMSPropOptimizer(learning_rate=1e-4) .minimize(G_loss, var_list=theta_G)) clip_D = [p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in theta_D] sess = tf.Session() sess.run(tf.global_variables_initializer()) if not os.path.exists('out/'): os.makedirs('out/') i = 0 for it in range(1000000): for _ in range(5): X_mb, _ = mnist.train.next_batch(mb_size) _, D_loss_curr, _ = sess.run( [D_solver, D_loss, clip_D], feed_dict={X: X_mb, z: sample_z(mb_size, z_dim)} ) _, G_loss_curr = sess.run( [G_solver, G_loss], feed_dict={z: sample_z(mb_size, z_dim)} ) if it % 100 == 0: print('Iter: {}; D loss: {:.4}; G_loss: {:.4}' .format(it, D_loss_curr, G_loss_curr)) if it % 1000 == 0: samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)}) fig = plot(samples) plt.savefig('out/{}.png' 
.format(str(i).zfill(3)), bbox_inches='tight') i += 1 plt.close(fig) ================================================ FILE: HelmholtzMachine/README.md ================================================ # Helmholtz Machines Implementation of (Binary) Helmholtz Machines. ## Disclaimer Currently the results is not that good. However, it might be useful to be used to gain intuition of Wake-Sleep Algorithm. ================================================ FILE: HelmholtzMachine/vanilla_HM/helmholtz.py ================================================ """ One layer Binary Helmholtz Machine ================================== """ import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from tensorflow.examples.tutorials.mnist import input_data if not os.path.exists('out/'): os.makedirs('out/') mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] mb_size = 16 h_dim = 36 # Recognition/inference weight R = np.random.randn(X_dim, h_dim) * 0.001 # Generative weight W = np.random.randn(h_dim, X_dim) * 0.001 # Generative bias of hidden variables B = np.random.randn(h_dim) * 0.001 def sigm(x): return 1/(1 + np.exp(-x)) def infer(X): # mb_size x x_dim -> mb_size x h_dim return sigm(X @ R) def generate(H): # mb_size x h_dim -> mb_size x x_dim return sigm(H @ W) # Wake-Sleep Algorithm # -------------------- alpha = 0.1 for t in range(1, 1001): # ---------- # Wake phase # ---------- # Upward pass X_mb = (mnist.train.next_batch(mb_size)[0] > 0.5).astype(np.float) H = np.random.binomial(n=1, p=infer(X_mb)) # Downward pass H_prime = sigm(B) V = generate(H) # Compute gradient dB = H - H_prime dW = np.array([np.outer(H[i], X_mb[i] - V[i]) for i in range(mb_size)]) # Update generative weight B += (alpha/t) * np.mean(dB, axis=0) W += (alpha/t) * np.mean(dW, axis=0) # ----------- # Sleep phase # ----------- # Downward pass H_mb = np.random.binomial(n=1, p=sigm(B)) V = 
np.random.binomial(n=1, p=generate(H_mb)) # Upward pass H = infer(V) # Compute gradient dR = np.array([np.outer(V, H_mb[i] - H[i]) for i in range(mb_size)]) # Update recognition weight R += (alpha/t) * np.mean(dR, axis=0) # Visualization # ------------- def plot(samples, size, name): size = int(size) fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(size, size), cmap='Greys_r') plt.savefig('out/{}.png'.format(name), bbox_inches='tight') plt.close(fig) X = (mnist.test.next_batch(mb_size)[0] > 0.5).astype(np.float) H = np.random.binomial(n=1, p=infer(X)) plot(H, np.sqrt(h_dim), 'H') X_recon = np.random.binomial(n=1, p=generate(H)) plot(X_recon, np.sqrt(X_dim), 'V') ================================================ FILE: LICENSE ================================================ This is free and unencumbered software released into the public domain. Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. For more information, please refer to ================================================ FILE: RBM/README.md ================================================ # Restricted Boltzmann Machines Implementation of (Binary) Restricted Boltzmann Machines (RBM). Hidden and visible variables are Bernoulli ## Algorithm List 1. RBM with Contrastive Divergence (CD) 2. RBM with Persistent Contrastive Divergence (PCD) ================================================ FILE: RBM/rbm_binary_cd.py ================================================ import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from tensorflow.examples.tutorials.mnist import input_data if not os.path.exists('out/'): os.makedirs('out/') mnist = input_data.read_data_sets('../MNIST_data', one_hot=True) X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] mb_size = 16 h_dim = 36 W = np.random.randn(X_dim, h_dim) * 0.001 a = np.random.randn(h_dim) * 0.001 b = np.random.randn(X_dim) * 0.001 def sigm(x): return 1/(1 + np.exp(-x)) def infer(X): # mb_size x x_dim -> mb_size x h_dim return sigm(X @ W) def generate(H): # mb_size x h_dim -> mb_size x x_dim return sigm(H @ W.T) # Contrastive Divergence # ---------------------- # Approximate the log partition gradient Gibbs sampling alpha = 0.1 K = 10 # Num. 
of Gibbs sampling step for t in range(1, 1001): X_mb = (mnist.train.next_batch(mb_size)[0] > 0.5).astype(np.float) g = 0 g_a = 0 g_b = 0 for v in X_mb: # E[h|v,W] h = infer(v) # Gibbs sampling steps # -------------------- v_prime = np.copy(v) for k in range(K): # h ~ p(h|v,W) h_prime = np.random.binomial(n=1, p=infer(v_prime)) # v ~ p(v|h,W) v_prime = np.random.binomial(n=1, p=generate(h_prime)) # E[h|v',W] h_prime = infer(v_prime) # Compute data gradient grad_w = np.outer(v, h) - np.outer(v_prime, h_prime) grad_a = h - h_prime grad_b = v - v_prime # Accumulate minibatch gradient g += grad_w g_a += grad_a g_b += grad_b # Monte carlo gradient g *= 1 / mb_size g_a *= 1 / mb_size g_b *= 1 / mb_size # Update to maximize W += alpha * g a += alpha * g_a b += alpha * g_b # Visualization # ------------- def plot(samples, size, name): size = int(size) fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(size, size), cmap='Greys_r') plt.savefig('out/{}.png'.format(name), bbox_inches='tight') plt.close(fig) X = (mnist.test.next_batch(mb_size)[0] > 0.5).astype(np.float) H = np.random.binomial(n=1, p=infer(X)) plot(H, np.sqrt(h_dim), 'H') X_recon = (generate(H) > 0.5).astype(np.float) plot(X_recon, np.sqrt(X_dim), 'V') ================================================ FILE: RBM/rbm_binary_pcd.py ================================================ import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from tensorflow.examples.tutorials.mnist import input_data if not os.path.exists('out/'): os.makedirs('out/') mnist = input_data.read_data_sets('../MNIST_data', one_hot=True) X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] mb_size = 16 h_dim = 36 W = np.random.randn(X_dim, h_dim) * 0.001 def sigm(x): 
return 1/(1 + np.exp(-x)) def infer(X): # mb_size x x_dim -> mb_size x h_dim return sigm(X @ W) def generate(H): # mb_size x h_dim -> mb_size x x_dim return sigm(H @ W.T) # Persistent Contrastive Divergence # --------------------------------- # Approximate the log partition gradient using single step Gibbs sampling alpha = 0.1 K = 10 # Num. of MC iteration # Initialize the markov chain V_s = sigm(np.random.randn(mb_size, X_dim)) H_s = np.random.binomial(n=1, p=0.5, size=[mb_size, h_dim]) for t in range(1, 1001): X_mb = (mnist.train.next_batch(mb_size)[0] > 0.5).astype(np.float) g = 0 Mu = infer(X_mb) # Gibbs sampling step # ------------------- for i, v_s in enumerate(V_s): for k in range(K): # h ~ p(h|v,W) h_prime = np.random.binomial(n=1, p=infer(v_s)) # v ~ p(v|h,W) v_prime = np.random.binomial(n=1, p=generate(h_prime)) # Replace with new sample V_s[i] = v_prime H_s[i] = h_prime # Compute average gradient left = np.array([np.outer(X_mb[i], Mu[i]) for i in range(mb_size)]) right = np.array([np.outer(V_s[i], H_s[i]) for i in range(mb_size)]) g = np.mean(left, axis=0) - np.mean(right, axis=0) # Update W += alpha * g # Maximize likelihood # Visualization # ------------- def plot(samples, size, name): size = int(size) fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(size, size), cmap='Greys_r') plt.savefig('out/{}.png'.format(name), bbox_inches='tight') plt.close(fig) X = (mnist.test.next_batch(mb_size)[0] > 0.5).astype(np.float) H = np.random.binomial(n=1, p=infer(X)) plot(H, np.sqrt(h_dim), 'H') X_recon = (generate(H) > 0.5).astype(np.float) plot(X_recon, np.sqrt(X_dim), 'V') ================================================ FILE: README.md ================================================ # Generative Models Collection of generative models, e.g. 
GAN, VAE in PyTorch and TensorFlow. Also present here are RBM and Helmholtz Machine.

## Note:
Generated samples will be stored in the `GAN/{gan_model}/out` (or `VAE/{vae_model}/out`, etc.) directory during training.

## What's in it?

#### Generative Adversarial Nets (GAN)
1. [Vanilla GAN](https://arxiv.org/abs/1406.2661)
2. [Conditional GAN](https://arxiv.org/abs/1411.1784)
3. [InfoGAN](https://arxiv.org/abs/1606.03657)
4. [Wasserstein GAN](https://arxiv.org/abs/1701.07875)
5. [Mode Regularized GAN](https://arxiv.org/abs/1612.02136)
6. [Coupled GAN](https://arxiv.org/abs/1606.07536)
7. [Auxiliary Classifier GAN](https://arxiv.org/abs/1610.09585)
8. [Least Squares GAN](https://arxiv.org/abs/1611.04076v2)
9. [Boundary Seeking GAN](https://arxiv.org/abs/1702.08431)
10. [Energy Based GAN](https://arxiv.org/abs/1609.03126)
11. [f-GAN](https://arxiv.org/abs/1606.00709)
12. [Generative Adversarial Parallelization](https://arxiv.org/abs/1612.04021)
13. [DiscoGAN](https://arxiv.org/abs/1703.05192)
14. [Adversarial Feature Learning](https://arxiv.org/abs/1605.09782) & [Adversarially Learned Inference](https://arxiv.org/abs/1606.00704)
15. [Boundary Equilibrium GAN](https://arxiv.org/abs/1703.10717)
16. [Improved Training for Wasserstein GAN](https://arxiv.org/abs/1704.00028)
17. [DualGAN](https://arxiv.org/abs/1704.02510)
18. [MAGAN: Margin Adaptation for GAN](https://arxiv.org/abs/1704.03817)
19. [Softmax GAN](https://arxiv.org/abs/1704.06191)
20. [GibbsNet](https://papers.nips.cc/paper/7094-gibbsnet-iterative-adversarial-inference-for-deep-graphical-models.pdf)

#### Variational Autoencoder (VAE)
1. [Vanilla VAE](https://arxiv.org/abs/1312.6114)
2. [Conditional VAE](https://arxiv.org/abs/1406.5298)
3. [Denoising VAE](https://arxiv.org/abs/1511.06406)
4. [Adversarial Autoencoder](https://arxiv.org/abs/1511.05644)
5. [Adversarial Variational Bayes](https://arxiv.org/abs/1701.04722)

#### Restricted Boltzmann Machine (RBM)
1.
[Binary RBM with Contrastive Divergence](http://www.cs.toronto.edu/~fritz/absps/cdmiguel.pdf) 2. [Binary RBM with Persistent Contrastive Divergence](http://www.cs.toronto.edu/~tijmen/pcd/pcd.pdf) #### Helmholtz Machine 1. [Binary Helmholtz Machine with Wake-Sleep Algorithm](http://www.cs.toronto.edu/~fritz/absps/ws.pdf) ## Dependencies 1. Install miniconda 2. Do `conda env create` 3. Enter the env `source activate generative-models` 4. Install [Tensorflow](https://www.tensorflow.org/get_started/os_setup) 5. Install [Pytorch](https://github.com/pytorch/pytorch#installation) ================================================ FILE: VAE/adversarial_autoencoder/aae_pytorch.py ================================================ import torch import torch.nn import torch.nn.functional as nn import torch.autograd as autograd import torch.optim as optim import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from torch.autograd import Variable from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 32 z_dim = 5 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 128 cnt = 0 lr = 1e-3 # Encoder Q = torch.nn.Sequential( torch.nn.Linear(X_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, z_dim) ) # Decoder P = torch.nn.Sequential( torch.nn.Linear(z_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, X_dim), torch.nn.Sigmoid() ) # Discriminator D = torch.nn.Sequential( torch.nn.Linear(z_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, 1), torch.nn.Sigmoid() ) def reset_grad(): Q.zero_grad() P.zero_grad() D.zero_grad() def sample_X(size, include_y=False): X, y = mnist.train.next_batch(size) X = Variable(torch.from_numpy(X)) if include_y: y = np.argmax(y, axis=1).astype(np.int) y = Variable(torch.from_numpy(y)) return X, y return X Q_solver = optim.Adam(Q.parameters(), lr=lr) P_solver = optim.Adam(P.parameters(), lr=lr) 
D_solver = optim.Adam(D.parameters(), lr=lr) for it in range(1000000): X = sample_X(mb_size) """ Reconstruction phase """ z_sample = Q(X) X_sample = P(z_sample) recon_loss = nn.binary_cross_entropy(X_sample, X) recon_loss.backward() P_solver.step() Q_solver.step() reset_grad() """ Regularization phase """ # Discriminator z_real = Variable(torch.randn(mb_size, z_dim)) z_fake = Q(X) D_real = D(z_real) D_fake = D(z_fake) D_loss = -torch.mean(torch.log(D_real) + torch.log(1 - D_fake)) D_loss.backward() D_solver.step() reset_grad() # Generator z_fake = Q(X) D_fake = D(z_fake) G_loss = -torch.mean(torch.log(D_fake)) G_loss.backward() Q_solver.step() reset_grad() # Print and plot every now and then if it % 1000 == 0: print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}; recon_loss: {:.4}' .format(it, D_loss.data[0], G_loss.data[0], recon_loss.data[0])) samples = P(z_real).data.numpy()[:16] fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') if not os.path.exists('out/'): os.makedirs('out/') plt.savefig('out/{}.png' .format(str(cnt).zfill(3)), bbox_inches='tight') cnt += 1 plt.close(fig) ================================================ FILE: VAE/adversarial_autoencoder/aae_tensorflow.py ================================================ import tensorflow as tf import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 32 z_dim = 10 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 128 c = 0 lr = 1e-3 def plot(samples): fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in 
enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') return fig def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / tf.sqrt(in_dim / 2.) return tf.random_normal(shape=size, stddev=xavier_stddev) """ Q(z|X) """ X = tf.placeholder(tf.float32, shape=[None, X_dim]) z = tf.placeholder(tf.float32, shape=[None, z_dim]) Q_W1 = tf.Variable(xavier_init([X_dim, h_dim])) Q_b1 = tf.Variable(tf.zeros(shape=[h_dim])) Q_W2 = tf.Variable(xavier_init([h_dim, z_dim])) Q_b2 = tf.Variable(tf.zeros(shape=[z_dim])) theta_Q = [Q_W1, Q_W2, Q_b1, Q_b2] def Q(X): h = tf.nn.relu(tf.matmul(X, Q_W1) + Q_b1) z = tf.matmul(h, Q_W2) + Q_b2 return z """ P(X|z) """ P_W1 = tf.Variable(xavier_init([z_dim, h_dim])) P_b1 = tf.Variable(tf.zeros(shape=[h_dim])) P_W2 = tf.Variable(xavier_init([h_dim, X_dim])) P_b2 = tf.Variable(tf.zeros(shape=[X_dim])) theta_P = [P_W1, P_W2, P_b1, P_b2] def P(z): h = tf.nn.relu(tf.matmul(z, P_W1) + P_b1) logits = tf.matmul(h, P_W2) + P_b2 prob = tf.nn.sigmoid(logits) return prob, logits """ D(z) """ D_W1 = tf.Variable(xavier_init([z_dim, h_dim])) D_b1 = tf.Variable(tf.zeros(shape=[h_dim])) D_W2 = tf.Variable(xavier_init([h_dim, 1])) D_b2 = tf.Variable(tf.zeros(shape=[1])) theta_D = [D_W1, D_W2, D_b1, D_b2] def D(z): h = tf.nn.relu(tf.matmul(z, D_W1) + D_b1) logits = tf.matmul(h, D_W2) + D_b2 prob = tf.nn.sigmoid(logits) return prob """ Training """ z_sample = Q(X) _, logits = P(z_sample) # Sample from random z X_samples, _ = P(z) # E[log P(X|z)] recon_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=X)) # Adversarial loss to approx. Q(z|X) D_real = D(z) D_fake = D(z_sample) D_loss = -tf.reduce_mean(tf.log(D_real) + tf.log(1. 
- D_fake)) G_loss = -tf.reduce_mean(tf.log(D_fake)) AE_solver = tf.train.AdamOptimizer().minimize(recon_loss, var_list=theta_P + theta_Q) D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D) G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_Q) sess = tf.Session() sess.run(tf.global_variables_initializer()) if not os.path.exists('out/'): os.makedirs('out/') i = 0 for it in range(1000000): X_mb, _ = mnist.train.next_batch(mb_size) z_mb = np.random.randn(mb_size, z_dim) _, recon_loss_curr = sess.run([AE_solver, recon_loss], feed_dict={X: X_mb}) _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, z: z_mb}) _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={X: X_mb}) if it % 1000 == 0: print('Iter: {}; D_loss: {:.4}; G_loss: {:.4}; Recon_loss: {:.4}' .format(it, D_loss_curr, G_loss_curr, recon_loss_curr)) samples = sess.run(X_samples, feed_dict={z: np.random.randn(16, z_dim)}) fig = plot(samples) plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight') i += 1 plt.close(fig) ================================================ FILE: VAE/adversarial_vb/avb_pytorch.py ================================================ import torch import torch.nn import torch.nn.functional as nn import torch.autograd as autograd import torch.optim as optim import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from torch.autograd import Variable from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 32 z_dim = 10 eps_dim = 4 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 128 cnt = 0 lr = 1e-3 def log(x): return torch.log(x + 1e-8) # Encoder: q(z|x,eps) Q = torch.nn.Sequential( torch.nn.Linear(X_dim + eps_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, z_dim) ) # Decoder: p(x|z) P = torch.nn.Sequential( torch.nn.Linear(z_dim, h_dim), torch.nn.ReLU(), 
torch.nn.Linear(h_dim, X_dim), torch.nn.Sigmoid() ) # Discriminator: T(X, z) T = torch.nn.Sequential( torch.nn.Linear(X_dim + z_dim, h_dim), torch.nn.ReLU(), torch.nn.Linear(h_dim, 1) ) def reset_grad(): Q.zero_grad() P.zero_grad() T.zero_grad() def sample_X(size, include_y=False): X, y = mnist.train.next_batch(size) X = Variable(torch.from_numpy(X)) if include_y: y = np.argmax(y, axis=1).astype(np.int) y = Variable(torch.from_numpy(y)) return X, y return X Q_solver = optim.Adam(Q.parameters(), lr=lr) P_solver = optim.Adam(P.parameters(), lr=lr) T_solver = optim.Adam(T.parameters(), lr=lr) for it in range(1000000): X = sample_X(mb_size) eps = Variable(torch.randn(mb_size, eps_dim)) z = Variable(torch.randn(mb_size, z_dim)) # Optimize VAE z_sample = Q(torch.cat([X, eps], 1)) X_sample = P(z_sample) T_sample = T(torch.cat([X, z_sample], 1)) disc = torch.mean(-T_sample) loglike = -nn.binary_cross_entropy(X_sample, X, size_average=False) / mb_size elbo = -(disc + loglike) elbo.backward() Q_solver.step() P_solver.step() reset_grad() # Discriminator T(X, z) z_sample = Q(torch.cat([X, eps], 1)) T_q = nn.sigmoid(T(torch.cat([X, z_sample], 1))) T_prior = nn.sigmoid(T(torch.cat([X, z], 1))) T_loss = -torch.mean(log(T_q) + log(1. 
- T_prior)) T_loss.backward() T_solver.step() reset_grad() # Print and plot every now and then if it % 1000 == 0: print('Iter-{}; ELBO: {:.4}; T_loss: {:.4}' .format(it, -elbo.data[0], -T_loss.data[0])) samples = P(z).data.numpy()[:16] fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') if not os.path.exists('out/'): os.makedirs('out/') plt.savefig('out/{}.png' .format(str(cnt).zfill(3)), bbox_inches='tight') cnt += 1 plt.close(fig) ================================================ FILE: VAE/adversarial_vb/avb_tensorflow.py ================================================ import tensorflow as tf import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from torch.autograd import Variable from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 32 z_dim = 10 eps_dim = 4 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 128 c = 0 lr = 1e-3 def log(x): return tf.log(x + 1e-8) def plot(samples): fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') return fig def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / tf.sqrt(in_dim / 2.) 
return tf.random_normal(shape=size, stddev=xavier_stddev) """ Q(z|X,eps) """ X = tf.placeholder(tf.float32, shape=[None, X_dim]) z = tf.placeholder(tf.float32, shape=[None, z_dim]) eps = tf.placeholder(tf.float32, shape=[None, eps_dim]) Q_W1 = tf.Variable(xavier_init([X_dim + eps_dim, h_dim])) Q_b1 = tf.Variable(tf.zeros(shape=[h_dim])) Q_W2 = tf.Variable(xavier_init([h_dim, z_dim])) Q_b2 = tf.Variable(tf.zeros(shape=[z_dim])) theta_Q = [Q_W1, Q_W2, Q_b1, Q_b2] def Q(X, eps): inputs = tf.concat(axis=1, values=[X, eps]) h = tf.nn.relu(tf.matmul(inputs, Q_W1) + Q_b1) z = tf.matmul(h, Q_W2) + Q_b2 return z """ P(X|z) """ P_W1 = tf.Variable(xavier_init([z_dim, h_dim])) P_b1 = tf.Variable(tf.zeros(shape=[h_dim])) P_W2 = tf.Variable(xavier_init([h_dim, X_dim])) P_b2 = tf.Variable(tf.zeros(shape=[X_dim])) theta_P = [P_W1, P_W2, P_b1, P_b2] def P(z): h = tf.nn.relu(tf.matmul(z, P_W1) + P_b1) logits = tf.matmul(h, P_W2) + P_b2 prob = tf.nn.sigmoid(logits) return prob, logits """ D(z) """ D_W1 = tf.Variable(xavier_init([X_dim + z_dim, h_dim])) D_b1 = tf.Variable(tf.zeros(shape=[h_dim])) D_W2 = tf.Variable(xavier_init([h_dim, 1])) D_b2 = tf.Variable(tf.zeros(shape=[1])) theta_D = [D_W1, D_W2, D_b1, D_b2] def D(X, z): inputs = tf.concat([X, z], axis=1) h = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1) return tf.matmul(h, D_W2) + D_b2 """ Training """ z_sample = Q(X, eps) _, X_logits = P(z_sample) D_sample = D(X, z_sample) D_q = tf.nn.sigmoid(D(X, z_sample)) D_prior = tf.nn.sigmoid(D(X, z)) # Sample from random z X_samples, _ = P(z) disc = tf.reduce_mean(-D_sample) nll = tf.reduce_sum( tf.nn.sigmoid_cross_entropy_with_logits(logits=X_logits, labels=X), axis=1 ) loglike = -tf.reduce_mean(nll) elbo = disc + loglike D_loss = tf.reduce_mean(log(D_q) + log(1. 
- D_prior)) VAE_solver = tf.train.AdamOptimizer().minimize(-elbo, var_list=theta_P+theta_Q) D_solver = tf.train.AdamOptimizer().minimize(-D_loss, var_list=theta_D) sess = tf.Session() sess.run(tf.global_variables_initializer()) if not os.path.exists('out/'): os.makedirs('out/') i = 0 for it in range(1000000): X_mb, _ = mnist.train.next_batch(mb_size) eps_mb = np.random.randn(mb_size, eps_dim) z_mb = np.random.randn(mb_size, z_dim) _, elbo_curr = sess.run([VAE_solver, elbo], feed_dict={X: X_mb, eps: eps_mb, z: z_mb}) _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, eps: eps_mb, z: z_mb}) if it % 1000 == 0: print('Iter: {}; ELBO: {:.4}; D_Loss: {:.4}' .format(it, elbo_curr, D_loss_curr)) samples = sess.run(X_samples, feed_dict={z: np.random.randn(16, z_dim)}) fig = plot(samples) plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight') i += 1 plt.close(fig) ================================================ FILE: VAE/conditional_vae/cvae_pytorch.py ================================================ import torch import torch.nn.functional as nn import torch.autograd as autograd import torch.optim as optim import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from torch.autograd import Variable from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 64 Z_dim = 100 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 128 cnt = 0 lr = 1e-3 def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / np.sqrt(in_dim / 2.) 
return Variable(torch.randn(*size) * xavier_stddev, requires_grad=True) # =============================== Q(z|X) ====================================== Wxh = xavier_init(size=[X_dim + y_dim, h_dim]) bxh = Variable(torch.zeros(h_dim), requires_grad=True) Whz_mu = xavier_init(size=[h_dim, Z_dim]) bhz_mu = Variable(torch.zeros(Z_dim), requires_grad=True) Whz_var = xavier_init(size=[h_dim, Z_dim]) bhz_var = Variable(torch.zeros(Z_dim), requires_grad=True) def Q(X, c): inputs = torch.cat([X, c], 1) h = nn.relu(inputs @ Wxh + bxh.repeat(inputs.size(0), 1)) z_mu = h @ Whz_mu + bhz_mu.repeat(h.size(0), 1) z_var = h @ Whz_var + bhz_var.repeat(h.size(0), 1) return z_mu, z_var def sample_z(mu, log_var): eps = Variable(torch.randn(mb_size, Z_dim)) return mu + torch.exp(log_var / 2) * eps # =============================== P(X|z) ====================================== Wzh = xavier_init(size=[Z_dim + y_dim, h_dim]) bzh = Variable(torch.zeros(h_dim), requires_grad=True) Whx = xavier_init(size=[h_dim, X_dim]) bhx = Variable(torch.zeros(X_dim), requires_grad=True) def P(z, c): inputs = torch.cat([z, c], 1) h = nn.relu(inputs @ Wzh + bzh.repeat(inputs.size(0), 1)) X = nn.sigmoid(h @ Whx + bhx.repeat(h.size(0), 1)) return X # =============================== TRAINING ==================================== params = [Wxh, bxh, Whz_mu, bhz_mu, Whz_var, bhz_var, Wzh, bzh, Whx, bhx] solver = optim.Adam(params, lr=lr) for it in range(100000): X, c = mnist.train.next_batch(mb_size) X = Variable(torch.from_numpy(X)) c = Variable(torch.from_numpy(c.astype('float32'))) # Forward z_mu, z_var = Q(X, c) z = sample_z(z_mu, z_var) X_sample = P(z, c) # Loss recon_loss = nn.binary_cross_entropy(X_sample, X, size_average=False) / mb_size kl_loss = torch.mean(0.5 * torch.sum(torch.exp(z_var) + z_mu**2 - 1. 
- z_var, 1)) loss = recon_loss + kl_loss # Backward loss.backward() # Update solver.step() # Housekeeping for p in params: if p.grad is not None: data = p.grad.data p.grad = Variable(data.new().resize_as_(data).zero_()) # Print and plot every now and then if it % 1000 == 0: print('Iter-{}; Loss: {:.4}'.format(it, loss.data[0])) c = np.zeros(shape=[mb_size, y_dim], dtype='float32') c[:, np.random.randint(0, 10)] = 1. c = Variable(torch.from_numpy(c)) z = Variable(torch.randn(mb_size, Z_dim)) samples = P(z, c).data.numpy()[:16] fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') if not os.path.exists('out/'): os.makedirs('out/') plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight') cnt += 1 plt.close(fig) ================================================ FILE: VAE/conditional_vae/cvae_tensorflow.py ================================================ import tensorflow as tf import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from torch.autograd import Variable from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 64 z_dim = 100 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 128 c = 0 lr = 1e-3 def plot(samples): fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') return fig def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / tf.sqrt(in_dim / 2.) 
return tf.random_normal(shape=size, stddev=xavier_stddev) # =============================== Q(z|X) ====================================== X = tf.placeholder(tf.float32, shape=[None, X_dim]) c = tf.placeholder(tf.float32, shape=[None, y_dim]) z = tf.placeholder(tf.float32, shape=[None, z_dim]) Q_W1 = tf.Variable(xavier_init([X_dim + y_dim, h_dim])) Q_b1 = tf.Variable(tf.zeros(shape=[h_dim])) Q_W2_mu = tf.Variable(xavier_init([h_dim, z_dim])) Q_b2_mu = tf.Variable(tf.zeros(shape=[z_dim])) Q_W2_sigma = tf.Variable(xavier_init([h_dim, z_dim])) Q_b2_sigma = tf.Variable(tf.zeros(shape=[z_dim])) def Q(X, c): inputs = tf.concat(axis=1, values=[X, c]) h = tf.nn.relu(tf.matmul(inputs, Q_W1) + Q_b1) z_mu = tf.matmul(h, Q_W2_mu) + Q_b2_mu z_logvar = tf.matmul(h, Q_W2_sigma) + Q_b2_sigma return z_mu, z_logvar def sample_z(mu, log_var): eps = tf.random_normal(shape=tf.shape(mu)) return mu + tf.exp(log_var / 2) * eps # =============================== P(X|z) ====================================== P_W1 = tf.Variable(xavier_init([z_dim + y_dim, h_dim])) P_b1 = tf.Variable(tf.zeros(shape=[h_dim])) P_W2 = tf.Variable(xavier_init([h_dim, X_dim])) P_b2 = tf.Variable(tf.zeros(shape=[X_dim])) def P(z, c): inputs = tf.concat(axis=1, values=[z, c]) h = tf.nn.relu(tf.matmul(inputs, P_W1) + P_b1) logits = tf.matmul(h, P_W2) + P_b2 prob = tf.nn.sigmoid(logits) return prob, logits # =============================== TRAINING ==================================== z_mu, z_logvar = Q(X, c) z_sample = sample_z(z_mu, z_logvar) _, logits = P(z_sample, c) # Sampling from random z X_samples, _ = P(z, c) # E[log P(X|z)] recon_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=X), 1) # D_KL(Q(z|X) || P(z|X)); calculate in closed form as both dist. are Gaussian kl_loss = 0.5 * tf.reduce_sum(tf.exp(z_logvar) + z_mu**2 - 1. 
- z_logvar, 1) # VAE loss vae_loss = tf.reduce_mean(recon_loss + kl_loss) solver = tf.train.AdamOptimizer().minimize(vae_loss) sess = tf.Session() sess.run(tf.global_variables_initializer()) if not os.path.exists('out/'): os.makedirs('out/') i = 0 for it in range(1000000): X_mb, y_mb = mnist.train.next_batch(mb_size) _, loss = sess.run([solver, vae_loss], feed_dict={X: X_mb, c: y_mb}) if it % 1000 == 0: print('Iter: {}'.format(it)) print('Loss: {:.4}'. format(loss)) print() y = np.zeros(shape=[16, y_dim]) y[:, np.random.randint(0, y_dim)] = 1. samples = sess.run(X_samples, feed_dict={z: np.random.randn(16, z_dim), c: y}) fig = plot(samples) plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight') i += 1 plt.close(fig) ================================================ FILE: VAE/denoising_vae/dvae_pytorch.py ================================================ import torch import torch.nn.functional as nn import torch.autograd as autograd import torch.optim as optim import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from torch.autograd import Variable from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 64 Z_dim = 100 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 128 c = 0 lr = 1e-3 noise_factor = .25 def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / np.sqrt(in_dim / 2.) 
return Variable(torch.randn(*size) * xavier_stddev, requires_grad=True) """ Q(z|X) """ Wxh = xavier_init(size=[X_dim, h_dim]) bxh = Variable(torch.zeros(h_dim), requires_grad=True) Whz_mu = xavier_init(size=[h_dim, Z_dim]) bhz_mu = Variable(torch.zeros(Z_dim), requires_grad=True) Whz_var = xavier_init(size=[h_dim, Z_dim]) bhz_var = Variable(torch.zeros(Z_dim), requires_grad=True) def Q(X): h = nn.relu(X @ Wxh + bxh.repeat(X.size(0), 1)) z_mu = h @ Whz_mu + bhz_mu.repeat(h.size(0), 1) z_var = h @ Whz_var + bhz_var.repeat(h.size(0), 1) return z_mu, z_var def sample_z(mu, log_var): eps = Variable(torch.randn(mb_size, Z_dim)) return mu + torch.exp(log_var / 2) * eps """ P(X|z) """ Wzh = xavier_init(size=[Z_dim, h_dim]) bzh = Variable(torch.zeros(h_dim), requires_grad=True) Whx = xavier_init(size=[h_dim, X_dim]) bhx = Variable(torch.zeros(X_dim), requires_grad=True) def P(z): h = nn.relu(z @ Wzh + bzh.repeat(z.size(0), 1)) X = nn.sigmoid(h @ Whx + bhx.repeat(h.size(0), 1)) return X """ Training """ params = [Wxh, bxh, Whz_mu, bhz_mu, Whz_var, bhz_var, Wzh, bzh, Whx, bhx] solver = optim.Adam(params, lr=lr) for it in range(100000): X, _ = mnist.train.next_batch(mb_size) X = Variable(torch.from_numpy(X)) # Add noise X_noise = X + noise_factor * Variable(torch.randn(X.size())) X_noise.data.clamp_(0., 1.) # Forward z_mu, z_var = Q(X_noise) z = sample_z(z_mu, z_var) X_sample = P(z) torch.nn.BCELoss recon_loss = nn.binary_cross_entropy(X_sample, X, size_average=False) / mb_size kl_loss = torch.mean(0.5 * torch.sum(torch.exp(z_var) + z_mu**2 - 1. 
- z_var, 1)) loss = recon_loss + kl_loss # Backward loss.backward() # Update solver.step() # Housekeeping for p in params: if p.grad is not None: data = p.grad.data p.grad = Variable(data.new().resize_as_(data).zero_()) # Print and plot every now and then if it % 1000 == 0: print('Iter-{}; Loss: {:.4}'.format(it, loss.data[0])) z = Variable(torch.randn(mb_size, Z_dim)) samples = P(z).data.numpy()[:16] fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') if not os.path.exists('out/'): os.makedirs('out/') plt.savefig('out/{}.png'.format(str(c).zfill(3)), bbox_inches='tight') c += 1 plt.close(fig) ================================================ FILE: VAE/denoising_vae/dvae_tensorflow.py ================================================ import tensorflow as tf import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from torch.autograd import Variable from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 64 z_dim = 100 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 128 c = 0 lr = 1e-3 noise_factor = 0.25 def plot(samples): fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') return fig def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / tf.sqrt(in_dim / 2.) 
return tf.random_normal(shape=size, stddev=xavier_stddev) """ Q(X|z) """ X = tf.placeholder(tf.float32, shape=[None, X_dim]) z = tf.placeholder(tf.float32, shape=[None, z_dim]) Q_W1 = tf.Variable(xavier_init([X_dim, h_dim])) Q_b1 = tf.Variable(tf.zeros(shape=[h_dim])) Q_W2_mu = tf.Variable(xavier_init([h_dim, z_dim])) Q_b2_mu = tf.Variable(tf.zeros(shape=[z_dim])) Q_W2_sigma = tf.Variable(xavier_init([h_dim, z_dim])) Q_b2_sigma = tf.Variable(tf.zeros(shape=[z_dim])) def Q(X): h = tf.nn.relu(tf.matmul(X, Q_W1) + Q_b1) z_mu = tf.matmul(h, Q_W2_mu) + Q_b2_mu z_logvar = tf.matmul(h, Q_W2_sigma) + Q_b2_sigma return z_mu, z_logvar def sample_z(mu, log_var): eps = tf.random_normal(shape=tf.shape(mu)) return mu + tf.exp(log_var / 2) * eps """ P(X|z) """ P_W1 = tf.Variable(xavier_init([z_dim, h_dim])) P_b1 = tf.Variable(tf.zeros(shape=[h_dim])) P_W2 = tf.Variable(xavier_init([h_dim, X_dim])) P_b2 = tf.Variable(tf.zeros(shape=[X_dim])) def P(z): h = tf.nn.relu(tf.matmul(z, P_W1) + P_b1) logits = tf.matmul(h, P_W2) + P_b2 prob = tf.nn.sigmoid(logits) return prob, logits """ Training """ # Add noise to X X_noise = X + noise_factor * tf.random_normal(tf.shape(X)) X_noise = tf.clip_by_value(X_noise, 0., 1.) z_mu, z_logvar = Q(X_noise) z_sample = sample_z(z_mu, z_logvar) _, logits = P(z_sample) # Sample from random z X_samples, _ = P(z) # E[log P(X|z)] recon_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=X), 1) # D_KL(Q(z|X_noise) || P(z|X)); calculate in closed form as both dist. are Gaussian kl_loss = 0.5 * tf.reduce_sum(tf.exp(z_logvar) + z_mu**2 - 1. 
- z_logvar, 1) # VAE loss vae_loss = tf.reduce_mean(recon_loss + kl_loss) solver = tf.train.AdamOptimizer().minimize(vae_loss) sess = tf.Session() sess.run(tf.global_variables_initializer()) if not os.path.exists('out/'): os.makedirs('out/') i = 0 for it in range(1000000): X_mb, _ = mnist.train.next_batch(mb_size) _, loss = sess.run([solver, vae_loss], feed_dict={X: X_mb}) if it % 1000 == 0: print('Iter: {}; Loss: {:.4}'.format(it, loss)) samples = sess.run(X_samples, feed_dict={z: np.random.randn(16, z_dim)}) fig = plot(samples) plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight') i += 1 plt.close(fig) ================================================ FILE: VAE/vanilla_vae/vae_pytorch.py ================================================ import torch import torch.nn.functional as nn import torch.autograd as autograd import torch.optim as optim import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from torch.autograd import Variable from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 64 Z_dim = 100 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 128 c = 0 lr = 1e-3 def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / np.sqrt(in_dim / 2.) 
return Variable(torch.randn(*size) * xavier_stddev, requires_grad=True) # =============================== Q(z|X) ====================================== Wxh = xavier_init(size=[X_dim, h_dim]) bxh = Variable(torch.zeros(h_dim), requires_grad=True) Whz_mu = xavier_init(size=[h_dim, Z_dim]) bhz_mu = Variable(torch.zeros(Z_dim), requires_grad=True) Whz_var = xavier_init(size=[h_dim, Z_dim]) bhz_var = Variable(torch.zeros(Z_dim), requires_grad=True) def Q(X): h = nn.relu(X @ Wxh + bxh.repeat(X.size(0), 1)) z_mu = h @ Whz_mu + bhz_mu.repeat(h.size(0), 1) z_var = h @ Whz_var + bhz_var.repeat(h.size(0), 1) return z_mu, z_var def sample_z(mu, log_var): eps = Variable(torch.randn(mb_size, Z_dim)) return mu + torch.exp(log_var / 2) * eps # =============================== P(X|z) ====================================== Wzh = xavier_init(size=[Z_dim, h_dim]) bzh = Variable(torch.zeros(h_dim), requires_grad=True) Whx = xavier_init(size=[h_dim, X_dim]) bhx = Variable(torch.zeros(X_dim), requires_grad=True) def P(z): h = nn.relu(z @ Wzh + bzh.repeat(z.size(0), 1)) X = nn.sigmoid(h @ Whx + bhx.repeat(h.size(0), 1)) return X # =============================== TRAINING ==================================== params = [Wxh, bxh, Whz_mu, bhz_mu, Whz_var, bhz_var, Wzh, bzh, Whx, bhx] solver = optim.Adam(params, lr=lr) for it in range(100000): X, _ = mnist.train.next_batch(mb_size) X = Variable(torch.from_numpy(X)) # Forward z_mu, z_var = Q(X) z = sample_z(z_mu, z_var) X_sample = P(z) # Loss recon_loss = nn.binary_cross_entropy(X_sample, X, size_average=False) / mb_size kl_loss = torch.mean(0.5 * torch.sum(torch.exp(z_var) + z_mu**2 - 1. 
- z_var, 1)) loss = recon_loss + kl_loss # Backward loss.backward() # Update solver.step() # Housekeeping for p in params: if p.grad is not None: data = p.grad.data p.grad = Variable(data.new().resize_as_(data).zero_()) # Print and plot every now and then if it % 1000 == 0: print('Iter-{}; Loss: {:.4}'.format(it, loss.data[0])) samples = P(z).data.numpy()[:16] fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') if not os.path.exists('out/'): os.makedirs('out/') plt.savefig('out/{}.png'.format(str(c).zfill(3)), bbox_inches='tight') c += 1 plt.close(fig) ================================================ FILE: VAE/vanilla_vae/vae_tensorflow.py ================================================ import tensorflow as tf import numpy as np import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec import os from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True) mb_size = 64 z_dim = 100 X_dim = mnist.train.images.shape[1] y_dim = mnist.train.labels.shape[1] h_dim = 128 c = 0 lr = 1e-3 def plot(samples): fig = plt.figure(figsize=(4, 4)) gs = gridspec.GridSpec(4, 4) gs.update(wspace=0.05, hspace=0.05) for i, sample in enumerate(samples): ax = plt.subplot(gs[i]) plt.axis('off') ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_aspect('equal') plt.imshow(sample.reshape(28, 28), cmap='Greys_r') return fig def xavier_init(size): in_dim = size[0] xavier_stddev = 1. / tf.sqrt(in_dim / 2.) 
return tf.random_normal(shape=size, stddev=xavier_stddev) # =============================== Q(z|X) ====================================== X = tf.placeholder(tf.float32, shape=[None, X_dim]) z = tf.placeholder(tf.float32, shape=[None, z_dim]) Q_W1 = tf.Variable(xavier_init([X_dim, h_dim])) Q_b1 = tf.Variable(tf.zeros(shape=[h_dim])) Q_W2_mu = tf.Variable(xavier_init([h_dim, z_dim])) Q_b2_mu = tf.Variable(tf.zeros(shape=[z_dim])) Q_W2_sigma = tf.Variable(xavier_init([h_dim, z_dim])) Q_b2_sigma = tf.Variable(tf.zeros(shape=[z_dim])) def Q(X): h = tf.nn.relu(tf.matmul(X, Q_W1) + Q_b1) z_mu = tf.matmul(h, Q_W2_mu) + Q_b2_mu z_logvar = tf.matmul(h, Q_W2_sigma) + Q_b2_sigma return z_mu, z_logvar def sample_z(mu, log_var): eps = tf.random_normal(shape=tf.shape(mu)) return mu + tf.exp(log_var / 2) * eps # =============================== P(X|z) ====================================== P_W1 = tf.Variable(xavier_init([z_dim, h_dim])) P_b1 = tf.Variable(tf.zeros(shape=[h_dim])) P_W2 = tf.Variable(xavier_init([h_dim, X_dim])) P_b2 = tf.Variable(tf.zeros(shape=[X_dim])) def P(z): h = tf.nn.relu(tf.matmul(z, P_W1) + P_b1) logits = tf.matmul(h, P_W2) + P_b2 prob = tf.nn.sigmoid(logits) return prob, logits # =============================== TRAINING ==================================== z_mu, z_logvar = Q(X) z_sample = sample_z(z_mu, z_logvar) _, logits = P(z_sample) # Sampling from random z X_samples, _ = P(z) # E[log P(X|z)] recon_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=X), 1) # D_KL(Q(z|X) || P(z)); calculate in closed form as both dist. are Gaussian kl_loss = 0.5 * tf.reduce_sum(tf.exp(z_logvar) + z_mu**2 - 1. 
- z_logvar, 1) # VAE loss vae_loss = tf.reduce_mean(recon_loss + kl_loss) solver = tf.train.AdamOptimizer().minimize(vae_loss) sess = tf.Session() sess.run(tf.global_variables_initializer()) if not os.path.exists('out/'): os.makedirs('out/') i = 0 for it in range(1000000): X_mb, _ = mnist.train.next_batch(mb_size) _, loss = sess.run([solver, vae_loss], feed_dict={X: X_mb}) if it % 1000 == 0: print('Iter: {}'.format(it)) print('Loss: {:.4}'. format(loss)) print() samples = sess.run(X_samples, feed_dict={z: np.random.randn(16, z_dim)}) fig = plot(samples) plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight') i += 1 plt.close(fig) ================================================ FILE: environment.yml ================================================ name: generative-models dependencies: - python=3.5.1 - numpy=1.11.0 - scikit-learn=0.17.1 - scipy=0.17.1 - matplotlib=1.5.3 - pip: - keras==1.1.1