Full Code of wiseodd/generative-models for AI

master b930d5fa9e2f cached
56 files
175.3 KB
56.0k tokens
216 symbols
1 requests
Download .txt
Repository: wiseodd/generative-models
Branch: master
Commit: b930d5fa9e2f
Files: 56
Total size: 175.3 KB

Directory structure:
gitextract_9grku3me/

├── .gitignore
├── GAN/
│   ├── ali_bigan/
│   │   ├── ali_bigan_pytorch.py
│   │   └── ali_bigan_tensorflow.py
│   ├── auxiliary_classifier_gan/
│   │   ├── ac_gan_pytorch.py
│   │   └── ac_gan_tensorflow.py
│   ├── boundary_equilibrium_gan/
│   │   ├── began_pytorch.py
│   │   └── began_tensorflow.py
│   ├── boundary_seeking_gan/
│   │   ├── bgan_pytorch.py
│   │   └── bgan_tensorflow.py
│   ├── conditional_gan/
│   │   ├── cgan_pytorch.py
│   │   └── cgan_tensorflow.py
│   ├── coupled_gan/
│   │   ├── cogan_pytorch.py
│   │   └── cogan_tensorflow.py
│   ├── disco_gan/
│   │   ├── discogan_pytorch.py
│   │   └── discogan_tensorflow.py
│   ├── dual_gan/
│   │   ├── dualgan_pytorch.py
│   │   └── dualgan_tensorflow.py
│   ├── ebgan/
│   │   ├── ebgan_pytorch.py
│   │   └── ebgan_tensorflow.py
│   ├── f_gan/
│   │   ├── f_gan_pytorch.py
│   │   └── f_gan_tensorflow.py
│   ├── generative_adversarial_parallelization/
│   │   └── gap_pytorch.py
│   ├── gibbsnet/
│   │   └── gibbsnet_pytorch.py
│   ├── improved_wasserstein_gan/
│   │   └── wgan_gp_tensorflow.py
│   ├── infogan/
│   │   ├── infogan_pytorch.py
│   │   └── infogan_tensorflow.py
│   ├── least_squares_gan/
│   │   ├── lsgan_pytorch.py
│   │   └── lsgan_tensorflow.py
│   ├── magan/
│   │   ├── magan_pytorch.py
│   │   └── magan_tensorflow.py
│   ├── mode_regularized_gan/
│   │   ├── mode_reg_gan_pytorch.py
│   │   └── mode_reg_gan_tensorflow.py
│   ├── softmax_gan/
│   │   ├── softmax_gan_pytorch.py
│   │   └── softmax_gan_tensorflow.py
│   ├── vanilla_gan/
│   │   ├── gan_pytorch.py
│   │   └── gan_tensorflow.py
│   └── wasserstein_gan/
│       ├── wgan_pytorch.py
│       └── wgan_tensorflow.py
├── HelmholtzMachine/
│   ├── README.md
│   └── vanilla_HM/
│       └── helmholtz.py
├── LICENSE
├── RBM/
│   ├── README.md
│   ├── rbm_binary_cd.py
│   └── rbm_binary_pcd.py
├── README.md
├── VAE/
│   ├── adversarial_autoencoder/
│   │   ├── aae_pytorch.py
│   │   └── aae_tensorflow.py
│   ├── adversarial_vb/
│   │   ├── avb_pytorch.py
│   │   └── avb_tensorflow.py
│   ├── conditional_vae/
│   │   ├── cvae_pytorch.py
│   │   └── cvae_tensorflow.py
│   ├── denoising_vae/
│   │   ├── dvae_pytorch.py
│   │   └── dvae_tensorflow.py
│   └── vanilla_vae/
│       ├── vae_pytorch.py
│       └── vae_tensorflow.py
└── environment.yml

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject

*.sublime*
MNIST_data/
GAN/out/
VAE/out/
out/

# Unreleased
GAN/unrolled_gan/
GAN/loss_sensitive_gan/
GAN/generative_adversarial_parallelization/gap_tensorflow.py

.vscode


================================================
FILE: GAN/ali_bigan/ali_bigan_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data
from itertools import *


# Script configuration. MNIST is loaded with one-hot labels from the shared
# data directory two levels up.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32  # minibatch size used for both data and noise batches
z_dim = 10  # dimensionality of the latent code z
X_dim = mnist.train.images.shape[1]  # second dimension of the training image array
y_dim = mnist.train.labels.shape[1]  # second dimension of the training label array
h_dim = 128  # hidden layer width shared by Q, P and D_
cnt = 0  # running index used to name the saved sample images
lr = 1e-3  # learning rate handed to both Adam optimizers


def log(x):
    """Return the natural log of `x` after adding 1e-8 to every element.

    The small offset keeps the result finite when `x` contains exact zeros.
    """
    shifted = x + 1e-8
    return torch.log(shifted)


# Inference net (Encoder) Q(z|X)
# Maps an X_dim input to a z_dim code; the output layer is linear.
Q = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, z_dim)
)

# Generator net (Decoder) P(X|z)
# Maps a z_dim code to an X_dim output squashed into (0, 1) by the sigmoid.
P = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)

# Joint discriminator body: takes a concatenated (X, z) row of width
# X_dim + z_dim and outputs a single sigmoid probability.
D_ = torch.nn.Sequential(
    torch.nn.Linear(X_dim + z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1),
    torch.nn.Sigmoid()
)


def D(X, z):
    """Score an (X, z) pair with the joint discriminator.

    `X` and `z` are concatenated along dimension 1 and passed through `D_`.
    """
    joint = torch.cat([X, z], 1)
    return D_(joint)


def reset_grad():
    """Zero the accumulated gradients of Q, P and D_ (in that order)."""
    for net in (Q, P, D_):
        net.zero_grad()


# G_solver jointly optimizes the encoder Q and decoder P (their parameter
# iterators are chained together); D_solver optimizes only D_.
G_solver = optim.Adam(chain(Q.parameters(), P.parameters()), lr=lr)
D_solver = optim.Adam(D_.parameters(), lr=lr)


# Adversarial training loop. Each iteration performs one discriminator update
# followed by one encoder/decoder update on the same (X, z) batch, and every
# 1000 iterations prints the losses and saves a 4x4 grid of generated samples.
for it in range(1000000):
    # Sample data: prior noise z and a minibatch of real images X
    z = Variable(torch.randn(mb_size, z_dim))
    X, _ = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))

    # Discriminator
    z_hat = Q(X)
    X_hat = P(z)

    D_enc = D(X, z_hat)   # score of (real X, inferred z)
    D_gen = D(X_hat, z)   # score of (generated X, prior z)

    D_loss = -torch.mean(log(D_enc) + log(1 - D_gen))

    D_loss.backward()
    # Only the discriminator is stepped here. The previous code also called
    # G_solver.step() at this point, which moved Q and P along the gradient
    # of D_loss, i.e. made them help the discriminator before the adversarial
    # update below. The gradients D_loss left on Q and P are discarded by
    # reset_grad().
    D_solver.step()
    reset_grad()

    # Autoencoder Q, P: same objective with the two labels swapped
    z_hat = Q(X)
    X_hat = P(z)

    D_enc = D(X, z_hat)
    D_gen = D(X_hat, z)

    G_loss = -torch.mean(log(D_gen) + log(1 - D_enc))

    G_loss.backward()
    G_solver.step()
    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss.data[0], G_loss.data[0]))

        # First 16 decoded samples from the current noise batch
        samples = P(z).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        # Files are named by the zero-padded running counter: 000.png, 001.png, ...
        plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/ali_bigan/ali_bigan_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os


# Script configuration.
mb_size = 32  # minibatch size
X_dim = 784  # input width; samples are later reshaped to 28x28 for plotting
z_dim = 64  # latent code dimensionality
h_dim = 128  # hidden layer width
lr = 1e-3  # learning rate for both Adam optimizers
d_steps = 3  # not referenced anywhere else in this script

# MNIST with one-hot labels, read from the shared data directory.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)


def plot(samples):
    """Draw `samples` on a 4x4 grid of axis-less grayscale tiles.

    Each sample is reshaped to 28x28 before being shown. Returns the
    matplotlib figure so the caller can save and close it.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for cell, image in enumerate(samples):
        axes = plt.subplot(grid[cell])
        plt.axis('off')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        axes.set_aspect('equal')
        plt.imshow(image.reshape(28, 28), cmap='Greys_r')

    return figure


def xavier_init(size):
    """Return a random-normal tensor of shape `size` for weight initialization.

    The standard deviation is 1 / sqrt(size[0] / 2), i.e. it depends only on
    the first entry of `size`.
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)


def log(x):
    """Return tf.log of `x` shifted by 1e-8, so exact zeros do not give -inf."""
    shifted = x + 1e-8
    return tf.log(shifted)


# Graph inputs: a batch of data rows and a batch of latent codes.
X = tf.placeholder(tf.float32, shape=[None, X_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Discriminator weights: the first layer takes the concatenated (X, z) input.
D_W1 = tf.Variable(xavier_init([X_dim + z_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

# Encoder Q weights: X_dim -> h_dim -> z_dim.
Q_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
Q_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
Q_W2 = tf.Variable(xavier_init([h_dim, z_dim]))
Q_b2 = tf.Variable(tf.zeros(shape=[z_dim]))

# Decoder P weights: z_dim -> h_dim -> X_dim.
P_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
P_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
P_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
P_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Variable groups handed to the optimizers: theta_G covers both Q and P,
# theta_D covers the discriminator only.
theta_G = [Q_W1, Q_W2, Q_b1, Q_b2, P_W1, P_W2, P_b1, P_b2]
theta_D = [D_W1, D_W2, D_b1, D_b2]


def sample_z(m, n):
    """Draw an (m, n) NumPy array of noise, uniform on [-1, 1)."""
    return np.random.uniform(low=-1., high=1., size=[m, n])


def Q(X):
    """Encoder: ReLU hidden layer followed by a linear layer producing the code."""
    hidden = tf.nn.relu(tf.matmul(X, Q_W1) + Q_b1)
    code = tf.matmul(hidden, Q_W2) + Q_b2
    return code


def P(z):
    """Decoder: ReLU hidden layer, linear output layer, then a sigmoid."""
    hidden = tf.nn.relu(tf.matmul(z, P_W1) + P_b1)
    logits = tf.matmul(hidden, P_W2) + P_b2
    return tf.nn.sigmoid(logits)


def D(X, z):
    """Joint discriminator: sigmoid score for the concatenation of `X` and `z`."""
    joint = tf.concat([X, z], axis=1)
    hidden = tf.nn.relu(tf.matmul(joint, D_W1) + D_b1)
    logit = tf.matmul(hidden, D_W2) + D_b2
    return tf.nn.sigmoid(logit)


# Build the graph: encode real data and decode prior noise.
z_hat = Q(X)
X_hat = P(z)

# Discriminator scores for the (real X, inferred z) and (generated X, prior z) pairs.
D_enc = D(X, z_hat)
D_gen = D(X_hat, z)

# The two losses are the same expression with the roles of D_enc and D_gen swapped.
D_loss = -tf.reduce_mean(log(D_enc) + log(1 - D_gen))
G_loss = -tf.reduce_mean(log(D_gen) + log(1 - D_enc))

# Each optimizer only touches its own variable group.
D_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(D_loss, var_list=theta_D))
G_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(G_loss, var_list=theta_G))

sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out/'):
    os.makedirs('out/')

# Running index used to name the saved sample images.
i = 0

# Training loop: one discriminator step and one encoder/decoder step per
# iteration, both on the same data and noise batch.
for it in range(1000000):
    X_mb, _ = mnist.train.next_batch(mb_size)
    z_mb = sample_z(mb_size, z_dim)

    _, D_loss_curr = sess.run(
        [D_solver, D_loss], feed_dict={X: X_mb, z: z_mb}
    )

    _, G_loss_curr = sess.run(
        [G_solver, G_loss], feed_dict={X: X_mb, z: z_mb}
    )

    # Every 1000 iterations: report losses and save 16 freshly generated samples.
    if it % 1000 == 0:
        print('Iter: {}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss_curr, G_loss_curr))

        samples = sess.run(X_hat, feed_dict={z: sample_z(16, z_dim)})

        fig = plot(samples)
        plt.savefig('out/{}.png'
                    .format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)


================================================
FILE: GAN/auxiliary_classifier_gan/ac_gan_pytorch.py
================================================
import torch
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Script configuration. MNIST is loaded with one-hot labels.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32  # minibatch size
z_dim = 16  # noise dimensionality
X_dim = mnist.train.images.shape[1]  # second dimension of the training image array
y_dim = mnist.train.labels.shape[1]  # second dimension of the (one-hot) label array
h_dim = 128  # hidden layer width
cnt = 0  # running index used to name the saved sample images
lr = 1e-3  # learning rate for both Adam optimizers
eps = 1e-8  # added inside torch.log calls to avoid log(0)


# Generator body: consumes the concatenated (noise, label) row of width
# z_dim + y_dim and emits an X_dim vector squashed into (0, 1).
G_ = torch.nn.Sequential(
    torch.nn.Linear(z_dim + y_dim, h_dim),
    torch.nn.PReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)


def G(z, c):
    """Generate samples from noise `z` conditioned on `c`.

    `z` and `c` are concatenated along dimension 1 and fed to `G_`.
    """
    return G_(torch.cat([z, c], 1))


# Shared discriminator trunk: X_dim -> h_dim features used by both heads.
D_shared = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.PReLU()
)

# Real/fake head: a single sigmoid probability.
D_gan = torch.nn.Sequential(
    torch.nn.Linear(h_dim, 1),
    torch.nn.Sigmoid()
)

# Auxiliary classifier head: y_dim raw scores (no activation applied here).
D_aux = torch.nn.Sequential(
    torch.nn.Linear(h_dim, y_dim),
)


def D(X):
    """Run `X` through the shared trunk and both discriminator heads.

    Returns a pair: (output of the D_gan head, output of the D_aux head).
    """
    features = D_shared(X)
    gan_out = D_gan(features)
    aux_out = D_aux(features)
    return gan_out, aux_out


# Every module whose gradients reset_grad() has to clear.
nets = [G_, D_shared, D_gan, D_aux]

# Parameter groups for the two optimizers. The discriminator group is the
# union of the shared trunk and both heads.
G_params = G_.parameters()
D_params = (list(D_shared.parameters()) + list(D_gan.parameters()) +
            list(D_aux.parameters()))


def reset_grad():
    """Clear the accumulated gradients of every module listed in `nets`."""
    for module in nets:
        module.zero_grad()


# One Adam optimizer per player, both using the same learning rate.
G_solver = optim.Adam(G_params, lr=lr)
D_solver = optim.Adam(D_params, lr=lr)


# AC-GAN training loop. Each iteration performs one discriminator/classifier
# update and one generator update on the same batch; every 1000 iterations it
# prints the GAN losses and saves a 4x4 grid of samples of one random class.
for it in range(100000):
    # Sample data
    X, y = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))
    # c is one-hot
    c = Variable(torch.from_numpy(y.astype('float32')))
    # y_true is not one-hot (requirement from nn.cross_entropy).
    # Cast explicitly to int64: cross_entropy needs 64-bit integer targets,
    # and NumPy's platform 'int' is only 32 bits on some platforms.
    y_true = Variable(torch.from_numpy(y.argmax(axis=1).astype('int64')))
    # z noise
    z = Variable(torch.randn(mb_size, z_dim))

    """ Discriminator """
    G_sample = G(z, c)
    D_real, C_real = D(X)
    D_fake, C_fake = D(G_sample)

    # GAN's D loss (a quantity to maximize)
    D_loss = torch.mean(torch.log(D_real + eps) + torch.log(1 - D_fake + eps))
    # Cross entropy aux loss, negated so that it is also something to maximize
    C_loss = -nn.cross_entropy(C_real, y_true) - nn.cross_entropy(C_fake, y_true)

    # Maximize: the optimizer minimizes, hence the sign flip
    DC_loss = -(D_loss + C_loss)

    DC_loss.backward()
    D_solver.step()

    reset_grad()

    """ Generator """
    G_sample = G(z, c)
    D_fake, C_fake = D(G_sample)
    _, C_real = D(X)

    # GAN's G loss (a quantity to maximize)
    G_loss = torch.mean(torch.log(D_fake + eps))
    # Cross entropy aux loss
    C_loss = -nn.cross_entropy(C_real, y_true) - nn.cross_entropy(C_fake, y_true)

    # Maximize
    GC_loss = -(G_loss + C_loss)

    GC_loss.backward()
    G_solver.step()

    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        # Condition all 16 samples on the same randomly chosen class index
        idx = np.random.randint(0, 10)
        c = np.zeros([16, y_dim])
        c[range(16), idx] = 1
        c = Variable(torch.from_numpy(c.astype('float32')))

        z = Variable(torch.randn(16, z_dim))

        samples = G(z, c).data.numpy()

        # Losses are printed with their sign flipped
        print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}; Idx: {}'
              .format(it, -D_loss.data[0], -G_loss.data[0], idx))

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        # Files are named by the zero-padded running counter
        plt.savefig('out/{}.png'
                    .format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/auxiliary_classifier_gan/ac_gan_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os


# MNIST with one-hot labels, read from the shared data directory.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)

# Script configuration.
mb_size = 32  # minibatch size
X_dim = mnist.train.images.shape[1]  # second dimension of the training image array
y_dim = mnist.train.labels.shape[1]  # second dimension of the (one-hot) label array
z_dim = 10  # noise dimensionality
h_dim = 128  # hidden layer width
eps = 1e-8  # added inside tf.log calls to avoid log(0)
lr = 1e-3  # learning rate for both Adam optimizers
d_steps = 3  # not referenced anywhere else in this script


def plot(samples):
    """Lay `samples` out on a 4x4 grid and return the resulting figure.

    Every sample is reshaped to 28x28 and drawn in grayscale with the axes
    hidden.
    """
    fig = plt.figure(figsize=(4, 4))
    layout = gridspec.GridSpec(4, 4)
    layout.update(wspace=0.05, hspace=0.05)

    for position, flat_image in enumerate(samples):
        tile = plt.subplot(layout[position])
        plt.axis('off')
        tile.set_xticklabels([])
        tile.set_yticklabels([])
        tile.set_aspect('equal')
        plt.imshow(flat_image.reshape(28, 28), cmap='Greys_r')

    return fig


def xavier_init(size):
    """Sample initial weights of shape `size` from a zero-mean normal.

    The standard deviation is 1 / sqrt(size[0] / 2).
    """
    n_inputs = size[0]
    sigma = 1. / tf.sqrt(n_inputs / 2.)
    return tf.random_normal(shape=size, stddev=sigma)


# Graph inputs: data rows, one-hot labels and noise.
X = tf.placeholder(tf.float32, shape=[None, X_dim])
y = tf.placeholder(tf.float32, shape=[None, y_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Generator weights: the first layer takes the concatenated (z, label) input.
G_W1 = tf.Variable(xavier_init([z_dim + y_dim, h_dim]))
G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
G_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_b2 = tf.Variable(tf.zeros(shape=[X_dim]))


def generator(z, c):
    """Generate samples from noise `z` conditioned on `c`.

    The two inputs are concatenated along axis 1, passed through a ReLU
    hidden layer and a linear output layer, and squashed with a sigmoid.
    """
    joint = tf.concat(axis=1, values=[z, c])
    hidden = tf.nn.relu(tf.matmul(joint, G_W1) + G_b1)
    logits = tf.matmul(hidden, G_W2) + G_b2
    return tf.nn.sigmoid(logits)


# Discriminator weights: one shared hidden layer, then two output heads —
# a 1-unit real/fake head (*_gan) and a y_dim-unit class head (*_aux).
D_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
D_W2_gan = tf.Variable(xavier_init([h_dim, 1]))
D_b2_gan = tf.Variable(tf.zeros(shape=[1]))
D_W2_aux = tf.Variable(xavier_init([h_dim, y_dim]))
D_b2_aux = tf.Variable(tf.zeros(shape=[y_dim]))


def discriminator(X):
    """Return (real/fake probability, class logits) for `X`.

    Both outputs are computed from the same ReLU hidden layer; only the
    real/fake output goes through a sigmoid.
    """
    hidden = tf.nn.relu(tf.matmul(X, D_W1) + D_b1)
    gan_prob = tf.nn.sigmoid(tf.matmul(hidden, D_W2_gan) + D_b2_gan)
    aux_logits = tf.matmul(hidden, D_W2_aux) + D_b2_aux
    return gan_prob, aux_logits


# Variable groups handed to the generator and discriminator optimizers.
theta_G = [G_W1, G_W2, G_b1, G_b2]
theta_D = [D_W1, D_W2_gan, D_W2_aux, D_b1, D_b2_gan, D_b2_aux]


def sample_z(m, n):
    """Return an (m, n) NumPy array of noise drawn uniformly from [-1, 1)."""
    shape = [m, n]
    return np.random.uniform(-1., 1., size=shape)


def cross_entropy(logit, y):
    """Return the NEGATED mean softmax cross entropy of `logit` against `y`.

    Because of the leading minus sign, larger values mean better
    classification.
    """
    per_example = tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=y)
    return -tf.reduce_mean(per_example)


# Build the graph: generated samples conditioned on the label placeholder.
G_sample = generator(z, y)

D_real, C_real = discriminator(X)
D_fake, C_fake = discriminator(G_sample)

# Cross entropy aux loss
# (cross_entropy() already returns the negated value, so this is a quantity
# to maximize, like D_loss and G_loss below.)
C_loss = cross_entropy(C_real, y) + cross_entropy(C_fake, y)

# GAN D loss
D_loss = tf.reduce_mean(tf.log(D_real + eps) + tf.log(1. - D_fake + eps))
# Negated because the optimizer minimizes.
DC_loss = -(D_loss + C_loss)

# GAN's G loss
G_loss = tf.reduce_mean(tf.log(D_fake + eps))
GC_loss = -(G_loss + C_loss)

# Each optimizer only updates its own variable group.
D_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(DC_loss, var_list=theta_D))
G_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(GC_loss, var_list=theta_G))


sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out/'):
    os.makedirs('out/')

# Running index used to name the saved sample images.
i = 0

# Training loop: one discriminator step and one generator step per iteration
# on the same data, label and noise batch.
for it in range(1000000):
    X_mb, y_mb = mnist.train.next_batch(mb_size)
    z_mb = sample_z(mb_size, z_dim)

    _, DC_loss_curr = sess.run(
        [D_solver, DC_loss],
        feed_dict={X: X_mb, y: y_mb, z: z_mb}
    )

    _, GC_loss_curr = sess.run(
        [G_solver, GC_loss],
        feed_dict={X: X_mb, y: y_mb, z: z_mb}
    )

    # Every 1000 iterations: generate 16 samples of one random class and save them.
    if it % 1000 == 0:
        idx = np.random.randint(0, 10)
        c = np.zeros([16, y_dim])
        c[range(16), idx] = 1

        samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim), y: c})

        print('Iter: {}; DC_loss: {:.4}; GC_loss: {:.4}; Idx; {}'
              .format(it, DC_loss_curr, GC_loss_curr, idx))

        fig = plot(samples)
        plt.savefig('out/{}.png'
                    .format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)


================================================
FILE: GAN/boundary_equilibrium_gan/began_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Script configuration. MNIST is loaded with one-hot labels.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32  # minibatch size
z_dim = 10  # noise dimensionality
X_dim = mnist.train.images.shape[1]  # second dimension of the training image array
y_dim = mnist.train.labels.shape[1]  # not referenced elsewhere in this script
h_dim = 128  # hidden layer width
cnt = 0  # running index used to name the saved sample images
d_step = 3  # not referenced elsewhere in this script
lr = 1e-3  # learning rate for both Adam optimizers
m = 5  # not referenced elsewhere in this script
lam = 1e-3  # step size of the k update in the training loop
k = 0  # weight of the fake-sample term in the discriminator loss; updated every iteration
gamma = 0.5  # target ratio between D(G(z)) and D(X) used in the k update


# Generator: z_dim -> h_dim -> X_dim with a sigmoid output.
G = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)

# Discriminator body: an autoencoder X_dim -> h_dim -> X_dim with a linear
# output layer.
D_ = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
)


# D is an autoencoder; its "score" is the reconstruction error of its input.
def D(X):
    """Return the batch-mean L1 reconstruction error of `X` under `D_`.

    The absolute error is summed over dimension 1 and then averaged.
    """
    reconstruction = D_(X)
    # Use Laplace MLE as in the paper (absolute rather than squared error)
    per_sample_error = torch.sum(torch.abs(X - reconstruction), 1)
    return torch.mean(per_sample_error)


def reset_grad():
    """Zero the accumulated gradients of G and D_ (in that order)."""
    for net in (G, D_):
        net.zero_grad()


# One Adam optimizer per network, both using the same learning rate.
G_solver = optim.Adam(G.parameters(), lr=lr)
D_solver = optim.Adam(D_.parameters(), lr=lr)


# BEGAN training loop. Each iteration updates the discriminator, then the
# generator, then the balancing coefficient k; every 1000 iterations it prints
# the convergence measure and saves a 4x4 grid of generated samples.
for it in range(1000000):
    # Sample data
    X, _ = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))

    # Discriminator
    z_D = Variable(torch.randn(mb_size, z_dim))

    # Reconstruct real data well, and fake data badly in proportion to k
    D_loss = D(X) - k * D(G(z_D))

    D_loss.backward()
    D_solver.step()
    reset_grad()

    # Generator (uses its own fresh noise batch)
    z_G = Variable(torch.randn(mb_size, z_dim))

    G_loss = D(G(z_G))

    G_loss.backward()
    G_solver.step()
    reset_grad()

    # Update k, the equilibrium
    k = k + lam * (gamma*D(X) - D(G(z_G)))
    k = k.data[0]  # k is variable, so unvariable it so that no gradient prop.
    # Keep k inside [0, 1] as the BEGAN formulation requires. Without the
    # clamp k can go negative, which flips the sign of the fake-sample term
    # in D_loss, or grow past 1.
    k = min(max(k, 0.), 1.)

    # Print and plot every now and then
    if it % 1000 == 0:
        measure = D(X) + torch.abs(gamma*D(X) - D(G(z_G)))

        print('Iter-{}; Convergence measure: {:.4}'
              .format(it, measure.data[0]))

        # First 16 samples generated from the generator's noise batch
        samples = G(z_G).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        # Files are named by the zero-padded running counter
        plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/boundary_equilibrium_gan/began_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os


# Script configuration.
mb_size = 32  # minibatch size
X_dim = 784  # input width; samples are later reshaped to 28x28 for plotting
z_dim = 64  # noise dimensionality
h_dim = 128  # hidden layer width
lr = 1e-3  # learning rate for both Adam optimizers
m = 5  # not referenced elsewhere in this script
lam = 1e-3  # step size of the k update in the training loop
gamma = 0.5  # target ratio between D_fake and D_real used in the k update
k_curr = 0  # current value fed into the `k` placeholder; updated every iteration

# MNIST with one-hot labels, read from the shared data directory.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)


def plot(samples):
    """Show each sample as a 28x28 grayscale tile on a 4x4 grid.

    Returns the matplotlib figure; saving and closing it is left to the
    caller.
    """
    canvas = plt.figure(figsize=(4, 4))
    spec = gridspec.GridSpec(4, 4)
    spec.update(wspace=0.05, hspace=0.05)

    for n, vec in enumerate(samples):
        panel = plt.subplot(spec[n])
        plt.axis('off')
        panel.set_xticklabels([])
        panel.set_yticklabels([])
        panel.set_aspect('equal')
        plt.imshow(vec.reshape(28, 28), cmap='Greys_r')

    return canvas


def xavier_init(size):
    """Return normally distributed initial weights of shape `size`.

    The standard deviation, 1 / sqrt(size[0] / 2), is derived from the first
    entry of `size` only.
    """
    input_width = size[0]
    std = 1. / tf.sqrt(input_width / 2.)
    return tf.random_normal(shape=size, stddev=std)


# Graph inputs: data rows, noise, and the scalar balancing coefficient k
# (fed from Python each step rather than stored as a variable).
X = tf.placeholder(tf.float32, shape=[None, X_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])
k = tf.placeholder(tf.float32)

# Discriminator (autoencoder) weights: X_dim -> h_dim -> X_dim.
D_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
D_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
D_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Generator weights: z_dim -> h_dim -> X_dim.
G_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
G_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Variable groups handed to the generator and discriminator optimizers.
theta_G = [G_W1, G_W2, G_b1, G_b2]
theta_D = [D_W1, D_W2, D_b1, D_b2]


def sample_z(m, n):
    """Sample m rows of n-dimensional noise, uniform on [-1, 1)."""
    lower, upper = -1., 1.
    return np.random.uniform(lower, upper, size=[m, n])


def G(z):
    """Generator: ReLU hidden layer, linear output layer, sigmoid squashing."""
    hidden = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    logits = tf.matmul(hidden, G_W2) + G_b2
    return tf.nn.sigmoid(logits)


def D(X):
    """Return the batch-mean squared reconstruction error of `X`.

    `X` is reconstructed by a ReLU hidden layer and a linear output layer;
    the squared error is summed over axis 1 and then averaged.
    """
    hidden = tf.nn.relu(tf.matmul(X, D_W1) + D_b1)
    reconstruction = tf.matmul(hidden, D_W2) + D_b2
    per_sample_error = tf.reduce_sum((X - reconstruction)**2, 1)
    return tf.reduce_mean(per_sample_error)


# Build the graph: reconstruction errors of real and generated data.
G_sample = G(z)

D_real = D(X)
D_fake = D(G_sample)

# The discriminator reconstructs real data well and fake data badly in
# proportion to k; the generator tries to make its samples easy to reconstruct.
D_loss = D_real - k*D_fake
G_loss = D_fake

# Each optimizer only updates its own variable group.
D_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(D_loss, var_list=theta_D))
G_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(G_loss, var_list=theta_G))

sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out/'):
    os.makedirs('out/')

# Running index used to name the saved sample images.
i = 0

# Training loop: discriminator step, generator step, then the k update.
for it in range(1000000):
    X_mb, _ = mnist.train.next_batch(mb_size)

    _, D_real_curr = sess.run(
        [D_solver, D_real],
        feed_dict={X: X_mb, z: sample_z(mb_size, z_dim), k: k_curr}
    )

    # k is not fed here: neither G_solver nor D_fake depends on it.
    _, D_fake_curr = sess.run(
        [G_solver, D_fake],
        feed_dict={X: X_mb, z: sample_z(mb_size, z_dim)}
    )

    # Proportional update of k, clamped to [0, 1] as the BEGAN formulation
    # requires. Without the clamp k can go negative, which flips the sign of
    # the fake-sample term in D_loss, or grow past 1.
    k_curr = float(np.clip(
        k_curr + lam * (gamma*D_real_curr - D_fake_curr), 0., 1.
    ))

    # Every 1000 iterations: report the convergence measure and save samples.
    if it % 1000 == 0:
        measure = D_real_curr + np.abs(gamma*D_real_curr - D_fake_curr)

        print('Iter-{}; Convergence measure: {:.4}'
              .format(it, measure))

        samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)})

        fig = plot(samples)
        plt.savefig('out/{}.png'
                    .format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)


================================================
FILE: GAN/boundary_seeking_gan/bgan_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Script configuration. MNIST is loaded with one-hot labels.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32  # minibatch size
z_dim = 10  # noise dimensionality
X_dim = mnist.train.images.shape[1]  # second dimension of the training image array
y_dim = mnist.train.labels.shape[1]  # not referenced elsewhere in this script
h_dim = 128  # hidden layer width
cnt = 0  # running index used to name the saved sample images
lr = 1e-3  # learning rate for both Adam optimizers


def log(x):
    """Elementwise natural log of `x + 1e-8`; the offset guards against log(0)."""
    offset = 1e-8
    return torch.log(x + offset)


# Generator: z_dim -> h_dim -> X_dim with a sigmoid output.
G = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)


# Discriminator: X_dim -> h_dim -> 1 with a sigmoid output (a probability).
D = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1),
    torch.nn.Sigmoid()
)


def reset_grad():
    """Zero the accumulated gradients of G and D (in that order)."""
    for net in (G, D):
        net.zero_grad()


# One Adam optimizer per network, both using the same learning rate.
G_solver = optim.Adam(G.parameters(), lr=lr)
D_solver = optim.Adam(D.parameters(), lr=lr)


# Boundary-seeking GAN training loop. Each iteration performs one
# discriminator update and one generator update on the same noise batch;
# every 1000 iterations it prints the losses and saves a 4x4 sample grid.
for it in range(1000000):
    # Sample data
    z = Variable(torch.randn(mb_size, z_dim))
    X, _ = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))

    # Discriminator
    G_sample = G(z)
    D_real = D(X)
    D_fake = D(G_sample)

    D_loss = -torch.mean(log(D_real) + log(1 - D_fake))

    D_loss.backward()
    D_solver.step()
    reset_grad()

    # Generator
    G_sample = G(z)
    D_fake = D(G_sample)

    # Half the mean squared log-odds of D_fake; it vanishes when D_fake is 0.5.
    G_loss = 0.5 * torch.mean((log(D_fake) - log(1 - D_fake))**2)

    G_loss.backward()
    G_solver.step()
    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss.data[0], G_loss.data[0]))

        # First 16 samples generated from the current noise batch
        samples = G(z).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        # Files are named by the zero-padded running counter
        plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/boundary_seeking_gan/bgan_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os


# Script configuration.
mb_size = 32  # minibatch size
X_dim = 784  # input width; samples are later reshaped to 28x28 for plotting
z_dim = 64  # noise dimensionality
h_dim = 128  # hidden layer width
lr = 1e-3  # learning rate for both Adam optimizers
d_steps = 3  # not referenced anywhere else in this script

# MNIST with one-hot labels, read from the shared data directory.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)


def plot(samples):
    """Build a 4x4 figure of grayscale tiles from `samples` and return it.

    Each sample is reshaped to 28x28; tick labels and axes are hidden.
    """
    fig = plt.figure(figsize=(4, 4))
    cells = gridspec.GridSpec(4, 4)
    cells.update(wspace=0.05, hspace=0.05)

    for slot, pixels in enumerate(samples):
        subplot = plt.subplot(cells[slot])
        plt.axis('off')
        subplot.set_xticklabels([])
        subplot.set_yticklabels([])
        subplot.set_aspect('equal')
        plt.imshow(pixels.reshape(28, 28), cmap='Greys_r')

    return fig


def xavier_init(size):
    """Draw initial weights of shape `size` from a zero-mean normal.

    The standard deviation is 1 / sqrt(size[0] / 2).
    """
    rows = size[0]
    scale = 1. / tf.sqrt(rows / 2.)
    return tf.random_normal(shape=size, stddev=scale)


def log(x):
    """Return tf.log(x + 1e-8); the offset keeps log(0) from producing -inf."""
    offset = 1e-8
    return tf.log(x + offset)


# Graph inputs: a batch of data rows and a batch of noise.
X = tf.placeholder(tf.float32, shape=[None, X_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Discriminator weights: X_dim -> h_dim -> 1.
D_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

# Generator weights: z_dim -> h_dim -> X_dim.
G_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
G_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Variable groups handed to the generator and discriminator optimizers.
theta_G = [G_W1, G_W2, G_b1, G_b2]
theta_D = [D_W1, D_W2, D_b1, D_b2]


def sample_z(m, n):
    """Return an (m, n) array of noise sampled uniformly from [-1, 1)."""
    noise = np.random.uniform(-1., 1., size=[m, n])
    return noise


def generator(z):
    """Map noise `z` to samples: ReLU hidden layer, linear layer, sigmoid."""
    hidden = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    logits = tf.matmul(hidden, G_W2) + G_b2
    return tf.nn.sigmoid(logits)


def discriminator(x):
    """Return the sigmoid score of `x` from a one-hidden-layer ReLU network."""
    hidden = tf.nn.relu(tf.matmul(x, D_W1) + D_b1)
    logit = tf.matmul(hidden, D_W2) + D_b2
    return tf.nn.sigmoid(logit)


# Build the graph: generated samples and discriminator scores.
G_sample = generator(z)

D_real = discriminator(X)
D_fake = discriminator(G_sample)

D_loss = -tf.reduce_mean(log(D_real) + log(1 - D_fake))
# Half the mean squared log-odds of D_fake; it vanishes when D_fake is 0.5.
G_loss = 0.5 * tf.reduce_mean((log(D_fake) - log(1 - D_fake))**2)

# Each optimizer only updates its own variable group.
D_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(D_loss, var_list=theta_D))
G_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(G_loss, var_list=theta_G))

sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out/'):
    os.makedirs('out/')

# Running index used to name the saved sample images.
i = 0

# Training loop: one discriminator step, then one generator step on a
# freshly sampled noise batch.
for it in range(1000000):
    X_mb, _ = mnist.train.next_batch(mb_size)
    z_mb = sample_z(mb_size, z_dim)

    _, D_loss_curr = sess.run(
        [D_solver, D_loss],
        feed_dict={X: X_mb, z: z_mb}
    )

    _, G_loss_curr = sess.run(
        [G_solver, G_loss],
        feed_dict={X: X_mb, z: sample_z(mb_size, z_dim)}
    )

    # Every 1000 iterations: report losses and save 16 freshly generated samples.
    if it % 1000 == 0:
        print('Iter: {}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss_curr, G_loss_curr))

        samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)})

        fig = plot(samples)
        plt.savefig('out/{}.png'
                    .format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)


================================================
FILE: GAN/conditional_gan/cgan_pytorch.py
================================================
import torch
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Script configuration. MNIST is loaded with one-hot labels.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 64  # minibatch size; also fixes the size of the ones/zeros label tensors
Z_dim = 100  # noise dimensionality
X_dim = mnist.train.images.shape[1]  # second dimension of the training image array
y_dim = mnist.train.labels.shape[1]  # second dimension of the (one-hot) label array
h_dim = 128  # hidden layer width
cnt = 0  # presumably the index of the next saved sample image — TODO confirm
lr = 1e-3  # NOTE(review): the optimizers below hard-code lr=1e-3 instead of using this


def xavier_init(size):
    """Create a trainable weight Variable of shape `size`.

    Entries are standard-normal draws scaled by 1 / sqrt(size[0] / 2).
    The returned Variable has requires_grad=True.
    """
    fan_in = size[0]
    scale = 1. / np.sqrt(fan_in / 2.)
    weights = torch.randn(*size) * scale
    return Variable(weights, requires_grad=True)


""" ==================== GENERATOR ======================== """

# First generator layer: concatenated (noise, label) input -> hidden.
Wzh = xavier_init(size=[Z_dim + y_dim, h_dim])
bzh = Variable(torch.zeros(h_dim), requires_grad=True)

# Second generator layer: hidden -> X_dim output.
Whx = xavier_init(size=[h_dim, X_dim])
bhx = Variable(torch.zeros(X_dim), requires_grad=True)


def G(z, c):
    """Generate samples from noise `z` conditioned on `c`.

    `z` and `c` are concatenated along dimension 1, passed through a ReLU
    hidden layer and a sigmoid output layer. Each bias is tiled to one row
    per batch element before being added.
    """
    joint = torch.cat([z, c], 1)
    hidden_bias = bzh.repeat(joint.size(0), 1)
    hidden = nn.relu(joint @ Wzh + hidden_bias)
    output_bias = bhx.repeat(hidden.size(0), 1)
    return nn.sigmoid(hidden @ Whx + output_bias)


""" ==================== DISCRIMINATOR ======================== """

# First discriminator layer: concatenated (data, label) input -> hidden.
Wxh = xavier_init(size=[X_dim + y_dim, h_dim])
bxh = Variable(torch.zeros(h_dim), requires_grad=True)

# Second discriminator layer: hidden -> single output unit.
Why = xavier_init(size=[h_dim, 1])
bhy = Variable(torch.zeros(1), requires_grad=True)


def D(X, c):
    """Return the sigmoid score of `X` conditioned on `c`.

    `X` and `c` are concatenated along dimension 1, passed through a ReLU
    hidden layer and a single-unit sigmoid output layer. Each bias is tiled
    to one row per batch element before being added.
    """
    joint = torch.cat([X, c], 1)
    hidden_bias = bxh.repeat(joint.size(0), 1)
    hidden = nn.relu(joint @ Wxh + hidden_bias)
    output_bias = bhy.repeat(hidden.size(0), 1)
    return nn.sigmoid(hidden @ Why + output_bias)


# Parameter groups for the two optimizers, plus the combined list that
# reset_grad() iterates over.
G_params = [Wzh, bzh, Whx, bhx]
D_params = [Wxh, bxh, Why, bhy]
params = G_params + D_params


""" ===================== TRAINING ======================== """


def reset_grad():
    """Replace every populated gradient in ``params`` with a fresh zero tensor.

    Parameters whose ``grad`` is still None are left untouched.
    """
    for param in params:
        if param.grad is None:
            continue
        grad_data = param.grad.data
        # Allocate a new tensor of the same type and shape, filled with zeros.
        zeroed = grad_data.new().resize_as_(grad_data).zero_()
        param.grad = Variable(zeroed)


# Separate Adam optimizers so each step only updates its own network.
G_solver = optim.Adam(G_params, lr=1e-3)
D_solver = optim.Adam(D_params, lr=1e-3)

# Fixed BCE targets, one row per minibatch element.
ones_label = Variable(torch.ones(mb_size, 1))
zeros_label = Variable(torch.zeros(mb_size, 1))


# Alternating training: one discriminator step, then one generator step.
for it in range(100000):
    # Sample data
    z = Variable(torch.randn(mb_size, Z_dim))
    X, c = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))
    # Labels are cast to float32 before being wrapped as a Variable.
    c = Variable(torch.from_numpy(c.astype('float32')))

    # Discriminator forward-loss-backward-update
    G_sample = G(z, c)
    D_real = D(X, c)
    D_fake = D(G_sample, c)

    D_loss_real = nn.binary_cross_entropy(D_real, ones_label)
    D_loss_fake = nn.binary_cross_entropy(D_fake, zeros_label)
    D_loss = D_loss_real + D_loss_fake

    # G_sample is not detached, so this also fills the generator gradients;
    # D_solver only steps D_params and reset_grad() clears the rest.
    D_loss.backward()
    D_solver.step()

    # Housekeeping - reset gradient
    reset_grad()

    # Generator forward-loss-backward-update
    z = Variable(torch.randn(mb_size, Z_dim))
    G_sample = G(z, c)
    D_fake = D(G_sample, c)

    # The generator is trained against the "real" target.
    G_loss = nn.binary_cross_entropy(D_fake, ones_label)

    G_loss.backward()
    G_solver.step()

    # Housekeeping - reset gradient
    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        print('Iter-{}; D_loss: {}; G_loss: {}'.format(it, D_loss.data.numpy(), G_loss.data.numpy()))

        # Condition the whole batch on one randomly chosen class column.
        c = np.zeros(shape=[mb_size, y_dim], dtype='float32')
        c[:, np.random.randint(0, 10)] = 1.
        c = Variable(torch.from_numpy(c))
        # Only the first 16 samples fit the 4x4 grid below.
        samples = G(z, c).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        # File name is the zero-padded running counter cnt.
        plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/conditional_gan/cgan_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os


# Load MNIST with one-hot labels; the labels serve as the conditioning input.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 64  # minibatch size
Z_dim = 100  # width of the noise vector fed to the generator
X_dim = mnist.train.images.shape[1]  # width of one image row
y_dim = mnist.train.labels.shape[1]  # width of one one-hot label row
h_dim = 128  # hidden layer width shared by both networks


def xavier_init(size):
    """Return a random-normal initializer tensor scaled by the fan-in.

    Args:
        size: list of dimensions; ``size[0]`` is taken as the fan-in.

    Returns:
        A ``tf.random_normal`` tensor of shape ``size`` with standard
        deviation ``1 / sqrt(fan_in / 2)``.
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)


""" Discriminator Net model """
# Placeholders for a batch of images and the matching one-hot labels.
# NOTE(review): X hard-codes 784 while the weights below are sized by X_dim.
X = tf.placeholder(tf.float32, shape=[None, 784])
y = tf.placeholder(tf.float32, shape=[None, y_dim])

# Layer 1: concatenated [image, label] -> hidden.
D_W1 = tf.Variable(xavier_init([X_dim + y_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

# Layer 2: hidden -> single logit.
D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

# Variables updated by the discriminator optimizer.
theta_D = [D_W1, D_W2, D_b1, D_b2]


def discriminator(x, y):
    """Score input ``x`` conditioned on label ``y``.

    Returns:
        A ``(probability, logit)`` pair; the probability is the sigmoid of
        the logit produced by the two-layer network (D_W1/D_b1, D_W2/D_b2).
    """
    xy = tf.concat(axis=1, values=[x, y])
    hidden = tf.nn.relu(tf.matmul(xy, D_W1) + D_b1)
    logit = tf.matmul(hidden, D_W2) + D_b2
    return tf.nn.sigmoid(logit), logit


""" Generator Net model """
# Placeholder for a batch of noise vectors.
Z = tf.placeholder(tf.float32, shape=[None, Z_dim])

# Layer 1: concatenated [noise, label] -> hidden.
G_W1 = tf.Variable(xavier_init([Z_dim + y_dim, h_dim]))
G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

# Layer 2: hidden -> image.
G_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Variables updated by the generator optimizer.
theta_G = [G_W1, G_W2, G_b1, G_b2]


def generator(z, y):
    """Generate an image batch from noise ``z`` conditioned on label ``y``.

    The inputs are concatenated along axis 1, passed through a ReLU hidden
    layer (G_W1/G_b1) and a sigmoid output layer (G_W2/G_b2).
    """
    zy = tf.concat(axis=1, values=[z, y])
    hidden = tf.nn.relu(tf.matmul(zy, G_W1) + G_b1)
    pre_activation = tf.matmul(hidden, G_W2) + G_b2
    return tf.nn.sigmoid(pre_activation)


def sample_Z(m, n):
    """Draw an ``m`` x ``n`` array of noise, uniform on [-1, 1)."""
    low, high = -1., 1.
    noise = np.random.uniform(low, high, size=[m, n])
    return noise


def plot(samples):
    """Draw ``samples`` as 28x28 greyscale tiles on a 4x4 grid.

    Args:
        samples: iterable of arrays, each reshapeable to (28, 28); the grid
            has 16 cells.

    Returns:
        The matplotlib figure holding the grid.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for cell, image in enumerate(samples):
        axis = plt.subplot(grid[cell])
        plt.axis('off')
        axis.set_xticklabels([])
        axis.set_yticklabels([])
        axis.set_aspect('equal')
        plt.imshow(image.reshape(28, 28), cmap='Greys_r')

    return figure


# Wire the graph: the same label placeholder y conditions G and both D passes.
G_sample = generator(Z, y)
D_real, D_logit_real = discriminator(X, y)
D_fake, D_logit_fake = discriminator(G_sample, y)

# Discriminator: real inputs target 1, generated inputs target 0.
D_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_real, labels=tf.ones_like(D_logit_real)))
D_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.zeros_like(D_logit_fake)))
D_loss = D_loss_real + D_loss_fake
# Generator: generated inputs target 1.
G_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.ones_like(D_logit_fake)))

# Each optimizer is restricted to its own network's variables.
D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)


sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out/'):
    os.makedirs('out/')

# Index of the next sample grid written to out/.
i = 0

for it in range(1000000):
    # Every 1000 iterations, render 16 samples before the update step.
    if it % 1000 == 0:
        n_sample = 16

        Z_sample = sample_Z(n_sample, Z_dim)
        # Condition every sample on label column 7.
        y_sample = np.zeros(shape=[n_sample, y_dim])
        y_sample[:, 7] = 1

        samples = sess.run(G_sample, feed_dict={Z: Z_sample, y:y_sample})

        fig = plot(samples)
        plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)

    X_mb, y_mb = mnist.train.next_batch(mb_size)

    # The same noise batch is fed to the D step and the G step.
    Z_sample = sample_Z(mb_size, Z_dim)
    _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, Z: Z_sample, y:y_mb})
    _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={Z: Z_sample, y:y_mb})

    if it % 1000 == 0:
        print('Iter: {}'.format(it))
        print('D loss: {:.4}'. format(D_loss_curr))
        print('G_loss: {:.4}'.format(G_loss_curr))
        print()


================================================
FILE: GAN/coupled_gan/cogan_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data
import copy
import scipy.ndimage.interpolation


mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32  # minibatch size
z_dim = 100  # width of the noise vector
X_dim = mnist.train.images.shape[1]  # width of one image row
y_dim = mnist.train.labels.shape[1]  # width of one one-hot label row
h_dim = 128  # hidden layer width
cnt = 0  # index of the next sample grid written to out/
lr = 1e-3  # learning rate passed to both Adam solvers


""" Shared Generator weights """
# First layer, used by both generators: noise -> hidden.
G_shared = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
)

""" Generator 1 """
# Domain-1 head: hidden -> image.
G1_ = torch.nn.Sequential(
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)

""" Generator 2 """
# Domain-2 head: hidden -> image.
G2_ = torch.nn.Sequential(
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)


def G1(z):
    """Generate a domain-1 sample: shared layer followed by the G1_ head."""
    return G1_(G_shared(z))


def G2(z):
    """Generate a domain-2 sample: shared layer followed by the G2_ head."""
    return G2_(G_shared(z))


""" Shared Discriminator weights """
# Last layer, used by both discriminators: hidden -> probability.
D_shared = torch.nn.Sequential(
    torch.nn.Linear(h_dim, 1),
    torch.nn.Sigmoid()
)

""" Discriminator 1 """
# Domain-1 first layer: image -> hidden.
D1_ = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU()
)

""" Discriminator 2 """
# Domain-2 first layer: image -> hidden.
D2_ = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU()
)


def D1(X):
    """Score a domain-1 input: D1_ first layer followed by the shared head."""
    return D_shared(D1_(X))


def D2(X):
    """Score a domain-2 input: D2_ first layer followed by the shared head."""
    return D_shared(D2_(X))


# Each optimizer receives its domain-specific and shared parameters together.
D_params = (list(D1_.parameters()) + list(D2_.parameters()) +
            list(D_shared.parameters()))
G_params = (list(G1_.parameters()) + list(G2_.parameters()) +
            list(G_shared.parameters()))
# Every module whose gradients reset_grad() must clear.
nets = [G_shared, G1_, G2_, D_shared, D1_, D2_]


def reset_grad():
    """Zero the accumulated gradients of every module listed in ``nets``."""
    for module in nets:
        module.zero_grad()


G_solver = optim.Adam(G_params, lr=lr)
D_solver = optim.Adam(D_params, lr=lr)

# Split the training images in two halves, one per domain.
X_train = mnist.train.images
half = int(X_train.shape[0] / 2)

# Real image
X_train1 = X_train[:half]
# Rotated image
# The second half is viewed as 28x28 images, rotated by 90 degrees in the
# image plane, then flattened back to rows of 28*28 values.
X_train2 = X_train[half:].reshape(-1, 28, 28)
X_train2 = scipy.ndimage.interpolation.rotate(X_train2, 90, axes=(1, 2))
X_train2 = X_train2.reshape(-1, 28*28)

# Cleanup
del X_train


def sample_x(X, size):
    """Return a random contiguous window of ``size`` rows of X as a Variable.

    Args:
        X: numpy array indexed by row along axis 0.
        size: number of consecutive rows to return.

    Returns:
        A torch Variable wrapping ``X[start:start + size]`` for a uniformly
        chosen valid ``start``.

    Raises:
        ValueError: if ``size`` exceeds the number of rows in X.
    """
    n_rows = X.shape[0]
    if size > n_rows:
        raise ValueError(
            'cannot sample {} rows from an array with {} rows'
            .format(size, n_rows))
    # randint's upper bound is exclusive: the +1 makes the last window
    # reachable and allows size == n_rows.
    start_idx = np.random.randint(0, n_rows - size + 1)
    return Variable(torch.from_numpy(X[start_idx:start_idx+size]))


# Alternating training of the coupled discriminators and generators.
for it in range(100000):
    X1 = sample_x(X_train1, mb_size)
    X2 = sample_x(X_train2, mb_size)
    # One noise batch drives both generators.
    z = Variable(torch.randn(mb_size, z_dim))

    # Discriminator
    G1_sample = G1(z)
    D1_real = D1(X1)
    D1_fake = D1(G1_sample)

    G2_sample = G2(z)
    D2_real = D2(X2)
    D2_fake = D2(G2_sample)

    # 1e-8 keeps the logs finite when an output saturates at 0 or 1.
    D1_loss = torch.mean(-torch.log(D1_real + 1e-8) -
                         torch.log(1. - D1_fake + 1e-8))
    D2_loss = torch.mean(-torch.log(D2_real + 1e-8) -
                         torch.log(1. - D2_fake + 1e-8))
    D_loss = D1_loss + D2_loss

    D_loss.backward()

    # Average the gradients
    # The shared layer receives gradient from both losses, so it is halved.
    for p in D_shared.parameters():
        p.grad.data = 0.5 * p.grad.data

    D_solver.step()
    reset_grad()

    # Generator
    G1_sample = G1(z)
    D1_fake = D1(G1_sample)

    G2_sample = G2(z)
    D2_fake = D2(G2_sample)

    G1_loss = torch.mean(-torch.log(D1_fake + 1e-8))
    G2_loss = torch.mean(-torch.log(D2_fake + 1e-8))
    G_loss = G1_loss + G2_loss

    G_loss.backward()

    # Average the gradients
    # Same halving for the shared generator layer.
    for p in G_shared.parameters():
        p.grad.data = 0.5 * p.grad.data

    G_solver.step()
    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        # NOTE(review): `.data[0]` on a scalar loss relies on the older
        # Variable-era PyTorch API - confirm the target version.
        print('Iter-{}; D1_loss: {:.4}; G1_loss: {:.4}; '
              'D2_loss: {:.4}; G2_loss: {:.4}'
              .format(
                  it, D1_loss.data[0], G1_loss.data[0],
                  D2_loss.data[0], G2_loss.data[0])
              )

        # Eight noise vectors through both generators give 16 tiles.
        z = Variable(torch.randn(8, z_dim))
        samples1 = G1(z).data.numpy()
        samples2 = G2(z).data.numpy()
        samples = np.vstack([samples1, samples2])

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        # File name is the zero-padded running counter cnt.
        plt.savefig('out/{}.png'
                    .format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/coupled_gan/cogan_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
import scipy.ndimage.interpolation


mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)

mb_size = 32  # minibatch size
X_dim = mnist.train.images.shape[1]  # width of one image row
y_dim = mnist.train.labels.shape[1]  # width of one one-hot label row
z_dim = 10  # width of the noise vector
h_dim = 128  # hidden layer width
eps = 1e-8  # added inside the logs of the losses to keep them finite
lr = 1e-3  # learning rate of both Adam optimizers
# NOTE(review): d_steps is not referenced anywhere else in this script.
d_steps = 3


def plot(samples):
    """Render ``samples`` as 28x28 greyscale tiles on a 4x4 grid.

    Args:
        samples: iterable of arrays, each reshapeable to (28, 28); the grid
            has 16 cells.

    Returns:
        The matplotlib figure containing the tiles.
    """
    figure = plt.figure(figsize=(4, 4))
    layout = gridspec.GridSpec(4, 4)
    layout.update(wspace=0.05, hspace=0.05)

    for slot, tile in enumerate(samples):
        axes = plt.subplot(layout[slot])
        plt.axis('off')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        axes.set_aspect('equal')
        plt.imshow(tile.reshape(28, 28), cmap='Greys_r')

    return figure


def xavier_init(size):
    """Return a random-normal initializer tensor scaled by the fan-in.

    Args:
        size: list of dimensions; ``size[0]`` is taken as the fan-in.

    Returns:
        A ``tf.random_normal`` tensor of shape ``size`` with standard
        deviation ``1 / sqrt(fan_in / 2)``.
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)


# Placeholders: one image batch per domain plus a noise batch.
X1 = tf.placeholder(tf.float32, shape=[None, X_dim])
X2 = tf.placeholder(tf.float32, shape=[None, X_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Shared first generator layer: noise -> hidden.
G_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

# Domain-1 output layer.
G1_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G1_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Domain-2 output layer.
G2_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G2_b2 = tf.Variable(tf.zeros(shape=[X_dim]))


def G(z):
    """Generate one sample per domain from the same noise ``z``.

    A shared ReLU hidden layer feeds two separate sigmoid output layers.

    Returns:
        A ``(domain_1_sample, domain_2_sample)`` pair.
    """
    hidden = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    out_1 = tf.nn.sigmoid(tf.matmul(hidden, G1_W2) + G1_b2)
    out_2 = tf.nn.sigmoid(tf.matmul(hidden, G2_W2) + G2_b2)
    return out_1, out_2


# Domain-1 first discriminator layer: image -> hidden.
D1_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D1_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

# Domain-2 first discriminator layer: image -> hidden.
D2_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D2_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

# Shared output layer: hidden -> single unit.
D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))


def D(X1, X2):
    """Score a domain-1 batch and a domain-2 batch.

    Each input goes through its own ReLU hidden layer; both hidden
    activations are then scored by the shared sigmoid output layer.

    Returns:
        A ``(domain_1_score, domain_2_score)`` pair.
    """
    hidden_1 = tf.nn.relu(tf.matmul(X1, D1_W1) + D1_b1)
    hidden_2 = tf.nn.relu(tf.matmul(X2, D2_W1) + D2_b1)
    score_1 = tf.nn.sigmoid(tf.matmul(hidden_1, D_W2) + D_b2)
    score_2 = tf.nn.sigmoid(tf.matmul(hidden_2, D_W2) + D_b2)
    return score_1, score_2


# Domain-specific and shared variables are listed separately so the shared
# gradients can be rescaled before being applied.
theta_G = [G1_W2, G2_W2, G1_b2, G2_b2]
theta_G_shared = [G_W1, G_b1]

theta_D = [D1_W1, D2_W1, D1_b1, D2_b1]
theta_D_shared = [D_W2, D_b2]

# Train D
G1_sample, G2_sample = G(z)
D1_real, D2_real = D(X1, X2)
D1_fake, D2_fake = D(G1_sample, G2_sample)

# eps keeps the logs finite when an output saturates at 0 or 1.
D1_loss = -tf.reduce_mean(tf.log(D1_real + eps) + tf.log(1. - D1_fake + eps))
D2_loss = -tf.reduce_mean(tf.log(D2_real + eps) + tf.log(1. - D2_fake + eps))
D_loss = D1_loss + D2_loss

# Train G
G1_loss = -tf.reduce_mean(tf.log(D1_fake + eps))
G2_loss = -tf.reduce_mean(tf.log(D2_fake + eps))
G_loss = G1_loss + G2_loss

# D optimizer
D_opt = tf.train.AdamOptimizer(learning_rate=lr)
# Compute the gradients for a list of variables.
D_gv = D_opt.compute_gradients(D_loss, theta_D)
D_shared_gv = D_opt.compute_gradients(D_loss, theta_D_shared)
# Average by halving the shared gradients
# Each entry is a (gradient, variable) pair; only the gradient is scaled.
D_shared_gv = [(0.5 * x[0], x[1]) for x in D_shared_gv]
# Update
# Both apply ops are grouped so one sess.run updates all D variables.
D_solver = tf.group(
    D_opt.apply_gradients(D_gv), D_opt.apply_gradients(D_shared_gv)
)

# G optimizer
G_opt = tf.train.AdamOptimizer(learning_rate=lr)
# Compute the gradients for a list of variables.
G_gv = G_opt.compute_gradients(G_loss, theta_G)
G_shared_gv = G_opt.compute_gradients(G_loss, theta_G_shared)
# Average by halving the shared gradients
G_shared_gv = [(0.5 * x[0], x[1]) for x in G_shared_gv]
# Update
G_solver = tf.group(
    G_opt.apply_gradients(G_gv), G_opt.apply_gradients(G_shared_gv)
)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Split the training images in two halves, one per domain.
X_train = mnist.train.images
half = int(X_train.shape[0] / 2)

# Real image
X_train1 = X_train[:half]
# Rotated image
# The second half is viewed as 28x28 images, rotated by 90 degrees in the
# image plane, then flattened back to rows of 28*28 values.
X_train2 = X_train[half:].reshape(-1, 28, 28)
X_train2 = scipy.ndimage.interpolation.rotate(X_train2, 90, axes=(1, 2))
X_train2 = X_train2.reshape(-1, 28*28)

# Cleanup
del X_train


def sample_X(X, size):
    """Return a random contiguous window of ``size`` rows of X.

    Args:
        X: numpy array indexed by row along axis 0.
        size: number of consecutive rows to return.

    Returns:
        The slice ``X[start:start + size]`` for a uniformly chosen valid
        ``start``.

    Raises:
        ValueError: if ``size`` exceeds the number of rows in X.
    """
    n_rows = X.shape[0]
    if size > n_rows:
        raise ValueError(
            'cannot sample {} rows from an array with {} rows'
            .format(size, n_rows))
    # randint's upper bound is exclusive: the +1 makes the last window
    # reachable and allows size == n_rows.
    start_idx = np.random.randint(0, n_rows - size + 1)
    return X[start_idx:start_idx+size]


def sample_z(m, n):
    """Draw an ``m`` x ``n`` array of noise, uniform on [-1, 1)."""
    low, high = -1., 1.
    noise = np.random.uniform(low, high, size=[m, n])
    return noise


if not os.path.exists('out/'):
    os.makedirs('out/')

# Index of the next sample grid written to out/.
i = 0

for it in range(1000000):
    X1_mb, X2_mb = sample_X(X_train1, mb_size), sample_X(X_train2, mb_size)
    z_mb = sample_z(mb_size, z_dim)

    # Discriminator step.
    _, D_loss_curr = sess.run(
        [D_solver, D_loss],
        feed_dict={X1: X1_mb, X2: X2_mb, z: z_mb}
    )

    # Generator step; reuses the same noise batch and needs no real images.
    _, G_loss_curr = sess.run(
        [G_solver, G_loss], feed_dict={z: z_mb}
    )

    if it % 1000 == 0:
        # Eight noise vectors through both generator heads give 16 tiles.
        sample1, sample2 = sess.run(
            [G1_sample, G2_sample], feed_dict={z: sample_z(8, z_dim)}
        )

        samples = np.vstack([sample1, sample2])

        print('Iter: {}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss_curr, G_loss_curr))

        fig = plot(samples)
        plt.savefig('out/{}.png'
                    .format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)


================================================
FILE: GAN/disco_gan/discogan_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data
import scipy.ndimage.interpolation


mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32  # minibatch size
# NOTE(review): z_dim and y_dim are not referenced elsewhere in this script.
z_dim = 10
X_dim = mnist.train.images.shape[1]  # width of one image row
y_dim = mnist.train.labels.shape[1]
h_dim = 128  # hidden layer width
cnt = 0  # index of the next sample grid written to out/
lr = 1e-3  # learning rate passed to both Adam solvers


def log(x):
    """Natural log of ``x`` offset by 1e-8 so that log(0) stays finite."""
    shifted = x + 1e-8
    return torch.log(shifted)


def plot(samples):
    """Lay out ``samples`` as 28x28 greyscale tiles on a 4x4 grid.

    Args:
        samples: iterable of arrays, each reshapeable to (28, 28); the grid
            has 16 cells.

    Returns:
        The matplotlib figure holding the tiles.
    """
    canvas = plt.figure(figsize=(4, 4))
    spec = gridspec.GridSpec(4, 4)
    spec.update(wspace=0.05, hspace=0.05)

    for position, picture in enumerate(samples):
        panel = plt.subplot(spec[position])
        plt.axis('off')
        panel.set_xticklabels([])
        panel.set_yticklabels([])
        panel.set_aspect('equal')
        plt.imshow(picture.reshape(28, 28), cmap='Greys_r')

    return canvas


# Translator from domain A to domain B: image -> hidden -> image.
G_AB = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)

# Translator from domain B to domain A, same architecture.
G_BA = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)

# Discriminator for domain A: image -> hidden -> probability.
D_A = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1),
    torch.nn.Sigmoid()
)

# Discriminator for domain B, same architecture.
D_B = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1),
    torch.nn.Sigmoid()
)

# nets is the list reset_grad() walks; the parameter lists feed the solvers.
nets = [G_AB, G_BA, D_A, D_B]
G_params = list(G_AB.parameters()) + list(G_BA.parameters())
D_params = list(D_A.parameters()) + list(D_B.parameters())


def reset_grad():
    """Zero the accumulated gradients of every module listed in ``nets``."""
    for module in nets:
        module.zero_grad()


# One optimizer for both translators, one for both discriminators.
G_solver = optim.Adam(G_params, lr=lr)
D_solver = optim.Adam(D_params, lr=lr)

if not os.path.exists('out/'):
    os.makedirs('out/')

# Gather training data: domain1 <- real MNIST img, domain2 <- rotated MNIST img
X_train = mnist.train.images
half = int(X_train.shape[0] / 2)
# Real image
X_train1 = X_train[:half]
# Rotated image
# The second half is viewed as 28x28 images, rotated by 90 degrees in the
# image plane, then flattened back to rows of 28*28 values.
X_train2 = X_train[half:].reshape(-1, 28, 28)
X_train2 = scipy.ndimage.interpolation.rotate(X_train2, 90, axes=(1, 2))
X_train2 = X_train2.reshape(-1, 28*28)
# Cleanup
del X_train


def sample_x(X, size):
    """Return a random contiguous window of ``size`` rows of X as a Variable.

    Args:
        X: numpy array indexed by row along axis 0.
        size: number of consecutive rows to return.

    Returns:
        A torch Variable wrapping ``X[start:start + size]`` for a uniformly
        chosen valid ``start``.

    Raises:
        ValueError: if ``size`` exceeds the number of rows in X.
    """
    n_rows = X.shape[0]
    if size > n_rows:
        raise ValueError(
            'cannot sample {} rows from an array with {} rows'
            .format(size, n_rows))
    # randint's upper bound is exclusive: the +1 makes the last window
    # reachable and allows size == n_rows.
    start_idx = np.random.randint(0, n_rows - size + 1)
    return Variable(torch.from_numpy(X[start_idx:start_idx+size]))


# Training
for it in range(1000000):
    # Sample data from both domains
    X_A = sample_x(X_train1, mb_size)
    X_B = sample_x(X_train2, mb_size)

    # Discriminator A
    # D_A compares real A images with B images translated into A.
    X_BA = G_BA(X_B)
    D_A_real = D_A(X_A)
    D_A_fake = D_A(X_BA)

    L_D_A = -torch.mean(log(D_A_real) + log(1 - D_A_fake))

    # Discriminator B
    # D_B compares real B images with A images translated into B.
    X_AB = G_AB(X_A)
    D_B_real = D_B(X_B)
    D_B_fake = D_B(X_AB)

    L_D_B = -torch.mean(log(D_B_real) + log(1 - D_B_fake))

    # Total discriminator loss
    D_loss = L_D_A + L_D_B

    D_loss.backward()
    D_solver.step()
    reset_grad()

    # Generator AB
    X_AB = G_AB(X_A)
    D_B_fake = D_B(X_AB)
    # Round trip A -> B -> A, used for the reconstruction term.
    X_ABA = G_BA(X_AB)

    L_adv_B = -torch.mean(log(D_B_fake))
    # Squared error summed over dimension 1, averaged over the batch.
    L_recon_A = torch.mean(torch.sum((X_A - X_ABA)**2, 1))
    L_G_AB = L_adv_B + L_recon_A

    # Generator BA
    X_BA = G_BA(X_B)
    D_A_fake = D_A(X_BA)
    # Round trip B -> A -> B.
    X_BAB = G_AB(X_BA)

    L_adv_A = -torch.mean(log(D_A_fake))
    L_recon_B = torch.mean(torch.sum((X_B - X_BAB)**2, 1))
    L_G_BA = L_adv_A + L_recon_B

    # Total generator loss
    G_loss = L_G_AB + L_G_BA

    G_loss.backward()
    G_solver.step()
    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        # NOTE(review): `.data[0]` on a scalar loss relies on the older
        # Variable-era PyTorch API - confirm the target version.
        print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss.data[0], G_loss.data[0]))

        input_A = sample_x(X_train1, size=4)
        input_B = sample_x(X_train2, size=4)

        samples_A = G_BA(input_B).data.numpy()
        samples_B = G_AB(input_A).data.numpy()

        input_A = input_A.data.numpy()
        input_B = input_B.data.numpy()

        # The resulting image sample would be in 4 rows:
        # row 1: real data from domain A, row 2 is its domain B translation
        # row 3: real data from domain B, row 4 is its domain A translation
        samples = np.vstack([input_A, samples_B, input_B, samples_A])

        fig = plot(samples)
        plt.savefig('out/{}.png'
                    .format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/disco_gan/discogan_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
import scipy.ndimage.interpolation


mb_size = 32  # minibatch size
X_dim = 784  # width of one image row (28 * 28, see the reshape in plot)
# NOTE(review): z_dim and d_steps are not referenced elsewhere in this script.
z_dim = 64
h_dim = 128  # hidden layer width
lr = 1e-3  # learning rate of the Adam optimizer
d_steps = 3

mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)


def plot(samples):
    """Show ``samples`` as 28x28 greyscale tiles on a 4x4 grid.

    Args:
        samples: iterable of arrays, each reshapeable to (28, 28); the grid
            has 16 cells.

    Returns:
        The matplotlib figure that was drawn.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for cell, image in enumerate(samples):
        axis = plt.subplot(grid[cell])
        plt.axis('off')
        axis.set_xticklabels([])
        axis.set_yticklabels([])
        axis.set_aspect('equal')
        plt.imshow(image.reshape(28, 28), cmap='Greys_r')

    return figure


def xavier_init(size):
    """Return a random-normal initializer tensor scaled by the fan-in.

    Args:
        size: list of dimensions; ``size[0]`` is taken as the fan-in.

    Returns:
        A ``tf.random_normal`` tensor of shape ``size`` with standard
        deviation ``1 / sqrt(fan_in / 2)``.
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)


def log(x):
    """Natural log of ``x`` offset by 1e-8 so that log(0) stays finite."""
    shifted = x + 1e-8
    return tf.log(shifted)


# Placeholders for one image batch per domain.
X_A = tf.placeholder(tf.float32, shape=[None, X_dim])
X_B = tf.placeholder(tf.float32, shape=[None, X_dim])

# Discriminator for domain A: image -> hidden -> single unit.
D_A_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D_A_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
D_A_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_A_b2 = tf.Variable(tf.zeros(shape=[1]))

# Discriminator for domain B.
D_B_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D_B_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
D_B_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_B_b2 = tf.Variable(tf.zeros(shape=[1]))

# Translator A -> B: image -> hidden -> image.
G_AB_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
G_AB_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
G_AB_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_AB_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Translator B -> A.
G_BA_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
G_BA_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
G_BA_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_BA_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Variable lists handed to the two minimize() calls.
theta_D = [D_A_W1, D_A_W2, D_A_b1, D_A_b2,
           D_B_W1, D_B_W2, D_B_b1, D_B_b2]
theta_G = [G_AB_W1, G_AB_W2, G_AB_b1, G_AB_b2,
           G_BA_W1, G_BA_W2, G_BA_b1, G_BA_b2]


def D_A(X):
    """Domain-A discriminator: ReLU hidden layer, then a sigmoid output."""
    hidden = tf.nn.relu(tf.matmul(X, D_A_W1) + D_A_b1)
    logit = tf.matmul(hidden, D_A_W2) + D_A_b2
    return tf.nn.sigmoid(logit)


def D_B(X):
    """Domain-B discriminator: ReLU hidden layer, then a sigmoid output."""
    hidden = tf.nn.relu(tf.matmul(X, D_B_W1) + D_B_b1)
    logit = tf.matmul(hidden, D_B_W2) + D_B_b2
    return tf.nn.sigmoid(logit)


def G_AB(X):
    """Translate a domain-A batch to domain B (ReLU hidden, sigmoid output)."""
    hidden = tf.nn.relu(tf.matmul(X, G_AB_W1) + G_AB_b1)
    pre_activation = tf.matmul(hidden, G_AB_W2) + G_AB_b2
    return tf.nn.sigmoid(pre_activation)


def G_BA(X):
    """Translate a domain-B batch to domain A (ReLU hidden, sigmoid output)."""
    hidden = tf.nn.relu(tf.matmul(X, G_BA_W1) + G_BA_b1)
    pre_activation = tf.matmul(hidden, G_BA_W2) + G_BA_b2
    return tf.nn.sigmoid(pre_activation)


# Discriminator A
# D_A compares real A images with B images translated into A.
X_BA = G_BA(X_B)
D_A_real = D_A(X_A)
D_A_fake = D_A(X_BA)

# Discriminator B
# D_B compares real B images with A images translated into B.
X_AB = G_AB(X_A)
D_B_real = D_B(X_B)
D_B_fake = D_B(X_AB)

# Generator AB
# Round trip A -> B -> A, used for the reconstruction term.
X_ABA = G_BA(X_AB)

# Generator BA
# Round trip B -> A -> B.
X_BAB = G_AB(X_BA)

# Discriminator loss
L_D_A = -tf.reduce_mean(log(D_A_real) + log(1 - D_A_fake))
L_D_B = -tf.reduce_mean(log(D_B_real) + log(1 - D_B_fake))

D_loss = L_D_A + L_D_B

# Generator loss
# Each translator pays an adversarial term plus a squared reconstruction
# error summed over axis 1 and averaged over the batch.
L_adv_B = -tf.reduce_mean(log(D_B_fake))
L_recon_A = tf.reduce_mean(tf.reduce_sum((X_A - X_ABA)**2, 1))
L_G_AB = L_adv_B + L_recon_A

L_adv_A = -tf.reduce_mean(log(D_A_fake))
L_recon_B = tf.reduce_mean(tf.reduce_sum((X_B - X_BAB)**2, 1))
L_G_BA = L_adv_A + L_recon_B

G_loss = L_G_AB + L_G_BA

# Solvers
# A single Adam optimizer instance backs both minimize() ops.
solver = tf.train.AdamOptimizer(learning_rate=lr)
D_solver = solver.minimize(D_loss, var_list=theta_D)
G_solver = solver.minimize(G_loss, var_list=theta_G)

sess = tf.Session()
sess.run(tf.global_variables_initializer())


# Gather training data from 2 domains
X_train = mnist.train.images
half = int(X_train.shape[0] / 2)
# Real image
X_train1 = X_train[:half]
# Rotated image
# The second half is viewed as 28x28 images, rotated by 90 degrees in the
# image plane, then flattened back to rows of 28*28 values.
X_train2 = X_train[half:].reshape(-1, 28, 28)
X_train2 = scipy.ndimage.interpolation.rotate(X_train2, 90, axes=(1, 2))
X_train2 = X_train2.reshape(-1, 28*28)
# Cleanup
del X_train


def sample_X(X, size):
    """Return a random contiguous window of ``size`` rows of X.

    Args:
        X: numpy array indexed by row along axis 0.
        size: number of consecutive rows to return.

    Returns:
        The slice ``X[start:start + size]`` for a uniformly chosen valid
        ``start``.

    Raises:
        ValueError: if ``size`` exceeds the number of rows in X.
    """
    n_rows = X.shape[0]
    if size > n_rows:
        raise ValueError(
            'cannot sample {} rows from an array with {} rows'
            .format(size, n_rows))
    # randint's upper bound is exclusive: the +1 makes the last window
    # reachable and allows size == n_rows.
    start_idx = np.random.randint(0, n_rows - size + 1)
    return X[start_idx:start_idx+size]


if not os.path.exists('out/'):
    os.makedirs('out/')

# Index of the next sample grid written to out/.
i = 0

for it in range(1000000):
    # Sample data from both domains
    X_A_mb = sample_X(X_train1, mb_size)
    X_B_mb = sample_X(X_train2, mb_size)

    # Discriminator step.
    _, D_loss_curr = sess.run(
        [D_solver, D_loss], feed_dict={X_A: X_A_mb, X_B: X_B_mb}
    )

    # Generator step on the same two batches.
    _, G_loss_curr = sess.run(
        [G_solver, G_loss], feed_dict={X_A: X_A_mb, X_B: X_B_mb}
    )

    if it % 1000 == 0:
        print('Iter: {}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss_curr, G_loss_curr))

        input_A = sample_X(X_train1, size=4)
        input_B = sample_X(X_train2, size=4)

        samples_A = sess.run(X_BA, feed_dict={X_B: input_B})
        samples_B = sess.run(X_AB, feed_dict={X_A: input_A})

        # The resulting image sample would be in 4 rows:
        # row 1: real data from domain A, row 2 is its domain B translation
        # row 3: real data from domain B, row 4 is its domain A translation
        samples = np.vstack([input_A, samples_B, input_B, samples_A])

        fig = plot(samples)
        plt.savefig('out/{}.png'
                    .format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)


================================================
FILE: GAN/dual_gan/dualgan_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data
from itertools import chain
import scipy.ndimage.interpolation


mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32  # minibatch size
z_dim = 10  # width of the noise vector concatenated to each generator input
X_dim = mnist.train.images.shape[1]  # width of one image row
y_dim = mnist.train.labels.shape[1]  # width of one one-hot label row
h_dim = 128  # hidden layer width
cnt = 0  # index of the next sample grid written to out/
lr = 1e-4  # learning rate of all three RMSprop solvers
n_critics = 3  # critic updates per generator update
lam1, lam2 = 100, 100  # weights of the two reconstruction terms


def log(x):
    """Natural log of ``x`` offset by 1e-8 so that log(0) stays finite."""
    shifted = x + 1e-8
    return torch.log(shifted)


# G1 takes a concatenated [image, noise] row and outputs an image; the
# training loop feeds it domain-1 images (X1 -> X2).
G1 = torch.nn.Sequential(
    torch.nn.Linear(X_dim + z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)

# G2 has the same architecture and is fed domain-2 images (X2 -> X1).
G2 = torch.nn.Sequential(
    torch.nn.Linear(X_dim + z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)

# Critics end in a linear layer with no sigmoid: they output raw scores.
D1 = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1)
)

D2 = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1)
)


def reset_grad():
    """Zero the accumulated gradients of both generators and both critics."""
    for module in (G1, G2, D1, D2):
        module.zero_grad()


# One solver for both generators, one per critic.
G_solver = optim.RMSprop(chain(G1.parameters(), G2.parameters()), lr=lr)
D1_solver = optim.RMSprop(D1.parameters(), lr=lr)
D2_solver = optim.RMSprop(D2.parameters(), lr=lr)

# Split the training images in two halves, one per domain.
X_train = mnist.train.images
half = int(X_train.shape[0] / 2)

# Real image
X_train1 = X_train[:half]
# Rotated image
# The second half is viewed as 28x28 images, rotated by 90 degrees in the
# image plane, then flattened back to rows of 28*28 values.
X_train2 = X_train[half:].reshape(-1, 28, 28)
X_train2 = scipy.ndimage.interpolation.rotate(X_train2, 90, axes=(1, 2))
X_train2 = X_train2.reshape(-1, 28*28)

# Cleanup
del X_train


def sample_x(X, size):
    """Return a random contiguous window of ``size`` rows of X as a Variable.

    Args:
        X: numpy array indexed by row along axis 0.
        size: number of consecutive rows to return.

    Returns:
        A torch Variable wrapping ``X[start:start + size]`` for a uniformly
        chosen valid ``start``.

    Raises:
        ValueError: if ``size`` exceeds the number of rows in X.
    """
    n_rows = X.shape[0]
    if size > n_rows:
        raise ValueError(
            'cannot sample {} rows from an array with {} rows'
            .format(size, n_rows))
    # randint's upper bound is exclusive: the +1 makes the last window
    # reachable and allows size == n_rows.
    start_idx = np.random.randint(0, n_rows - size + 1)
    return Variable(torch.from_numpy(X[start_idx:start_idx+size]))


# Training: n_critics critic updates (with weight clipping) per generator
# update. D1 judges domain-2 images (real X2 vs. G1 output) and D2 judges
# domain-1 images (real X1 vs. G2 output).
for it in range(1000000):
    for _ in range(n_critics):
        # Sample data
        z1 = Variable(torch.randn(mb_size, z_dim))
        z2 = Variable(torch.randn(mb_size, z_dim))
        X1 = sample_x(X_train1, mb_size)
        X2 = sample_x(X_train2, mb_size)

        # D1
        X2_sample = G1(torch.cat([X1, z1], 1))  # G1: X1 -> X2
        D1_real = D1(X2)
        D1_fake = D1(X2_sample)

        # The critic maximizes real-minus-fake mean score, hence the negation.
        D1_loss = -(torch.mean(D1_real) - torch.mean(D1_fake))

        D1_loss.backward(retain_graph=True)
        D1_solver.step()

        # Weight clipping
        for p in D1.parameters():
            p.data.clamp_(-0.01, 0.01)

        reset_grad()

        # D2
        X1_sample = G2(torch.cat([X2, z2], 1))  # G2: X2 -> X1
        D2_real = D2(X1)
        D2_fake = D2(X1_sample)

        D2_loss = -(torch.mean(D2_real) - torch.mean(D2_fake))

        D2_loss.backward()
        D2_solver.step()

        # Weight clipping
        for p in D2.parameters():
            p.data.clamp_(-0.01, 0.01)

        reset_grad()

    # Generator
    z1 = Variable(torch.randn(mb_size, z_dim))
    z2 = Variable(torch.randn(mb_size, z_dim))
    X1 = sample_x(X_train1, mb_size)
    X2 = sample_x(X_train2, mb_size)

    X1_sample = G2(torch.cat([X2, z2], 1))
    X2_sample = G1(torch.cat([X1, z1], 1))

    # Round trips X1 -> X2 -> X1 and X2 -> X1 -> X2 for the reconstruction
    # penalties.
    X1_recon = G2(torch.cat([X2_sample, z2], 1))
    X2_recon = G1(torch.cat([X1_sample, z1], 1))

    # Each generator output is scored by the critic of its own target
    # domain, matching the pairing used in the critic updates above
    # (the original scored X1_sample with D1 and X2_sample with D2).
    D1_fake = D1(X2_sample)
    D2_fake = D2(X1_sample)

    G_loss = -torch.mean(D1_fake) - torch.mean(D2_fake)
    # L1 reconstruction error summed over dimension 1, averaged over the batch.
    reg1 = lam1 * torch.mean(torch.sum(torch.abs(X1_recon - X1), 1))
    reg2 = lam2 * torch.mean(torch.sum(torch.abs(X2_recon - X2), 1))

    G_loss += reg1 + reg2

    G_loss.backward()
    G_solver.step()
    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        # NOTE(review): `.data[0]` on a scalar loss relies on the older
        # Variable-era PyTorch API - confirm the target version.
        print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D1_loss.data[0] + D2_loss.data[0], G_loss.data[0]))

        # Rows: real X2, its translation to domain 1, real X1, its
        # translation to domain 2 (four images each).
        real1 = X1.data.numpy()[:4]
        real2 = X2.data.numpy()[:4]
        samples1 = X1_sample.data.numpy()[:4]
        samples2 = X2_sample.data.numpy()[:4]
        samples = np.vstack([real2, samples1, real1, samples2])

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/dual_gan/dualgan_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
import scipy.ndimage.interpolation


mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)

mb_size = 32  # minibatch size
X_dim = mnist.train.images.shape[1]  # width of one image row
y_dim = mnist.train.labels.shape[1]  # width of one one-hot label row
z_dim = 10  # width of the noise vector concatenated to each generator input
h_dim = 128  # hidden layer width
eps = 1e-8
# NOTE(review): the RMSProp solvers below pass learning_rate=1e-4 literally
# rather than reading lr.
lr = 1e-3
d_steps = 3
lam1, lam2 = 1000, 1000  # weights of the two reconstruction terms in G_loss


def plot(samples):
    """Draw ``samples`` as 28x28 greyscale tiles on a 4x4 grid.

    Args:
        samples: iterable of arrays, each reshapeable to (28, 28); the grid
            has 16 cells.

    Returns:
        The matplotlib figure holding the grid.
    """
    figure = plt.figure(figsize=(4, 4))
    layout = gridspec.GridSpec(4, 4)
    layout.update(wspace=0.05, hspace=0.05)

    for slot, tile in enumerate(samples):
        axes = plt.subplot(layout[slot])
        plt.axis('off')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        axes.set_aspect('equal')
        plt.imshow(tile.reshape(28, 28), cmap='Greys_r')

    return figure


def xavier_init(size):
    """Return a random-normal initializer tensor scaled by the fan-in.

    Args:
        size: list of dimensions; ``size[0]`` is taken as the fan-in.

    Returns:
        A ``tf.random_normal`` tensor of shape ``size`` with standard
        deviation ``1 / sqrt(fan_in / 2)``.
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)


# Placeholders: one image batch per domain plus a noise batch.
X1 = tf.placeholder(tf.float32, shape=[None, X_dim])
X2 = tf.placeholder(tf.float32, shape=[None, X_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Generator 1: concatenated [image, noise] -> hidden -> image.
G1_W1 = tf.Variable(xavier_init([X_dim + z_dim, h_dim]))
G1_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
G1_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G1_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Generator 2, same layer sizes.
G2_W1 = tf.Variable(xavier_init([X_dim + z_dim, h_dim]))
G2_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
G2_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G2_b2 = tf.Variable(tf.zeros(shape=[X_dim]))


def G1(X1, z):
    """Generator 1: map ``X1`` joined with noise ``z`` to a sigmoid image.

    The two inputs are concatenated along axis 1, passed through one ReLU
    hidden layer and squashed with a sigmoid.
    """
    joined = tf.concat([X1, z], 1)
    hidden = tf.nn.relu(tf.matmul(joined, G1_W1) + G1_b1)
    logits = tf.matmul(hidden, G1_W2) + G1_b2
    return tf.nn.sigmoid(logits)


def G2(X2, z):
    """Generator 2: map ``X2`` joined with noise ``z`` to a sigmoid image.

    The two inputs are concatenated along axis 1, passed through one ReLU
    hidden layer and squashed with a sigmoid.
    """
    joined = tf.concat([X2, z], 1)
    hidden = tf.nn.relu(tf.matmul(joined, G2_W1) + G2_b1)
    logits = tf.matmul(hidden, G2_W2) + G2_b2
    return tf.nn.sigmoid(logits)


# Critic 1 parameters: image -> h_dim -> one unbounded score.
D1_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D1_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
D1_W2 = tf.Variable(xavier_init([h_dim, 1]))
D1_b2 = tf.Variable(tf.zeros(shape=[1]))

# Critic 2 parameters: same layout as critic 1.
D2_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D2_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
D2_W2 = tf.Variable(xavier_init([h_dim, 1]))
D2_b2 = tf.Variable(tf.zeros(shape=[1]))


def D1(X):
    """Critic 1: return one unbounded score per row of ``X``."""
    hidden = tf.matmul(X, D1_W1) + D1_b1
    hidden = tf.nn.relu(hidden)
    return tf.matmul(hidden, D1_W2) + D1_b2


def D2(X):
    """Critic 2: return one unbounded score per row of ``X``.

    Both layers use critic 2's own parameters (``D2_W1``, ``D2_b1``,
    ``D2_W2``, ``D2_b2``), which are the variables listed in ``theta_D2``
    and updated by ``D2_solver``.
    """
    h = tf.nn.relu(tf.matmul(X, D2_W1) + D2_b1)
    return tf.matmul(h, D2_W2) + D2_b2


# Variable lists passed to the optimizers through ``var_list``. Each list
# names every weight and bias of its network exactly once.
theta_G1 = [G1_W1, G1_b1, G1_W2, G1_b2]
theta_G2 = [G2_W1, G2_b1, G2_W2, G2_b2]
theta_G = theta_G1 + theta_G2

theta_D1 = [D1_W1, D1_W2, D1_b1, D1_b2]
theta_D2 = [D2_W1, D2_b1, D2_W2, D2_b2]

# Translations: G2 maps domain 2 -> domain 1, G1 maps domain 1 -> domain 2.
X1_sample = G2(X2, z)
X2_sample = G1(X1, z)

# Critic 1 scores domain-2 images: real X2 against G1's output.
D1_real = D1(X2)
D1_fake = D1(X2_sample)

# Critic 2 scores domain-1 images: real X1 against G2's output.
D2_real = D2(X1)
D2_fake = D2(X1_sample)

# Adversarial term for the generators: each critic is shown the generated
# samples of the domain it is trained on (same pairing as D*_fake above).
D1_G = D1(X2_sample)
D2_G = D2(X1_sample)

# Cycle reconstructions: X1 -> G1 -> G2 -> X1_recon, and the mirror for X2.
X1_recon = G2(X2_sample, z)
X2_recon = G1(X1_sample, z)
# Per-row L1 error, averaged over the batch.
recon1 = tf.reduce_mean(tf.reduce_sum(tf.abs(X1 - X1_recon), 1))
recon2 = tf.reduce_mean(tf.reduce_sum(tf.abs(X2 - X2_recon), 1))

# Critics lower their score on generated rows relative to real rows; the
# generators raise the critics' scores and minimise reconstruction error.
D1_loss = tf.reduce_mean(D1_fake) - tf.reduce_mean(D1_real)
D2_loss = tf.reduce_mean(D2_fake) - tf.reduce_mean(D2_real)
G_loss = -tf.reduce_mean(D1_G + D2_G) + lam1*recon1 + lam2*recon2

D1_solver = (tf.train.RMSPropOptimizer(learning_rate=1e-4)
             .minimize(D1_loss, var_list=theta_D1))
D2_solver = (tf.train.RMSPropOptimizer(learning_rate=1e-4)
             .minimize(D2_loss, var_list=theta_D2))
G_solver = (tf.train.RMSPropOptimizer(learning_rate=1e-4)
            .minimize(G_loss, var_list=theta_G))

# Ops that clamp every critic variable into [-0.01, 0.01].
clip_D = [p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in theta_D1 + theta_D2]


sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Split the training images into two halves, one per domain.
X_train = mnist.train.images
half = int(X_train.shape[0] / 2)

# Real image
X_train1 = X_train[:half]
# Rotated image
# Reshape to (N, 28, 28), rotate by 90 degrees in the plane of axes 1 and 2,
# then flatten back to rows of 28*28.
X_train2 = X_train[half:].reshape(-1, 28, 28)
X_train2 = scipy.ndimage.interpolation.rotate(X_train2, 90, axes=(1, 2))
X_train2 = X_train2.reshape(-1, 28*28)

# Cleanup
del X_train


def sample_X(X, size):
    """Return a random contiguous window of ``size`` rows from ``X``.

    The start index is drawn uniformly from every position at which a full
    window fits, so the final window is reachable and ``size == X.shape[0]``
    returns the whole array.

    Raises:
        ValueError: if ``size`` is larger than the number of rows in ``X``.
    """
    n_rows = X.shape[0]
    if size > n_rows:
        raise ValueError(
            'cannot sample {} rows from an array with {} rows'
            .format(size, n_rows)
        )
    # randint's upper bound is exclusive, hence the +1 to include the last
    # valid start position.
    start_idx = np.random.randint(0, n_rows - size + 1)
    return X[start_idx:start_idx + size]


def sample_z(m, n):
    """Draw an ``m`` x ``n`` noise matrix with entries uniform on [-1, 1)."""
    shape = [m, n]
    return np.random.uniform(low=-1., high=1., size=shape)


# Output directory for the sample grids.
if not os.path.exists('out/'):
    os.makedirs('out/')

# Running index used to name the saved images.
i = 0

for it in range(1000000):
    # d_steps critic updates, each on a fresh pair of minibatches.
    for _ in range(d_steps):
        X1_mb, X2_mb = sample_X(X_train1, mb_size), sample_X(X_train2, mb_size)
        z_mb = sample_z(mb_size, z_dim)

        # NOTE(review): the solvers and clip_D are fetched in one run() call
        # with no explicit control dependency between them -- confirm the
        # intended update/clip ordering.
        _, _, D1_loss_curr, D2_loss_curr, _ = sess.run(
            [D1_solver, D2_solver, D1_loss, D2_loss, clip_D],
            feed_dict={X1: X1_mb, X2: X2_mb, z: z_mb}
        )

    # One generator update, reusing the last critic minibatch and noise.
    _, G_loss_curr = sess.run(
        [G_solver, G_loss], feed_dict={X1: X1_mb, X2: X2_mb, z: z_mb}
    )

    if it % 1000 == 0:
        # Translate the first four rows of each domain for a visual check.
        sample1, sample2 = sess.run(
            [X1_sample, X2_sample],
            feed_dict={X1: X1_mb[:4], X2: X2_mb[:4], z: sample_z(4, z_dim)}
        )

        # 16 rows for the 4x4 grid: X1 inputs, X1_sample outputs, X2 inputs,
        # X2_sample outputs.
        samples = np.vstack([X1_mb[:4], sample1, X2_mb[:4], sample2])

        print('Iter: {}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D1_loss_curr + D2_loss_curr, G_loss_curr))

        fig = plot(samples)
        plt.savefig('out/{}.png'
                    .format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)


================================================
FILE: GAN/ebgan/ebgan_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Load MNIST from a path relative to the current working directory.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32                         # rows per minibatch
z_dim = 10                           # noise vector width
X_dim = mnist.train.images.shape[1]  # flattened image width, read from the data
y_dim = mnist.train.labels.shape[1]  # label width; not referenced again in this script
h_dim = 128                          # hidden layer width
cnt = 0                              # running index used to name saved images
d_step = 3                           # not referenced again in this script
lr = 1e-3                            # Adam learning rate for both optimizers
m = 5                                # margin in the discriminator's hinge term


# Generator: noise -> h_dim -> image, with a sigmoid on the output.
G = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)

# D is an autoencoder
# (image -> h_dim -> image, linear output with no final activation).
D_ = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
)


# Energy is the MSE of autoencoder
def D(X):
    """Return the energy of batch ``X`` as a single value.

    The energy is the autoencoder's squared reconstruction error, summed
    over dimension 1 and averaged over the batch.
    """
    residual = X - D_(X)
    per_row = torch.sum(residual**2, 1)
    return torch.mean(per_row)


def reset_grad():
    """Zero the accumulated gradients of the generator and the autoencoder."""
    for net in (G, D_):
        net.zero_grad()


# One Adam optimizer per network.
G_solver = optim.Adam(G.parameters(), lr=lr)
D_solver = optim.Adam(D_.parameters(), lr=lr)


for it in range(1000000):
    # Sample data
    z = Variable(torch.randn(mb_size, z_dim))
    X, _ = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))

    # Discriminator
    G_sample = G(z)
    D_real = D(X)
    D_fake = D(G_sample)

    # EBGAN D loss. D_real and D_fake is energy, i.e. a number
    D_loss = D_real + nn.relu(m - D_fake)

    # Only D_ is stepped here; reset_grad() then clears the gradients of
    # both networks.
    D_loss.backward()
    D_solver.step()
    reset_grad()

    # Generator
    # The forward pass is recomputed with the same z.
    G_sample = G(z)
    D_fake = D(G_sample)

    # The generator minimises the energy of its own samples.
    G_loss = D_fake

    G_loss.backward()
    G_solver.step()
    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        # NOTE(review): `.data[0]` indexes the loss tensor; this relies on
        # the loss being indexable in the PyTorch version in use -- confirm.
        print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss.data[0], G_loss.data[0]))

        # First 16 generated rows for the 4x4 grid.
        samples = G(z).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/ebgan/ebgan_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os


# Hyperparameters.
mb_size = 32   # rows per minibatch
X_dim = 784    # flattened image width (plot() reshapes rows to 28x28)
z_dim = 64     # noise vector width
h_dim = 128    # hidden layer width
lr = 1e-3      # Adam learning rate for both optimizers
m = 5          # margin in the discriminator's hinge term

# Load MNIST from a path relative to the current working directory.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)


def plot(samples):
    """Draw flattened 28x28 images on a 4x4 grid and return the figure.

    Each entry of ``samples`` is reshaped to 28x28 and shown in its own
    grid cell with axes hidden, in iteration order. The caller is
    responsible for saving and closing the returned figure.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for cell, image in enumerate(samples):
        axes = plt.subplot(grid[cell])
        axes.set_aspect('equal')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        plt.axis('off')
        plt.imshow(image.reshape(28, 28), cmap='Greys_r')

    return figure


def xavier_init(size):
    """Sample an initial weight tensor of shape ``size``.

    Values are drawn from a zero-mean normal whose standard deviation is
    ``1 / sqrt(size[0] / 2)``, i.e. it shrinks with the first dimension.
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(stddev=stddev, shape=size)


# Graph inputs: a batch of images and a batch of noise vectors.
X = tf.placeholder(tf.float32, shape=[None, X_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Discriminator (autoencoder) parameters: image -> h_dim -> image.
D_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
D_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
D_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Generator parameters: noise -> h_dim -> image.
G_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
G_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Variable lists passed to the optimizers through ``var_list``.
theta_G = [G_W1, G_W2, G_b1, G_b2]
theta_D = [D_W1, D_W2, D_b1, D_b2]


def sample_z(m, n):
    """Draw an ``m`` x ``n`` noise matrix with entries uniform on [-1, 1)."""
    shape = [m, n]
    return np.random.uniform(low=-1., high=1., size=shape)


def generator(z):
    """Map noise ``z`` through one ReLU hidden layer to a sigmoid output."""
    hidden = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    logits = tf.matmul(hidden, G_W2) + G_b2
    return tf.nn.sigmoid(logits)


def discriminator(X):
    """Return the energy of batch ``X`` as a scalar tensor.

    ``X`` is reconstructed by a one-hidden-layer autoencoder; the energy is
    the squared reconstruction error summed over axis 1 and averaged over
    the batch.
    """
    hidden = tf.nn.relu(tf.matmul(X, D_W1) + D_b1)
    reconstruction = tf.matmul(hidden, D_W2) + D_b2
    per_row = tf.reduce_sum((X - reconstruction)**2, 1)
    return tf.reduce_mean(per_row)


G_sample = generator(z)

# Energies (scalars) of the real batch and of the generated batch.
D_real = discriminator(X)
D_fake = discriminator(G_sample)

# D lowers the energy of real data and raises the energy of generated data
# up to the margin m; G lowers the energy of its own samples.
D_loss = D_real + tf.maximum(0., m - D_fake)
G_loss = D_fake

D_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(D_loss, var_list=theta_D))
G_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(G_loss, var_list=theta_G))

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Output directory for the sample grids.
if not os.path.exists('out/'):
    os.makedirs('out/')

# Running index used to name the saved images.
i = 0

for it in range(1000000):
    X_mb, _ = mnist.train.next_batch(mb_size)
    z_mb = sample_z(mb_size, z_dim)

    # One discriminator step, then one generator step on fresh noise.
    _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, z: z_mb})

    # G_loss is built from z only; X is fed here as well.
    _, G_loss_curr = sess.run(
        [G_solver, G_loss], feed_dict={X: X_mb, z: sample_z(mb_size, z_dim)}
    )

    if it % 1000 == 0:
        print('Iter: {}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss_curr, G_loss_curr))

        # 16 fresh samples for the 4x4 grid.
        samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)})

        fig = plot(samples)
        plt.savefig('out/{}.png'
                    .format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)


================================================
FILE: GAN/f_gan/f_gan_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Load MNIST from a path relative to the current working directory.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32                         # rows per minibatch
z_dim = 10                           # noise vector width
X_dim = mnist.train.images.shape[1]  # flattened image width, read from the data
y_dim = mnist.train.labels.shape[1]  # label width; not referenced again in this script
h_dim = 128                          # hidden layer width
cnt = 0                              # running index used to name saved images
lr = 1e-3                            # Adam learning rate for both optimizers


def log(x):
    """Natural log of ``x`` with 1e-8 added first, so zero inputs stay finite."""
    shifted = x + 1e-8
    return shifted.log()


# Generator: noise -> h_dim -> image, with a sigmoid on the output.
G = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)


# Discriminator: image -> h_dim -> one unbounded output (no final
# activation; the loss expressions in the training loop apply their own).
D = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1),
)


def reset_grad():
    """Zero the accumulated gradients of the generator and the discriminator."""
    for net in (G, D):
        net.zero_grad()


# One Adam optimizer per network.
G_solver = optim.Adam(G.parameters(), lr=lr)
D_solver = optim.Adam(D.parameters(), lr=lr)


for it in range(1000000):
    # Sample data
    z = Variable(torch.randn(mb_size, z_dim))
    X, _ = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))

    # Discriminator
    G_sample = G(z)
    D_real = D(X)
    D_fake = D(G_sample)

    # Uncomment D_loss and its respective G_loss of your choice
    # ---------------------------------------------------------
    # Exactly one D_loss here and the matching G_loss further down must be
    # active; Reverse KL is the pair currently enabled.

    """ Total Variation """
    # D_loss = -(torch.mean(0.5 * torch.tanh(D_real)) -
    #            torch.mean(0.5 * torch.tanh(D_fake)))
    """ Forward KL """
    # D_loss = -(torch.mean(D_real) - torch.mean(torch.exp(D_fake - 1)))
    """ Reverse KL """
    D_loss = -(torch.mean(-torch.exp(D_real)) - torch.mean(-1 - D_fake))
    """ Pearson Chi-squared """
    # D_loss = -(torch.mean(D_real) - torch.mean(0.25*D_fake**2 + D_fake))
    """ Squared Hellinger """
    # D_loss = -(torch.mean(1 - torch.exp(D_real)) -
    #            torch.mean((1 - torch.exp(D_fake)) / (torch.exp(D_fake))))

    # Only D is stepped here; reset_grad() then clears both networks.
    D_loss.backward()
    D_solver.step()
    reset_grad()

    # Generator
    # The forward pass is recomputed with the same z.
    G_sample = G(z)
    D_fake = D(G_sample)

    """ Total Variation """
    # G_loss = -torch.mean(0.5 * torch.tanh(D_fake))
    """ Forward KL """
    # G_loss = -torch.mean(torch.exp(D_fake - 1))
    """ Reverse KL """
    G_loss = -torch.mean(-1 - D_fake)
    """ Pearson Chi-squared """
    # G_loss = -torch.mean(0.25*D_fake**2 + D_fake)
    """ Squared Hellinger """
    # G_loss = -torch.mean((1 - torch.exp(D_fake)) / (torch.exp(D_fake)))

    G_loss.backward()
    G_solver.step()
    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        # NOTE(review): `.data[0]` indexes the loss tensor; this relies on
        # the loss being indexable in the PyTorch version in use -- confirm.
        print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss.data[0], G_loss.data[0]))

        # First 16 generated rows for the 4x4 grid.
        samples = G(z).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/f_gan/f_gan_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os


# Hyperparameters.
mb_size = 32   # rows per minibatch
X_dim = 784    # flattened image width (plot() reshapes rows to 28x28)
z_dim = 64     # noise vector width
h_dim = 128    # hidden layer width
lr = 1e-3      # Adam learning rate for both optimizers
d_steps = 3    # not referenced again in this script

# Load MNIST from a path relative to the current working directory.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)


def plot(samples):
    """Draw flattened 28x28 images on a 4x4 grid and return the figure.

    Each entry of ``samples`` is reshaped to 28x28 and shown in its own
    grid cell with axes hidden, in iteration order. The caller is
    responsible for saving and closing the returned figure.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for cell, image in enumerate(samples):
        axes = plt.subplot(grid[cell])
        axes.set_aspect('equal')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        plt.axis('off')
        plt.imshow(image.reshape(28, 28), cmap='Greys_r')

    return figure


def xavier_init(size):
    """Sample an initial weight tensor of shape ``size``.

    Values are drawn from a zero-mean normal whose standard deviation is
    ``1 / sqrt(size[0] / 2)``, i.e. it shrinks with the first dimension.
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(stddev=stddev, shape=size)


# Graph inputs: a batch of images and a batch of noise vectors.
X = tf.placeholder(tf.float32, shape=[None, X_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Discriminator parameters: image -> h_dim -> one unbounded output.
D_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

# Generator parameters: noise -> h_dim -> image.
G_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
G_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Variable lists passed to the optimizers through ``var_list``.
theta_G = [G_W1, G_W2, G_b1, G_b2]
theta_D = [D_W1, D_W2, D_b1, D_b2]


def sample_z(m, n):
    """Draw an ``m`` x ``n`` noise matrix with entries uniform on [-1, 1)."""
    shape = [m, n]
    return np.random.uniform(low=-1., high=1., size=shape)


def generator(z):
    """Map noise ``z`` through one ReLU hidden layer to a sigmoid output."""
    hidden = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    logits = tf.matmul(hidden, G_W2) + G_b2
    return tf.nn.sigmoid(logits)


def discriminator(x):
    """Return one unbounded output per row of ``x`` (no final activation)."""
    hidden = tf.matmul(x, D_W1) + D_b1
    hidden = tf.nn.relu(hidden)
    return tf.matmul(hidden, D_W2) + D_b2


G_sample = generator(z)

# Raw discriminator outputs on real and generated batches.
D_real = discriminator(X)
D_fake = discriminator(G_sample)

# Uncomment D_loss and its respective G_loss of your choice
# ---------------------------------------------------------
# Exactly one D_loss/G_loss pair must be active; Pearson Chi-squared is the
# pair currently enabled.

""" Total Variation """
# D_loss = -(tf.reduce_mean(0.5 * tf.nn.tanh(D_real)) -
#            tf.reduce_mean(0.5 * tf.nn.tanh(D_fake)))
# G_loss = -tf.reduce_mean(0.5 * tf.nn.tanh(D_fake))

""" Forward KL """
# D_loss = -(tf.reduce_mean(D_real) - tf.reduce_mean(tf.exp(D_fake - 1)))
# G_loss = -tf.reduce_mean(tf.exp(D_fake - 1))

""" Reverse KL """
# D_loss = -(tf.reduce_mean(-tf.exp(D_real)) - tf.reduce_mean(-1 - D_fake))
# G_loss = -tf.reduce_mean(-1 - D_fake)

""" Pearson Chi-squared """
D_loss = -(tf.reduce_mean(D_real) - tf.reduce_mean(0.25*D_fake**2 + D_fake))
G_loss = -tf.reduce_mean(0.25*D_fake**2 + D_fake)

""" Squared Hellinger """
# D_loss = -(tf.reduce_mean(1 - tf.exp(D_real)) -
#            tf.reduce_mean((1 - tf.exp(D_fake)) / (tf.exp(D_fake))))
# G_loss = -tf.reduce_mean((1 - tf.exp(D_fake)) / (tf.exp(D_fake)))


D_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(D_loss, var_list=theta_D))
G_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(G_loss, var_list=theta_G))

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Output directory for the sample grids.
if not os.path.exists('out/'):
    os.makedirs('out/')

# Running index used to name the saved images.
i = 0

for it in range(1000000):
    X_mb, _ = mnist.train.next_batch(mb_size)
    z_mb = sample_z(mb_size, z_dim)

    # One discriminator step and one generator step on the same noise batch.
    _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, z: z_mb})
    _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={z: z_mb})

    if it % 1000 == 0:
        print('Iter: {}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss_curr, G_loss_curr))

        # 16 fresh samples for the 4x4 grid.
        samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)})

        fig = plot(samples)
        plt.savefig('out/{}.png'
                    .format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)


================================================
FILE: GAN/generative_adversarial_parallelization/gap_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
import random
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Load MNIST from a path relative to the current working directory.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32                         # rows per minibatch
z_dim = 10                           # noise vector width
X_dim = mnist.train.images.shape[1]  # flattened image width, read from the data
y_dim = mnist.train.labels.shape[1]  # label width; not referenced again in this script
h_dim = 128                          # hidden layer width
cnt = 0                              # running index used to name saved images
lr = 1e-3                            # Adam learning rate for all four optimizers
K = 100                              # discriminators are swapped every K iterations


def log(x):
    """Natural log of ``x`` with 1e-8 added first, so zero inputs stay finite."""
    shifted = x + 1e-8
    return shifted.log()


# First generator: noise -> h_dim -> image, sigmoid output.
G1_ = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)


# First discriminator: image -> h_dim -> one sigmoid output.
D1_ = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1),
    torch.nn.Sigmoid()
)

# Second generator: same architecture as G1_, separate weights.
G2_ = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)


# Second discriminator: same architecture as D1_, separate weights.
D2_ = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1),
    torch.nn.Sigmoid()
)

# Every network, so reset_grad() can clear them all.
nets = [G1_, D1_, G2_, D2_]


def reset_grad():
    """Zero the accumulated gradients of every network listed in ``nets``."""
    for module in nets:
        module.zero_grad()


# One Adam optimizer per network.
G1_solver = optim.Adam(G1_.parameters(), lr=lr)
D1_solver = optim.Adam(D1_.parameters(), lr=lr)
G2_solver = optim.Adam(G2_.parameters(), lr=lr)
D2_solver = optim.Adam(D2_.parameters(), lr=lr)

# Bundle each model with its optimizer so they move together when swapped.
D1 = {'model': D1_, 'solver': D1_solver}
G1 = {'model': G1_, 'solver': G1_solver}
D2 = {'model': D2_, 'solver': D2_solver}
G2 = {'model': G2_, 'solver': G2_solver}

# Current (discriminator, generator) pairing.
GAN_pairs = [(D1, G1), (D2, G2)]

for it in range(1000000):
    # Sample data
    # Both pairs are trained on the same noise and data batch.
    z = Variable(torch.randn(mb_size, z_dim))
    X, _ = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))

    for D, G in GAN_pairs:
        # Discriminator
        G_sample = G['model'](z)
        D_real = D['model'](X)
        D_fake = D['model'](G_sample)

        D_loss = -torch.mean(log(D_real) + log(1 - D_fake))

        # Only this pair's discriminator is stepped; reset_grad() clears all
        # four networks afterwards.
        D_loss.backward()
        D['solver'].step()
        reset_grad()

        # Generator
        G_sample = G['model'](z)
        D_fake = D['model'](G_sample)

        G_loss = -torch.mean(log(D_fake))

        G_loss.backward()
        G['solver'].step()
        reset_grad()

    if it != 0 and it % K == 0:
        # Swap (D, G) pairs
        # The generators keep their slots; the two discriminators trade places.
        new_D1, new_D2 = GAN_pairs[1][0], GAN_pairs[0][0]
        GAN_pairs = [(new_D1, G1), (new_D2, G2)]

    # Print and plot every now and then
    if it % 1000 == 0:
        # D_loss and G_loss hold the values of the last pair in the loop above.
        # NOTE(review): `.data[0]` indexes the loss tensor; this relies on
        # the loss being indexable in the PyTorch version in use -- confirm.
        print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss.data[0], G_loss.data[0]))

        # Pick G randomly
        G_rand = random.choice([G1_, G2_])
        samples = G_rand(z).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/gibbsnet/gibbsnet_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data
from itertools import *


# Load MNIST from a path relative to the current working directory.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 16                         # rows per minibatch
z_dim = 100                          # latent vector width
X_dim = mnist.train.images.shape[1]  # flattened image width, read from the data
y_dim = mnist.train.labels.shape[1]  # label width; not referenced again in this script
h_dim = 256                          # hidden layer width
cnt = 0                              # running index used to name saved images
lr = 1e-4                            # Adam learning rate for both optimizers
N = 10                               # number of P -> Q sampling steps per chain


def log(x):
    """Natural log of ``x`` with 1e-8 added first, so zero inputs stay finite."""
    shifted = x + 1e-8
    return shifted.log()


# Inference net (Encoder) Q(z|X)
# image -> h_dim -> latent, linear output.
Q = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, z_dim)
)

# Generator net (Decoder) P(X|z)
# latent -> h_dim -> image, sigmoid output.
P = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)

# Joint discriminator over concatenated (image, latent) rows; one sigmoid
# output per row.
D_ = torch.nn.Sequential(
    torch.nn.Linear(X_dim + z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1),
    torch.nn.Sigmoid()
)


def D(X, z):
    """Score (image, latent) pairs: join ``X`` and ``z`` along dim 1 and apply ``D_``."""
    pair = torch.cat([X, z], 1)
    return D_(pair)


def reset_grad():
    """Zero the accumulated gradients of the encoder, decoder and discriminator."""
    for net in (Q, P, D_):
        net.zero_grad()


# One optimizer drives both the encoder Q and the decoder P; the
# discriminator has its own.
G_solver = optim.Adam(chain(Q.parameters(), P.parameters()), lr=lr)
D_solver = optim.Adam(D_.parameters(), lr=lr)


for it in range(1000000):
    # Sample data
    X, _ = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))

    # Discriminator
    # Data-side pair: real image with its inferred latent.
    z_hat = Q(X)

    # Do N step Gibbs sampling
    z = Variable(torch.randn(mb_size, z_dim))

    # After the loop, X_hat = P(z_n): z_n is the latent that produced the
    # final image, so (X_hat, z_n) is the model-side pair.
    for _ in range(N):
        z_n = z.clone()
        X_hat = P(z_n)
        z = Q(X_hat)

    p_data = D(X, z_hat)
    p_model = D(X_hat, z_n)

    D_loss = -torch.mean(log(p_data) + log(1 - p_model))

    # retain_graph: G_loss below backpropagates through the same graph.
    D_loss.backward(retain_graph=True)
    # Only the discriminator is stepped on D_loss. Q and P are updated
    # solely from G_loss below; stepping them here as well would move them
    # along the discriminator's objective.
    D_solver.step()
    reset_grad()

    # Generator/encoder objective: D_loss with the two labels exchanged.
    # NOTE(review): p_data and p_model were computed before D_solver.step();
    # confirm that backpropagating through them after the in-place parameter
    # update is accepted by the PyTorch version in use.
    G_loss = -torch.mean(log(p_model) + log(1 - p_data))

    G_loss.backward()
    G_solver.step()
    reset_grad()

    # Print and plot every now and then
    if it % 100 == 0:
        print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss.data[0], G_loss.data[0]))

        # Run a fresh N-step chain and plot its final images.
        z = Variable(torch.randn(mb_size, z_dim))

        for _ in range(N):
            z_n = z.clone()
            X_hat = P(z_n)
            z = Q(X_hat)

        samples = X_hat.data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/improved_wasserstein_gan/wgan_gp_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os


# Hyperparameters.
mb_size = 32   # rows per minibatch
X_dim = 784    # flattened image width (plot() reshapes rows to 28x28)
z_dim = 10     # noise vector width
h_dim = 128    # hidden layer width
lam = 10       # weight of the gradient penalty in D_loss
n_disc = 5     # critic updates per generator update
lr = 1e-4      # Adam learning rate for both optimizers

# Load MNIST from a path relative to the current working directory.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)


def plot(samples):
    """Draw flattened 28x28 images on a 4x4 grid and return the figure.

    Each entry of ``samples`` is reshaped to 28x28 and shown in its own
    grid cell with axes hidden, in iteration order. The caller is
    responsible for saving and closing the returned figure.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for cell, image in enumerate(samples):
        axes = plt.subplot(grid[cell])
        axes.set_aspect('equal')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        plt.axis('off')
        plt.imshow(image.reshape(28, 28), cmap='Greys_r')

    return figure


def xavier_init(size):
    """Sample an initial weight tensor of shape ``size``.

    Values are drawn from a zero-mean normal whose standard deviation is
    ``1 / sqrt(size[0] / 2)``, i.e. it shrinks with the first dimension.
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(stddev=stddev, shape=size)


# Image input.
X = tf.placeholder(tf.float32, shape=[None, X_dim])

# Critic parameters: image -> h_dim -> one unbounded score.
D_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

# Passed to the critic's optimizer through ``var_list``.
theta_D = [D_W1, D_W2, D_b1, D_b2]


# Noise input.
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Generator parameters: noise -> h_dim -> image.
G_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

G_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Passed to the generator's optimizer through ``var_list``.
theta_G = [G_W1, G_W2, G_b1, G_b2]


def sample_z(m, n):
    """Draw an ``m`` x ``n`` noise matrix with entries uniform on [-1, 1)."""
    shape = [m, n]
    return np.random.uniform(low=-1., high=1., size=shape)


def G(z):
    """Map noise ``z`` through one ReLU hidden layer to a sigmoid output."""
    hidden = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    logits = tf.matmul(hidden, G_W2) + G_b2
    return tf.nn.sigmoid(logits)


def D(X):
    """Critic: return one unbounded score per row of ``X``."""
    hidden = tf.matmul(X, D_W1) + D_b1
    hidden = tf.nn.relu(hidden)
    return tf.matmul(hidden, D_W2) + D_b2


G_sample = G(z)
D_real = D(X)
D_fake = D(G_sample)

# Gradient penalty: score random per-row blends of real and generated
# images and penalise the gradient norm's squared distance from 1.
# eps has a fixed leading dimension of mb_size, so X and z must be fed with
# mb_size rows whenever D_loss is evaluated.
eps = tf.random_uniform([mb_size, 1], minval=0., maxval=1.)
X_inter = eps*X + (1. - eps)*G_sample
grad = tf.gradients(D(X_inter), [X_inter])[0]
grad_norm = tf.sqrt(tf.reduce_sum((grad)**2, axis=1))
grad_pen = lam * tf.reduce_mean((grad_norm - 1)**2)

# The critic lowers its score on generated rows relative to real rows, plus
# the penalty; the generator raises the critic's score on its samples.
D_loss = tf.reduce_mean(D_fake) - tf.reduce_mean(D_real) + grad_pen
G_loss = -tf.reduce_mean(D_fake)

D_solver = (tf.train.AdamOptimizer(learning_rate=lr, beta1=0.5)
            .minimize(D_loss, var_list=theta_D))
G_solver = (tf.train.AdamOptimizer(learning_rate=lr, beta1=0.5)
            .minimize(G_loss, var_list=theta_G))

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Output directory for the sample grids.
if not os.path.exists('out/'):
    os.makedirs('out/')

# Running index used to name the saved images.
i = 0

for it in range(1000000):
    # n_disc critic updates, each on a fresh data batch and fresh noise.
    for _ in range(n_disc):
        X_mb, _ = mnist.train.next_batch(mb_size)

        _, D_loss_curr = sess.run(
            [D_solver, D_loss],
            feed_dict={X: X_mb, z: sample_z(mb_size, z_dim)}
        )

    # One generator update; G_loss depends on z only.
    _, G_loss_curr = sess.run(
        [G_solver, G_loss],
        feed_dict={z: sample_z(mb_size, z_dim)}
    )

    # Every 1000 iterations: report the losses and save a 4x4 sample grid.
    if it % 1000 == 0:
        print('Iter: {}; D loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss_curr, G_loss_curr))

        samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)})

        fig = plot(samples)
        plt.savefig('out/{}.png'
                    .format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)


================================================
FILE: GAN/infogan/infogan_pytorch.py
================================================
import torch
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Load MNIST from a path relative to the current working directory.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32                         # rows per minibatch
Z_dim = 16                           # noise vector width
X_dim = mnist.train.images.shape[1]  # flattened image width, read from the data
y_dim = mnist.train.labels.shape[1]  # label width; not referenced again in this script
h_dim = 128                          # hidden layer width
cnt = 0                              # running index used to name saved images
lr = 1e-3                            # not referenced again: the solvers hard-code 1e-3


def xavier_init(size):
    """Return a trainable weight ``Variable`` of shape ``size``.

    Entries are standard-normal draws scaled by ``1 / sqrt(size[0] / 2)``.
    """
    fan_in = size[0]
    scale = 1. / np.sqrt(fan_in / 2.)
    weights = torch.randn(*size) * scale
    return Variable(weights, requires_grad=True)


""" ==================== GENERATOR ======================== """

# First layer: (noise ++ 10-wide code) -> hidden.
Wzh = xavier_init(size=[Z_dim + 10, h_dim])
bzh = Variable(torch.zeros(h_dim), requires_grad=True)

# Second layer: hidden -> image.
Whx = xavier_init(size=[h_dim, X_dim])
bhx = Variable(torch.zeros(X_dim), requires_grad=True)


def G(z, c):
    """Generate images from noise ``z`` and code ``c``.

    The inputs are joined along dim 1, passed through one ReLU hidden layer
    and squashed with a sigmoid. Biases are tiled to the batch size.
    """
    zc = torch.cat([z, c], 1)
    hidden = nn.relu(zc @ Wzh + bzh.repeat(zc.size(0), 1))
    return nn.sigmoid(hidden @ Whx + bhx.repeat(hidden.size(0), 1))


""" ==================== DISCRIMINATOR ======================== """

# First layer: image -> hidden.
Wxh = xavier_init(size=[X_dim, h_dim])
bxh = Variable(torch.zeros(h_dim), requires_grad=True)

# Second layer: hidden -> one output per row.
Why = xavier_init(size=[h_dim, 1])
bhy = Variable(torch.zeros(1), requires_grad=True)


def D(X):
    """Return one sigmoid output per row of ``X``.

    One ReLU hidden layer followed by a sigmoid; biases are tiled to the
    batch size.
    """
    hidden = nn.relu(X @ Wxh + bxh.repeat(X.size(0), 1))
    return nn.sigmoid(hidden @ Why + bhy.repeat(hidden.size(0), 1))


""" ====================== Q(c|X) ========================== """

# First layer: image -> hidden.
Wqxh = xavier_init(size=[X_dim, h_dim])
bqxh = Variable(torch.zeros(h_dim), requires_grad=True)

# Second layer: hidden -> 10 outputs, matching the width of the code c.
Whc = xavier_init(size=[h_dim, 10])
bhc = Variable(torch.zeros(10), requires_grad=True)


def Q(X):
    """Return the softmax over the 10 code outputs for each row of ``X``.

    One ReLU hidden layer followed by a softmax; biases are tiled to the
    batch size.
    """
    hidden = nn.relu(X @ Wqxh + bqxh.repeat(X.size(0), 1))
    return nn.softmax(hidden @ Whc + bhc.repeat(hidden.size(0), 1))


# Per-network parameter lists handed to the optimizers; `params` is the
# union that reset_grad() iterates over.
G_params = [Wzh, bzh, Whx, bhx]
D_params = [Wxh, bxh, Why, bhy]
Q_params = [Wqxh, bqxh, Whc, bhc]
params = G_params + D_params + Q_params


""" ===================== TRAINING ======================== """


def reset_grad():
    """Replace each existing gradient in ``params`` with a fresh zero tensor.

    Parameters whose gradient is still ``None`` are left untouched.
    """
    for param in params:
        if param.grad is None:
            continue
        grad_data = param.grad.data
        param.grad = Variable(grad_data.new().resize_as_(grad_data).zero_())


# The learning rate is written out here rather than taken from `lr`.
G_solver = optim.Adam(G_params, lr=1e-3)
D_solver = optim.Adam(D_params, lr=1e-3)
# The Q step updates the generator's parameters together with Q's.
Q_solver = optim.Adam(G_params + Q_params, lr=1e-3)


def sample_c(size):
    """Draw ``size`` one-hot codes over 10 equally likely classes.

    Returns a float32 ``Variable`` with one row per code.
    """
    one_hot = np.random.multinomial(1, 10*[0.1], size=size).astype('float32')
    return Variable(torch.from_numpy(one_hot))


for it in range(100000):
    # Sample data
    X, _ = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))

    # Noise and a random one-hot code, shared by the three steps below.
    z = Variable(torch.randn(mb_size, Z_dim))
    c = sample_c(mb_size)

    # Discriminator forward-loss-backward-update
    G_sample = G(z, c)
    D_real = D(X)
    D_fake = D(G_sample)

    # 1e-8 keeps the logs finite when D outputs exactly 0 or 1.
    D_loss = -torch.mean(torch.log(D_real + 1e-8) + torch.log(1 - D_fake + 1e-8))

    D_loss.backward()
    D_solver.step()

    # Housekeeping - reset gradient
    reset_grad()

    # Generator forward-loss-backward-update
    G_sample = G(z, c)
    D_fake = D(G_sample)

    G_loss = -torch.mean(torch.log(D_fake + 1e-8))

    G_loss.backward()
    G_solver.step()

    # Housekeeping - reset gradient
    reset_grad()

    # Q forward-loss-backward-update
    G_sample = G(z, c)
    Q_c_given_x = Q(G_sample)

    # Cross-entropy between the code fed to G and Q's output on the sample.
    crossent_loss = torch.mean(-torch.sum(c * torch.log(Q_c_given_x + 1e-8), dim=1))
    mi_loss = crossent_loss

    mi_loss.backward()
    Q_solver.step()

    # Housekeeping - reset gradient
    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        # Fix the code to one randomly chosen class for every row, so the
        # whole grid is generated from the same code.
        idx = np.random.randint(0, 10)
        c = np.zeros([mb_size, 10])
        c[range(mb_size), idx] = 1
        c = Variable(torch.from_numpy(c.astype('float32')))
        samples = G(z, c).data.numpy()[:16]

        print('Iter-{}; D_loss: {}; G_loss: {}; Idx: {}'
              .format(it, D_loss.data.numpy(), G_loss.data.numpy(), idx))

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        plt.savefig('out/{}.png'
                    .format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/infogan/infogan_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os


def xavier_init(size):
    """Return a random-normal tensor of shape `size` scaled by fan-in.

    The standard deviation is 1 / sqrt(size[0] / 2), where size[0] is the
    input dimension of the weight matrix.
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)


# Input placeholder for real samples: rows of 784 features.
X = tf.placeholder(tf.float32, shape=[None, 784])

# Discriminator parameters: 784 -> 128 hidden units -> 1 output.
D_W1 = tf.Variable(xavier_init([784, 128]))
D_b1 = tf.Variable(tf.zeros(shape=[128]))

D_W2 = tf.Variable(xavier_init([128, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

# Variables updated by the discriminator optimizer.
theta_D = [D_W1, D_W2, D_b1, D_b2]


# Generator inputs: 16-dimensional noise Z and a 10-dimensional code c.
Z = tf.placeholder(tf.float32, shape=[None, 16])
c = tf.placeholder(tf.float32, shape=[None, 10])

# Generator parameters: 26 inputs (16 noise + 10 code, concatenated inside
# generator()) -> 256 hidden units -> 784 outputs.
G_W1 = tf.Variable(xavier_init([26, 256]))
G_b1 = tf.Variable(tf.zeros(shape=[256]))

G_W2 = tf.Variable(xavier_init([256, 784]))
G_b2 = tf.Variable(tf.zeros(shape=[784]))

# Variables updated by the generator optimizer.
theta_G = [G_W1, G_W2, G_b1, G_b2]


# Q-network parameters: 784 -> 128 hidden units -> 10 outputs (one per
# entry of the code c).
Q_W1 = tf.Variable(xavier_init([784, 128]))
Q_b1 = tf.Variable(tf.zeros(shape=[128]))

Q_W2 = tf.Variable(xavier_init([128, 10]))
Q_b2 = tf.Variable(tf.zeros(shape=[10]))

# Variables specific to the Q network.
theta_Q = [Q_W1, Q_W2, Q_b1, Q_b2]


def sample_Z(m, n):
    """Draw an (m, n) array of noise sampled uniformly from [-1, 1)."""
    shape = [m, n]
    return np.random.uniform(low=-1., high=1., size=shape)


def sample_c(m):
    """Draw `m` one-hot rows of length 10, each category equally likely."""
    uniform_probs = [0.1] * 10
    return np.random.multinomial(1, uniform_probs, size=m)


def generator(z, c):
    """Map noise `z` and code `c` to sigmoid-activated outputs.

    The two inputs are concatenated along axis 1, passed through one ReLU
    hidden layer (G_W1, G_b1) and a sigmoid output layer (G_W2, G_b2).
    """
    latent = tf.concat(axis=1, values=[z, c])
    hidden = tf.nn.relu(tf.matmul(latent, G_W1) + G_b1)
    logits = tf.matmul(hidden, G_W2) + G_b2
    return tf.nn.sigmoid(logits)


def discriminator(x):
    """Return the sigmoid output of the two-layer discriminator for `x`."""
    hidden = tf.nn.relu(tf.matmul(x, D_W1) + D_b1)
    logit = tf.matmul(hidden, D_W2) + D_b2
    return tf.nn.sigmoid(logit)


def Q(x):
    """Return a softmax distribution over the 10 code entries for `x`."""
    hidden = tf.nn.relu(tf.matmul(x, Q_W1) + Q_b1)
    scores = tf.matmul(hidden, Q_W2) + Q_b2
    return tf.nn.softmax(scores)


def plot(samples):
    """Draw each sample as a 28x28 grayscale tile on a 4x4 grid.

    Returns the matplotlib figure so the caller can save and close it.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for cell, image in enumerate(samples):
        axes = plt.subplot(grid[cell])
        plt.axis('off')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        axes.set_aspect('equal')
        plt.imshow(image.reshape(28, 28), cmap='Greys_r')

    return figure


# Build the graph: generated samples, discriminator outputs on real and
# generated data, and Q's prediction of the code from generated samples.
G_sample = generator(Z, c)
D_real = discriminator(X)
D_fake = discriminator(G_sample)
Q_c_given_x = Q(G_sample)

# GAN losses; 1e-8 keeps the logarithm away from log(0).
D_loss = -tf.reduce_mean(tf.log(D_real + 1e-8) + tf.log(1 - D_fake + 1e-8))
G_loss = -tf.reduce_mean(tf.log(D_fake + 1e-8))

# Cross-entropy between the fed code c and Q's prediction for the sample
# generated from it.
cross_ent = tf.reduce_mean(-tf.reduce_sum(tf.log(Q_c_given_x + 1e-8) * c, 1))
Q_loss = cross_ent

# One Adam optimizer per loss. The Q loss updates both the generator and
# the Q network variables.
D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)
Q_solver = tf.train.AdamOptimizer().minimize(Q_loss, var_list=theta_G + theta_Q)

mb_size = 32
Z_dim = 16

mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out/'):
    os.makedirs('out/')

# Counter used to number the saved sample images.
i = 0

for it in range(1000000):
    # Every 1000 iterations, save a 4x4 grid of samples that all share one
    # randomly chosen code index.
    if it % 1000 == 0:
        Z_noise = sample_Z(16, Z_dim)

        idx = np.random.randint(0, 10)
        c_noise = np.zeros([16, 10])
        c_noise[range(16), idx] = 1

        samples = sess.run(G_sample,
                           feed_dict={Z: Z_noise, c: c_noise})

        fig = plot(samples)
        plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)

    # Draw one real minibatch plus matching noise and codes; the same noise
    # and codes are reused for the D, G and Q updates below.
    X_mb, _ = mnist.train.next_batch(mb_size)
    Z_noise = sample_Z(mb_size, Z_dim)
    c_noise = sample_c(mb_size)

    # Discriminator update.
    _, D_loss_curr = sess.run([D_solver, D_loss],
                              feed_dict={X: X_mb, Z: Z_noise, c: c_noise})

    # Generator update.
    _, G_loss_curr = sess.run([G_solver, G_loss],
                              feed_dict={Z: Z_noise, c: c_noise})

    # Q (and generator) update on the cross-entropy term.
    sess.run([Q_solver], feed_dict={Z: Z_noise, c: c_noise})

    if it % 1000 == 0:
        print('Iter: {}'.format(it))
        print('D loss: {:.4}'. format(D_loss_curr))
        print('G_loss: {:.4}'.format(G_loss_curr))
        print()


================================================
FILE: GAN/least_squares_gan/lsgan_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Load MNIST; X_dim and y_dim below are read from the dataset's shapes.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32  # minibatch size
z_dim = 10  # noise dimension fed to G
X_dim = mnist.train.images.shape[1]
y_dim = mnist.train.labels.shape[1]
h_dim = 128  # hidden layer width for both networks
cnt = 0  # index of the next saved sample image
d_step = 3  # discriminator updates per generator update
lr = 1e-3


# Generator: z_dim -> h_dim -> X_dim with a sigmoid output.
G = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)


# Discriminator: X_dim -> h_dim -> 1 with no output activation, so it
# emits an unbounded score.
D = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1),
)


def reset_grad():
    """Zero the accumulated gradients of the generator and discriminator."""
    for net in (G, D):
        net.zero_grad()


# Separate Adam optimizers so each step updates only one network.
G_solver = optim.Adam(G.parameters(), lr=lr)
D_solver = optim.Adam(D.parameters(), lr=lr)


for it in range(1000000):
    # Run d_step discriminator updates for every generator update.
    for _ in range(d_step):
        # Sample data
        z = Variable(torch.randn(mb_size, z_dim))
        X, _ = mnist.train.next_batch(mb_size)
        X = Variable(torch.from_numpy(X))

        # Discriminator
        G_sample = G(z)
        D_real = D(X)
        D_fake = D(G_sample)

        # Least-squares loss: push D(real) towards 1 and D(fake) towards 0.
        D_loss = 0.5 * (torch.mean((D_real - 1)**2) + torch.mean(D_fake**2))

        D_loss.backward()
        D_solver.step()
        reset_grad()

    # Generator
    z = Variable(torch.randn(mb_size, z_dim))

    G_sample = G(z)
    D_fake = D(G_sample)

    # Least-squares loss: push D(fake) towards 1.
    G_loss = 0.5 * torch.mean((D_fake - 1)**2)

    G_loss.backward()
    G_solver.step()
    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        # NOTE(review): `.data[0]` indexes the scalar loss; this presumably
        # targets an older PyTorch release - confirm the supported version.
        print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss.data[0], G_loss.data[0]))

        # Re-generate from the last generator noise batch; keep 16 samples.
        samples = G(z).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        # Files are numbered by cnt, zero-padded to three digits.
        plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/least_squares_gan/lsgan_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os


# Hyperparameters.
mb_size = 32  # minibatch size
X_dim = 784  # features per input sample
z_dim = 64  # noise dimension fed to the generator
h_dim = 128  # hidden layer width for both networks
lr = 1e-3  # Adam learning rate
d_steps = 3  # discriminator updates per generator update

mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)


def plot(samples):
    """Render `samples` as 28x28 grayscale tiles on a 4x4 grid.

    Returns the matplotlib figure; the caller saves and closes it.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for cell, image in enumerate(samples):
        axes = plt.subplot(grid[cell])
        plt.axis('off')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        axes.set_aspect('equal')
        plt.imshow(image.reshape(28, 28), cmap='Greys_r')

    return figure


def xavier_init(size):
    """Sample a normal tensor of shape `size` with std 1 / sqrt(size[0] / 2)."""
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)


# Placeholders for a batch of real samples and a batch of generator noise.
X = tf.placeholder(tf.float32, shape=[None, X_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Discriminator parameters: X_dim -> h_dim -> 1.
D_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

# Generator parameters: z_dim -> h_dim -> X_dim.
G_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
G_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Variable lists passed as var_list to the respective optimizers.
theta_G = [G_W1, G_W2, G_b1, G_b2]
theta_D = [D_W1, D_W2, D_b1, D_b2]


def sample_z(m, n):
    """Return an (m, n) noise array drawn uniformly from [-1, 1)."""
    shape = [m, n]
    return np.random.uniform(low=-1., high=1., size=shape)


def generator(z):
    """Map noise `z` through a ReLU hidden layer to sigmoid outputs."""
    hidden = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    logits = tf.matmul(hidden, G_W2) + G_b2
    return tf.nn.sigmoid(logits)


def discriminator(x):
    """Return the raw (un-squashed) discriminator score for `x`."""
    hidden = tf.nn.relu(tf.matmul(x, D_W1) + D_b1)
    return tf.matmul(hidden, D_W2) + D_b2


# Build the graph: generated samples and discriminator scores on real and
# generated data.
G_sample = generator(z)

D_real = discriminator(X)
D_fake = discriminator(G_sample)

# Least-squares objectives: D pushes real scores to 1 and fake scores to 0;
# G pushes the scores of its samples to 1.
D_loss = 0.5 * (tf.reduce_mean((D_real - 1)**2) + tf.reduce_mean(D_fake**2))
G_loss = 0.5 * tf.reduce_mean((D_fake - 1)**2)

D_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(D_loss, var_list=theta_D))
G_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(G_loss, var_list=theta_G))

sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out/'):
    os.makedirs('out/')

# Counter used to number the saved sample images.
i = 0

for it in range(1000000):
    # Run d_steps discriminator updates for every generator update.
    for _ in range(d_steps):
        X_mb, _ = mnist.train.next_batch(mb_size)
        z_mb = sample_z(mb_size, z_dim)

        _, D_loss_curr = sess.run(
            [D_solver, D_loss],
            feed_dict={X: X_mb, z: z_mb}
        )

    # G_loss depends only on z, so the generator step needs no real batch.
    # Drawing one here would silently discard MNIST samples that the
    # discriminator never gets to see.
    z_mb = sample_z(mb_size, z_dim)

    _, G_loss_curr = sess.run(
        [G_solver, G_loss],
        feed_dict={z: z_mb}
    )

    # Every 1000 iterations, report the losses and save a grid of samples.
    if it % 1000 == 0:
        print('Iter: {}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss_curr, G_loss_curr))

        samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)})

        fig = plot(samples)
        plt.savefig('out/{}.png'
                    .format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)


================================================
FILE: GAN/magan/magan_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Load MNIST; X_dim and y_dim below are read from the dataset's shapes.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32  # minibatch size
z_dim = 10  # noise dimension fed to G
X_dim = mnist.train.images.shape[1]
y_dim = mnist.train.labels.shape[1]
h_dim = 128  # hidden layer width for both networks
cnt = 0  # index of the next saved sample image
d_step = 3
lr = 5e-4
# Placeholder margin: m is reassigned from the pretrained discriminator's
# mean energy on the training images before the GAN loop reads it.
m = 5
n_iter = 1000  # minibatch iterations per epoch
n_epoch = 1000
N = n_iter * mb_size  # N data per epoch


# Generator: z_dim -> h_dim -> X_dim with a sigmoid output.
G = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)

# D is an autoencoder
D_ = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
)


# Energy is the per-sample summed squared reconstruction error of the
# autoencoder D_.
def D(X):
    """Return, for each row of `X`, the sum of squared errors of D_(X)."""
    residual = X - D_(X)
    return torch.sum(residual**2, 1)


def reset_grad():
    """Zero the accumulated gradients of the generator and the autoencoder."""
    for net in (G, D_):
        net.zero_grad()


# Separate Adamax optimizers for the generator and the autoencoder D_.
G_solver = optim.Adamax(G.parameters(), lr=lr)
D_solver = optim.Adamax(D_.parameters(), lr=lr)


# Pretrain discriminator
for it in range(2*n_iter):
    X, _ = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))

    loss = torch.mean(D(X))  # Minimize real samples energy

    loss.backward()
    D_solver.step()
    reset_grad()

    if it % 1000 == 0:
        # NOTE(review): `.data[0]` on a scalar loss presumably targets an
        # older PyTorch release - confirm the supported version.
        print('Iter-{}; Pretrained D loss: {:.4}'.format(it, loss.data[0]))


# Initial margin, expected energy of real data
m = torch.mean(D(Variable(torch.from_numpy(mnist.train.images)))).data[0]
# Previous epoch's summed fake energy; starts at +inf so the margin cannot
# be updated after the first epoch.
s_z_before = torch.from_numpy(np.array([np.inf], dtype='float32'))


# GAN training
for t in range(n_epoch):
    # Per-epoch sums of the energies of real (s_x) and fake (s_z) samples.
    s_x, s_z = torch.zeros(1), torch.zeros(1)

    for it in range(n_iter):
        # Sample data
        z = Variable(torch.randn(mb_size, z_dim))
        X, _ = mnist.train.next_batch(mb_size)
        X = Variable(torch.from_numpy(X))

        # Discriminator
        G_sample = G(z)
        D_real = D(X)
        D_fake = D(G_sample)

        # Low energy for real data; hinge on the batch-mean fake energy so
        # it is only penalized while below the margin m.
        D_loss = torch.mean(D_real) + nn.relu(m - torch.mean(D_fake))

        D_loss.backward()
        D_solver.step()

        # Update real samples statistics
        s_x += torch.sum(D_real.data)

        reset_grad()

        # Generator
        z = Variable(torch.randn(mb_size, z_dim))
        G_sample = G(z)
        D_fake = D(G_sample)

        # The generator minimizes the energy of its own samples.
        G_loss = torch.mean(D_fake)

        G_loss.backward()
        G_solver.step()

        # Update fake samples statistics
        s_z += torch.sum(D_fake.data)

        reset_grad()

    # Update margin
    # The margin only shrinks: it becomes the mean real energy when that is
    # below the current margin, the real energy sum is below the fake sum,
    # and the fake sum grew relative to the previous epoch.
    if (((s_x[0] / N) < m) and (s_x[0] < s_z[0]) and (s_z_before[0] < s_z[0])):
        m = s_x[0] / N

    s_z_before = s_z

    # Convergence measure
    Ex = s_x[0] / N
    Ez = s_z[0] / N
    L = Ex + np.abs(Ex - Ez)

    # Visualize
    print('Epoch-{}; m = {:.4}; L = {:.4}'
          .format(t, m, L))

    # Re-generate from the last noise batch of the epoch; keep 16 samples.
    samples = G(z).data.numpy()[:16]

    fig = plt.figure(figsize=(4, 4))
    gs = gridspec.GridSpec(4, 4)
    gs.update(wspace=0.05, hspace=0.05)

    for i, sample in enumerate(samples):
        ax = plt.subplot(gs[i])
        plt.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

    if not os.path.exists('out/'):
        os.makedirs('out/')

    # Files are numbered by cnt, zero-padded to three digits.
    plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight')
    cnt += 1
    plt.close(fig)


================================================
FILE: GAN/magan/magan_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os


# Hyperparameters.
mb_size = 32  # minibatch size
X_dim = 784  # features per input sample
z_dim = 64  # noise dimension fed to the generator
h_dim = 128  # hidden layer width for both networks
lr = 5e-4  # Adam learning rate
n_iter = 1000  # minibatch iterations per epoch
n_epoch = 1000
N = n_iter * mb_size  # N data per epoch

mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)


def plot(samples):
    """Lay out `samples` as 28x28 grayscale tiles on a 4x4 grid.

    Returns the matplotlib figure; the caller saves and closes it.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for cell, image in enumerate(samples):
        axes = plt.subplot(grid[cell])
        plt.axis('off')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        axes.set_aspect('equal')
        plt.imshow(image.reshape(28, 28), cmap='Greys_r')

    return figure


def xavier_init(size):
    """Sample a normal tensor of shape `size` with std 1 / sqrt(size[0] / 2)."""
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)


# Placeholders: real samples, generator noise, and the margin value fed at
# every discriminator/generator step.
X = tf.placeholder(tf.float32, shape=[None, X_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])
m = tf.placeholder(tf.float32)

# Discriminator (autoencoder) parameters: X_dim -> h_dim -> X_dim.
D_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
D_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
D_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Generator parameters: z_dim -> h_dim -> X_dim.
G_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
G_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Variable lists passed as var_list to the respective optimizers.
theta_G = [G_W1, G_W2, G_b1, G_b2]
theta_D = [D_W1, D_W2, D_b1, D_b2]


def sample_z(m, n):
    """Return an (m, n) noise array drawn uniformly from [-1, 1)."""
    shape = [m, n]
    return np.random.uniform(low=-1., high=1., size=shape)


def G(z):
    """Map noise `z` through a ReLU hidden layer to sigmoid outputs."""
    hidden = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    logits = tf.matmul(hidden, G_W2) + G_b2
    return tf.nn.sigmoid(logits)


def D(X):
    """Return each row's energy: the summed squared reconstruction error.

    `X` is encoded by a ReLU layer and linearly decoded back to X_dim; the
    squared difference to the input is summed over axis 1.
    """
    hidden = tf.nn.relu(tf.matmul(X, D_W1) + D_b1)
    reconstruction = tf.matmul(hidden, D_W2) + D_b2
    squared_error = (X - reconstruction)**2
    return tf.reduce_sum(squared_error, 1)


# Build the graph: generated samples and the energies of real and
# generated data.
G_sample = G(z)

D_real = D(X)
D_fake = D(G_sample)

# Pretraining loss: mean energy (reconstruction error) of real data.
D_recon_loss = tf.reduce_mean(D_real)
# Low energy for real data plus a per-sample hinge that penalizes fake
# energies below the margin m.
D_loss = tf.reduce_mean(D_real + tf.maximum(0., m - D_fake))
# The generator minimizes the energy of its own samples.
G_loss = tf.reduce_mean(D_fake)

D_recon_solver = (tf.train.AdamOptimizer(learning_rate=lr)
                  .minimize(D_recon_loss, var_list=theta_D))
D_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(D_loss, var_list=theta_D))
G_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(G_loss, var_list=theta_G))

sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out/'):
    os.makedirs('out/')


# Pretrain
# Train the autoencoder on real data only for 2*n_iter minibatches.
for it in range(2*n_iter):
    X_mb, _ = mnist.train.next_batch(mb_size)

    _, D_recon_loss_curr = sess.run(
        [D_recon_solver, D_recon_loss], feed_dict={X: X_mb}
    )

    if it % 1000 == 0:
        print('Iter-{}; Pretrained D loss: {:.4}'.format(it, D_recon_loss_curr))


# Counter used to number the saved sample images.
i = 0
# Initial margin, expected energy of real data
margin = sess.run(D_recon_loss, feed_dict={X: mnist.train.images})
# Previous epoch's summed fake energy; starts at +inf so the margin cannot
# be updated after the first epoch.
s_z_before = np.inf

# GAN training
for t in range(n_epoch):
    # Per-epoch sums of the energies of real (s_x) and fake (s_z) samples.
    s_x, s_z = 0., 0.

    for it in range(n_iter):
        X_mb, _ = mnist.train.next_batch(mb_size)
        z_mb = sample_z(mb_size, z_dim)

        _, D_loss_curr, D_real_curr = sess.run(
            [D_solver, D_loss, D_real], feed_dict={X: X_mb, z: z_mb, m: margin}
        )

        # Update real samples statistics
        s_x += np.sum(D_real_curr)

        # Generator step on a fresh noise batch.
        _, G_loss_curr, D_fake_curr = sess.run(
            [G_solver, G_loss, D_fake],
            feed_dict={X: X_mb, z: sample_z(mb_size, z_dim), m: margin}
        )

        # Update fake samples statistics
        s_z += np.sum(D_fake_curr)

    # Update margin
    # The margin only shrinks: it becomes the mean real energy when that is
    # below the current margin, the real energy sum is below the fake sum,
    # and the fake sum grew relative to the previous epoch.
    if (s_x / N < margin) and (s_x < s_z) and (s_z_before < s_z):
        margin = s_x / N

    s_z_before = s_z

    # Convergence measure
    Ex = s_x / N
    Ez = s_z / N
    L = Ex + np.abs(Ex - Ez)

    # Visualize
    print('Epoch: {}; m: {:.4}, L: {:.4}'.format(t, margin, L))

    samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)})

    fig = plot(samples)
    plt.savefig('out/{}.png'
                .format(str(i).zfill(3)), bbox_inches='tight')
    i += 1
    plt.close(fig)


================================================
FILE: GAN/mode_regularized_gan/mode_reg_gan_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Load MNIST; X_dim and y_dim below are read from the dataset's shapes.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32  # minibatch size
z_dim = 128  # latent dimension (encoder output / generator input)
X_dim = mnist.train.images.shape[1]
y_dim = mnist.train.labels.shape[1]
h_dim = 128  # hidden layer width for all three networks
cnt = 0  # index of the next saved sample image
lr = 1e-4
lam1 = 1e-2  # weight of the reconstruction (squared error) term
lam2 = 1e-2  # weight of the log D(G(E(x))) term


def log(x):
    """Return the element-wise natural log of `x`, offset by 1e-8.

    The offset keeps the result finite when `x` contains zeros.
    """
    eps = 1e-8
    shifted = x + eps
    return torch.log(shifted)


# Encoder: X_dim -> h_dim -> z_dim, linear output.
E = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, z_dim)
)

# Generator: z_dim -> h_dim -> X_dim with a sigmoid output.
G = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)

# Discriminator: X_dim -> h_dim -> 1 with a sigmoid output.
D = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1),
    torch.nn.Sigmoid()
)


def reset_grad():
    """Zero the accumulated gradients of the generator, discriminator and encoder."""
    for net in (G, D, E):
        net.zero_grad()


def sample_X(size, include_y=False):
    """Draw the next MNIST training minibatch as torch Variables.

    Args:
        size: number of samples to draw from `mnist.train`.
        include_y: when True, also return the integer class labels.

    Returns:
        `X` alone, or the tuple `(X, y)` when `include_y` is True. `y`
        holds the argmax of each one-hot label row as 64-bit integers.
    """
    X, y = mnist.train.next_batch(size)
    X = Variable(torch.from_numpy(X))

    if include_y:
        # `np.int` was removed in NumPy 1.24; use an explicit 64-bit
        # integer dtype for the labels instead.
        y = np.argmax(y, axis=1).astype(np.int64)
        y = Variable(torch.from_numpy(y))
        return X, y

    return X


# One Adam optimizer per network so each step updates only that network.
E_solver = optim.Adam(E.parameters(), lr=lr)
G_solver = optim.Adam(G.parameters(), lr=lr)
D_solver = optim.Adam(D.parameters(), lr=lr)


for it in range(1000000):
    """ Discriminator """
    # Sample data
    X = sample_X(mb_size)
    z = Variable(torch.randn(mb_size, z_dim))

    # Discriminator forward-loss-backward-update
    G_sample = G(z)
    D_real = D(X)
    D_fake = D(G_sample)

    D_loss = -torch.mean(log(D_real) + log(1 - D_fake))

    D_loss.backward()
    D_solver.step()

    # Housekeeping - reset gradient
    reset_grad()

    """ Generator """
    # Sample data
    X = sample_X(mb_size)
    z = Variable(torch.randn(mb_size, z_dim))

    # Generator forward-loss-backward-update
    # G_sample_reg is the reconstruction of real data through E then G.
    G_sample = G(z)
    G_sample_reg = G(E(X))
    D_fake = D(G_sample)
    D_reg = D(G_sample_reg)

    # Regularizer: weighted per-sample squared reconstruction error plus
    # the weighted log-discriminator output on the reconstruction.
    mse = torch.sum((X - G_sample_reg)**2, 1)
    reg = torch.mean(lam1 * mse + lam2 * log(D_reg))
    G_loss = -torch.mean(log(D_fake)) + reg

    G_loss.backward()
    G_solver.step()

    # Housekeeping - reset gradient
    reset_grad()

    """ Encoder """
    # Sample data
    X = sample_X(mb_size)
    z = Variable(torch.randn(mb_size, z_dim))

    G_sample_reg = G(E(X))
    D_reg = D(G_sample_reg)

    # The encoder is trained on the same regularizer expression as above.
    mse = torch.sum((X - G_sample_reg)**2, 1)
    E_loss = torch.mean(lam1 * mse + lam2 * log(D_reg))

    E_loss.backward()
    E_solver.step()

    # Housekeeping - reset gradient
    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        print('Iter-{}; D_loss: {}; E_loss: {}; G_loss: {}'
              .format(it, D_loss.data.numpy(), E_loss.data.numpy(), G_loss.data.numpy()))

        # Generate from the most recently drawn noise batch; keep 16.
        samples = G(z).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        # Files are numbered by cnt, zero-padded to three digits.
        plt.savefig('out/{}.png'
                    .format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/mode_regularized_gan/mode_reg_gan_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os


# Hyperparameters.
mb_size = 32  # minibatch size
X_dim = 784  # features per input sample
z_dim = 10  # latent dimension (encoder output / generator input)
h_dim = 128  # hidden layer width for all three networks
lam1 = 1e-2  # weight of the reconstruction (squared error) term
lam2 = 1e-2  # weight of the log D(G(E(x))) term

mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)


def plot(samples):
    """Show `samples` as 28x28 grayscale tiles on a 4x4 grid.

    Returns the matplotlib figure; the caller saves and closes it.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for cell, image in enumerate(samples):
        axes = plt.subplot(grid[cell])
        plt.axis('off')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        axes.set_aspect('equal')
        plt.imshow(image.reshape(28, 28), cmap='Greys_r')

    return figure


def xavier_init(size):
    """Sample a normal tensor of shape `size` with std 1 / sqrt(size[0] / 2)."""
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)


def log(x):
    """Return tf.log of `x` offset by 1e-8, so zeros do not yield -inf."""
    shifted = x + 1e-8
    return tf.log(shifted)


# Placeholders for a batch of real samples and a batch of latent noise.
X = tf.placeholder(tf.float32, shape=[None, X_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Encoder parameters: X_dim -> h_dim -> z_dim.
E_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
E_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
E_W2 = tf.Variable(xavier_init([h_dim, z_dim]))
E_b2 = tf.Variable(tf.zeros(shape=[z_dim]))

# Discriminator parameters: X_dim -> h_dim -> 1.
D_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

# Generator parameters: z_dim -> h_dim -> X_dim.
G_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

G_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Variable lists passed as var_list to the respective optimizers.
theta_E = [E_W1, E_W2, E_b1, E_b2]
theta_G = [G_W1, G_W2, G_b1, G_b2]
theta_D = [D_W1, D_W2, D_b1, D_b2]


def sample_z(m, n):
    """Return an (m, n) noise array drawn uniformly from [-1, 1)."""
    shape = [m, n]
    return np.random.uniform(low=-1., high=1., size=shape)


def encoder(x):
    """Encode `x` to a z_dim-wide code: one ReLU layer, then a linear layer."""
    hidden = tf.nn.relu(tf.matmul(x, E_W1) + E_b1)
    return tf.matmul(hidden, E_W2) + E_b2


def generator(z):
    """Map latent `z` through a ReLU hidden layer to sigmoid outputs."""
    hidden = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    logits = tf.matmul(hidden, G_W2) + G_b2
    return tf.nn.sigmoid(logits)


def discriminator(x):
    """Return the sigmoid output of the two-layer discriminator for `x`."""
    hidden = tf.nn.relu(tf.matmul(x, D_W1) + D_b1)
    logit = tf.matmul(hidden, D_W2) + D_b2
    return tf.nn.sigmoid(logit)


# Build the graph: samples from noise, and reconstructions of real data
# obtained by encoding X and decoding it with the generator.
G_sample = generator(z)
G_sample_reg = generator(encoder(X))

D_real = discriminator(X)
D_fake = discriminator(G_sample)
D_reg = discriminator(G_sample_reg)

# Per-sample squared reconstruction error, summed over features.
mse = tf.reduce_sum((X - G_sample_reg)**2, 1)

D_loss = -tf.reduce_mean(log(D_real) + log(1 - D_fake))
# Regularizer shared by the encoder and generator losses.
E_loss = tf.reduce_mean(lam1 * mse + lam2 * log(D_reg))
G_loss = -tf.reduce_mean(log(D_fake)) + E_loss

# One Adam optimizer per network, each restricted to its own variables.
E_solver = (tf.train.AdamOptimizer(learning_rate=1e-3)
            .minimize(E_loss, var_list=theta_E))
D_solver = (tf.train.AdamOptimizer(learning_rate=1e-3)
            .minimize(D_loss, var_list=theta_D))
G_solver = (tf.train.AdamOptimizer(learning_rate=1e-3)
            .minimize(G_loss, var_list=theta_G))

sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out/'):
    os.makedirs('out/')

# Counter used to number the saved sample images.
i = 0

for it in range(1000000):
    # One real minibatch is shared by the D, G and E updates; each update
    # draws its own noise batch.
    X_mb, _ = mnist.train.next_batch(mb_size)

    _, D_loss_curr = sess.run(
        [D_solver, D_loss],
        feed_dict={X: X_mb, z: sample_z(mb_size, z_dim)}
    )

    _, G_loss_curr = sess.run(
        [G_solver, G_loss],
        feed_dict={X: X_mb, z: sample_z(mb_size, z_dim)}
    )

    _, E_loss_curr = sess.run(
        [E_solver, E_loss],
        feed_dict={X: X_mb, z: sample_z(mb_size, z_dim)}
    )

    # Every 1000 iterations, report the losses and save a grid of samples.
    if it % 1000 == 0:
        print('Iter: {}; D_loss: {:.4}; G_loss: {:.4}; E_loss: {:.4}'
              .format(it, D_loss_curr, G_loss_curr, E_loss_curr))

        samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)})

        fig = plot(samples)
        plt.savefig('out/{}.png'
                    .format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)


================================================
FILE: GAN/softmax_gan/softmax_gan_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Load MNIST; X_dim and y_dim below are read from the dataset's shapes.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 16  # minibatch size
z_dim = 10  # noise dimension fed to G
X_dim = mnist.train.images.shape[1]
y_dim = mnist.train.labels.shape[1]
h_dim = 128  # hidden layer width for both networks
cnt = 0  # index of the next saved sample image
lr = 1e-3


def log(x):
    """Return the element-wise natural log of `x`, offset by 1e-8.

    The offset keeps the result finite when `x` contains zeros.
    """
    eps = 1e-8
    shifted = x + eps
    return torch.log(shifted)


# Generator: z_dim -> h_dim -> X_dim with a sigmoid output.
G = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)


# Discriminator: X_dim -> h_dim -> 1 with no output activation, so it
# emits an unbounded score.
D = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1)
)


def reset_grad():
    """Zero the accumulated gradients of the generator and discriminator."""
    for net in (G, D):
        net.zero_grad()


# Separate Adam optimizers so each step updates only one network.
G_solver = optim.Adam(G.parameters(), lr=lr)
D_solver = optim.Adam(D.parameters(), lr=lr)


# Per-sample target weights: the discriminator target spreads its mass over
# the mb_size real samples, the generator target over all 2 * mb_size
# samples (real and fake).
D_target = 1./mb_size
G_target = 1./(mb_size*2)

for it in range(1000000):
    # Sample data
    z = Variable(torch.randn(mb_size, z_dim))
    X, _ = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))

    # Discriminator forward-loss-backward-update
    G_sample = G(z)
    D_real = D(X)
    D_fake = D(G_sample)

    # Partition func.
    Z = torch.sum(torch.exp(-D_real)) + torch.sum(torch.exp(-D_fake))

    D_loss = torch.sum(D_target * D_real) + log(Z)

    D_loss.backward()
    D_solver.step()
    reset_grad()

    # Generator forward-loss-backward-update.
    # Rebuild the graph after the discriminator step: back-propagating
    # through the earlier graph would combine activations computed with the
    # old D weights with the weights D_solver.step() just modified in
    # place, which newer PyTorch versions reject with a RuntimeError.
    G_sample = G(z)
    D_real = D(X)
    D_fake = D(G_sample)

    Z = torch.sum(torch.exp(-D_real)) + torch.sum(torch.exp(-D_fake))

    G_loss = torch.sum(G_target * D_real) + torch.sum(G_target * D_fake) + log(Z)

    G_loss.backward()
    G_solver.step()
    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss.data[0], G_loss.data[0]))

        # Re-generate from the current noise batch; keep 16 samples.
        samples = G(z).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        # Files are numbered by cnt, zero-padded to three digits.
        plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/softmax_gan/softmax_gan_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os


# Hyperparameters.
mb_size = 32  # minibatch size
X_dim = 784  # features per input sample
z_dim = 64  # noise dimension fed to the generator
h_dim = 128  # hidden layer width for both networks
lr = 1e-3  # Adam learning rate
d_steps = 3  # NOTE(review): not referenced by the training loop below

mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)


def plot(samples):
    """Draw `samples` as 28x28 grayscale tiles on a 4x4 grid.

    Returns the matplotlib figure; the caller saves and closes it.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for cell, image in enumerate(samples):
        axes = plt.subplot(grid[cell])
        plt.axis('off')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        axes.set_aspect('equal')
        plt.imshow(image.reshape(28, 28), cmap='Greys_r')

    return figure


def xavier_init(size):
    """Sample a normal tensor of shape `size` with std 1 / sqrt(size[0] / 2)."""
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)


def log(x):
    """Return tf.log of `x` offset by 1e-8, so zeros do not yield -inf."""
    shifted = x + 1e-8
    return tf.log(shifted)


# Placeholders for a batch of real samples and a batch of generator noise.
X = tf.placeholder(tf.float32, shape=[None, X_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Discriminator parameters: X_dim -> h_dim -> 1.
D_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

# Generator parameters: z_dim -> h_dim -> X_dim.
G_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
G_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Variable lists passed as var_list to the respective optimizers.
theta_G = [G_W1, G_W2, G_b1, G_b2]
theta_D = [D_W1, D_W2, D_b1, D_b2]


def sample_z(m, n):
    """Return an (m, n) noise array drawn uniformly from [-1, 1)."""
    shape = [m, n]
    return np.random.uniform(low=-1., high=1., size=shape)


def G(z):
    """Map noise `z` through a ReLU hidden layer to sigmoid outputs."""
    hidden = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    logits = tf.matmul(hidden, G_W2) + G_b2
    return tf.nn.sigmoid(logits)


def D(X):
    """Return the raw (un-squashed) discriminator score for each row of `X`."""
    hidden = tf.nn.relu(tf.matmul(X, D_W1) + D_b1)
    return tf.matmul(hidden, D_W2) + D_b2


# Build the graph: generated samples and discriminator scores on real and
# generated data.
G_sample = G(z)

D_real = D(X)
D_fake = D(G_sample)

# Per-sample target weights: the discriminator target spreads its mass over
# the mb_size real samples, the generator target over all 2 * mb_size
# samples (real and fake).
D_target = 1./mb_size
G_target = 1./(mb_size*2)

# Partition function: sum of exp(-score) over the real and fake batches.
Z = tf.reduce_sum(tf.exp(-D_real)) + tf.reduce_sum(tf.exp(-D_fake))

D_loss = tf.reduce_sum(D_target * D_real) + log(Z)
G_loss = tf.reduce_sum(G_target * D_real) + tf.reduce_sum(G_target * D_fake) + log(Z)

D_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(D_loss, var_list=theta_D))
G_solver = (tf.train.AdamOptimizer(learning_rate=lr)
            .minimize(G_loss, var_list=theta_G))

sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out/'):
    os.makedirs('out/')

# Counter used to number the saved sample images.
i = 0

for it in range(1000000):
    # The same real batch and noise batch feed both updates; G_loss reads X
    # through D_real and Z, so X must be fed for the generator step too.
    X_mb, _ = mnist.train.next_batch(mb_size)
    z_mb = sample_z(mb_size, z_dim)

    _, D_loss_curr = sess.run(
        [D_solver, D_loss], feed_dict={X: X_mb, z: z_mb}
    )

    _, G_loss_curr = sess.run(
        [G_solver, G_loss], feed_dict={X: X_mb, z: z_mb}
    )

    # Every 1000 iterations, report the losses and save a grid of samples.
    if it % 1000 == 0:
        print('Iter: {}; D_loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss_curr, G_loss_curr))

        samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)})

        fig = plot(samples)
        plt.savefig('out/{}.png'
                    .format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)


================================================
FILE: GAN/vanilla_gan/gan_pytorch.py
================================================
import torch
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Load MNIST; X_dim and y_dim below are read from the dataset's shapes.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 64  # minibatch size
Z_dim = 100  # noise dimension fed to G
X_dim = mnist.train.images.shape[1]
y_dim = mnist.train.labels.shape[1]
h_dim = 128  # hidden layer width for both networks
c = 0  # presumably the saved-image counter - TODO confirm against its use
lr = 1e-3


def xavier_init(size):
    """Return a trainable weight of shape `size` drawn from N(0, 2 / size[0]).

    The standard deviation is 1 / sqrt(size[0] / 2), where size[0] is the
    input dimension of the layer.
    """
    fan_in = size[0]
    scale = 1. / np.sqrt(fan_in / 2.)
    noise = torch.randn(*size)
    return Variable(noise * scale, requires_grad=True)


""" ==================== GENERATOR ======================== """

# Layer 1: noise -> hidden
Wzh = xavier_init(size=[Z_dim, h_dim])
bzh = Variable(torch.zeros(h_dim), requires_grad=True)

# Layer 2: hidden -> data space
Whx = xavier_init(size=[h_dim, X_dim])
bhx = Variable(torch.zeros(X_dim), requires_grad=True)


def G(z):
    """Generator: map a batch of noise vectors to samples in (0, 1).

    One ReLU hidden layer followed by a sigmoid output layer. The biases are
    tiled to the batch size explicitly before being added.
    """
    batch = z.size(0)
    hidden = nn.relu(z @ Wzh + bzh.repeat(batch, 1))
    pre_out = hidden @ Whx + bhx.repeat(hidden.size(0), 1)
    return nn.sigmoid(pre_out)


""" ==================== DISCRIMINATOR ======================== """

# Layer 1: data -> hidden
Wxh = xavier_init(size=[X_dim, h_dim])
bxh = Variable(torch.zeros(h_dim), requires_grad=True)

# Layer 2: hidden -> single output unit
Why = xavier_init(size=[h_dim, 1])
bhy = Variable(torch.zeros(1), requires_grad=True)


def D(X):
    """Discriminator: map a batch of samples to one sigmoid output each.

    One ReLU hidden layer followed by a single sigmoid unit. The biases are
    tiled to the batch size explicitly before being added.
    """
    batch = X.size(0)
    hidden = nn.relu(X @ Wxh + bxh.repeat(batch, 1))
    pre_out = hidden @ Why + bhy.repeat(hidden.size(0), 1)
    return nn.sigmoid(pre_out)


# Parameter groups: one per network, plus the union used by reset_grad().
G_params = [Wzh, bzh, Whx, bhx]
D_params = [Wxh, bxh, Why, bhy]
params = G_params + D_params


""" ===================== TRAINING ======================== """


def reset_grad():
    """Replace every existing parameter gradient with a fresh zero tensor.

    Parameters whose gradient has not been populated yet are left alone.
    """
    for param in params:
        if param.grad is None:
            continue
        grad_data = param.grad.data
        zeros = grad_data.new().resize_as_(grad_data).zero_()
        param.grad = Variable(zeros)


# One Adam optimizer per network; each only steps its own parameters.
G_solver = optim.Adam(G_params, lr=1e-3)
D_solver = optim.Adam(D_params, lr=1e-3)

# Fixed-size binary cross-entropy targets of shape (mb_size, 1).
ones_label = Variable(torch.ones(mb_size, 1))
zeros_label = Variable(torch.zeros(mb_size, 1))


for it in range(100000):
    # Sample data
    z = Variable(torch.randn(mb_size, Z_dim))
    X, _ = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))

    # Discriminator forward-loss-backward-update
    G_sample = G(z)
    D_real = D(X)
    D_fake = D(G_sample)

    # Real samples are pushed toward label 1, generated samples toward 0.
    D_loss_real = nn.binary_cross_entropy(D_real, ones_label)
    D_loss_fake = nn.binary_cross_entropy(D_fake, zeros_label)
    D_loss = D_loss_real + D_loss_fake

    D_loss.backward()
    D_solver.step()

    # Housekeeping - reset gradient
    reset_grad()

    # Generator forward-loss-backward-update
    z = Variable(torch.randn(mb_size, Z_dim))
    G_sample = G(z)
    D_fake = D(G_sample)

    # The generator is trained so that D labels its samples as 1.
    G_loss = nn.binary_cross_entropy(D_fake, ones_label)

    G_loss.backward()
    G_solver.step()

    # Housekeeping - reset gradient
    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        print('Iter-{}; D_loss: {}; G_loss: {}'.format(it, D_loss.data.numpy(), G_loss.data.numpy()))

        # Reuse the noise batch of the generator step; keep the first 16.
        samples = G(z).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        # One grid cell per sample, drawn as a 28x28 image without axes.
        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        # Files are named by the zero-padded running counter c.
        plt.savefig('out/{}.png'.format(str(c).zfill(3)), bbox_inches='tight')
        c += 1
        plt.close(fig)


================================================
FILE: GAN/vanilla_gan/gan_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os


def xavier_init(size):
    """Return a random-normal tensor of shape `size`.

    The standard deviation is 1 / sqrt(size[0] / 2), where size[0] is the
    input dimension of the layer.
    """
    fan_in = size[0]
    half_fan_in = fan_in / 2.
    stddev = 1. / tf.sqrt(half_fan_in)
    return tf.random_normal(shape=size, stddev=stddev)


# Input placeholder for the discriminator: batches of 784-dimensional rows.
X = tf.placeholder(tf.float32, shape=[None, 784])

# Discriminator parameters: 784 -> 128 -> 1.
D_W1 = tf.Variable(xavier_init([784, 128]))
D_b1 = tf.Variable(tf.zeros(shape=[128]))

D_W2 = tf.Variable(xavier_init([128, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

# Variables updated by the discriminator optimizer.
theta_D = [D_W1, D_W2, D_b1, D_b2]


# Noise placeholder for the generator: batches of 100-dimensional rows.
Z = tf.placeholder(tf.float32, shape=[None, 100])

# Generator parameters: 100 -> 128 -> 784.
G_W1 = tf.Variable(xavier_init([100, 128]))
G_b1 = tf.Variable(tf.zeros(shape=[128]))

G_W2 = tf.Variable(xavier_init([128, 784]))
G_b2 = tf.Variable(tf.zeros(shape=[784]))

# Variables updated by the generator optimizer.
theta_G = [G_W1, G_W2, G_b1, G_b2]


def sample_Z(m, n):
    """Draw an (m, n) array of noise uniformly distributed on [-1, 1)."""
    shape = [m, n]
    return np.random.uniform(low=-1., high=1., size=shape)


def generator(z):
    """Map noise `z` through a ReLU hidden layer and a sigmoid output layer."""
    hidden = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    logits = tf.matmul(hidden, G_W2) + G_b2
    return tf.nn.sigmoid(logits)


def discriminator(x):
    """Score `x` with a one-hidden-layer network.

    Returns a pair (probability, logit): the sigmoid of the output unit and
    its raw pre-activation value.
    """
    hidden = tf.nn.relu(tf.matmul(x, D_W1) + D_b1)
    logit = tf.matmul(hidden, D_W2) + D_b2
    prob = tf.nn.sigmoid(logit)
    return prob, logit


def plot(samples):
    """Draw `samples` as 28x28 grayscale images on a 4x4 grid.

    Returns the matplotlib figure; the caller saves and closes it.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for cell, sample in enumerate(samples):
        axes = plt.subplot(grid[cell])
        plt.axis('off')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        axes.set_aspect('equal')
        image = sample.reshape(28, 28)
        plt.imshow(image, cmap='Greys_r')

    return figure


# Build the graph: generated samples, and discriminator outputs (probability
# and logit) for the real batch and for the generated batch.
G_sample = generator(Z)
D_real, D_logit_real = discriminator(X)
D_fake, D_logit_fake = discriminator(G_sample)

# Direct formulation on probabilities, kept for reference:
# D_loss = -tf.reduce_mean(tf.log(D_real) + tf.log(1. - D_fake))
# G_loss = -tf.reduce_mean(tf.log(D_fake))

# Alternative losses:
# -------------------
# Cross-entropy computed on the logits: real -> label 1, fake -> label 0 for
# the discriminator; fake -> label 1 for the generator.
D_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_real, labels=tf.ones_like(D_logit_real)))
D_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.zeros_like(D_logit_fake)))
D_loss = D_loss_real + D_loss_fake
G_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=D_logit_fake, labels=tf.ones_like(D_logit_fake)))

# Each optimizer only updates its own network's variables.
D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

mb_size = 128  # mini-batch size
Z_dim = 100  # noise dimension fed to the Z placeholder

mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Sample images are written to ./out
if not os.path.exists('out/'):
    os.makedirs('out/')

# Running index used to name the saved sample images.
i = 0

for it in range(1000000):
    # Every 1000 iterations, save a grid of 16 samples before the update.
    if it % 1000 == 0:
        samples = sess.run(G_sample, feed_dict={Z: sample_Z(16, Z_dim)})

        fig = plot(samples)
        plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)

    X_mb, _ = mnist.train.next_batch(mb_size)

    # Discriminator and generator steps each draw a fresh noise batch.
    _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, Z: sample_Z(mb_size, Z_dim)})
    _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={Z: sample_Z(mb_size, Z_dim)})

    # Log the most recent losses on the same 1000-iteration schedule.
    if it % 1000 == 0:
        print('Iter: {}'.format(it))
        print('D loss: {:.4}'. format(D_loss_curr))
        print('G_loss: {:.4}'.format(G_loss_curr))
        print()


================================================
FILE: GAN/wasserstein_gan/wgan_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Load MNIST (labels one-hot encoded) and derive the layer sizes from it.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32  # mini-batch size
z_dim = 10  # dimensionality of the generator's noise input
X_dim = mnist.train.images.shape[1]  # number of features per image
y_dim = mnist.train.labels.shape[1]  # width of the one-hot label vector
h_dim = 128  # hidden layer width of both networks
cnt = 0  # running index used to name the saved sample images
lr = 1e-4  # learning rate passed to both RMSprop optimizers


# Generator: noise -> hidden (ReLU) -> data space squashed by a sigmoid.
G = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)


# Critic: data -> hidden (ReLU) -> one unbounded score (no output sigmoid).
D = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1),
)


def reset_grad():
    """Zero the accumulated gradients of the generator and the critic."""
    for net in (G, D):
        net.zero_grad()


# One RMSprop optimizer per network.
G_solver = optim.RMSprop(G.parameters(), lr=lr)
D_solver = optim.RMSprop(D.parameters(), lr=lr)


for it in range(1000000):
    # Five critic updates are performed for every generator update.
    for _ in range(5):
        # Sample data
        z = Variable(torch.randn(mb_size, z_dim))
        X, _ = mnist.train.next_batch(mb_size)
        X = Variable(torch.from_numpy(X))

        # Discriminator forward-loss-backward-update
        G_sample = G(z)
        D_real = D(X)
        D_fake = D(G_sample)

        # Minimizing this maximizes mean(D(real)) - mean(D(fake)).
        D_loss = -(torch.mean(D_real) - torch.mean(D_fake))

        D_loss.backward()
        D_solver.step()

        # Weight clipping
        # Applied in place after each critic step, range [-0.01, 0.01].
        for p in D.parameters():
            p.data.clamp_(-0.01, 0.01)

        # Housekeeping - reset gradient
        reset_grad()

    # Generator forward-loss-backward-update
    # NOTE(review): this X batch is drawn but not used by the generator step.
    X, _ = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))
    z = Variable(torch.randn(mb_size, z_dim))

    G_sample = G(z)
    D_fake = D(G_sample)

    # The generator maximizes the critic's score on its samples.
    G_loss = -torch.mean(D_fake)

    G_loss.backward()
    G_solver.step()

    # Housekeeping - reset gradient
    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        print('Iter-{}; D_loss: {}; G_loss: {}'
              .format(it, D_loss.data.numpy(), G_loss.data.numpy()))

        # Reuse the noise batch of the generator step; keep the first 16.
        samples = G(z).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        # One grid cell per sample, drawn as a 28x28 image without axes.
        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        # Files are named by the zero-padded running counter cnt.
        plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: GAN/wasserstein_gan/wgan_tensorflow.py
================================================
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os


mb_size = 32  # mini-batch size
X_dim = 784  # number of features per image
z_dim = 10  # dimensionality of the generator's noise input
h_dim = 128  # hidden layer width of both networks

# MNIST with one-hot labels.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)


def plot(samples):
    """Render each sample as a 28x28 grayscale tile on a 4x4 grid.

    Returns the matplotlib figure so the caller can save and close it.
    """
    canvas = plt.figure(figsize=(4, 4))
    layout = gridspec.GridSpec(4, 4)
    layout.update(wspace=0.05, hspace=0.05)

    for slot, flat_image in enumerate(samples):
        tile = plt.subplot(layout[slot])
        plt.axis('off')
        tile.set_xticklabels([])
        tile.set_yticklabels([])
        tile.set_aspect('equal')
        plt.imshow(flat_image.reshape(28, 28), cmap='Greys_r')

    return canvas


def xavier_init(size):
    """Sample a normal tensor of shape `size` with std 1 / sqrt(size[0] / 2)."""
    n_inputs = size[0]
    sigma = 1. / tf.sqrt(n_inputs / 2.)
    return tf.random_normal(shape=size, stddev=sigma)


# Input placeholder for the critic.
X = tf.placeholder(tf.float32, shape=[None, X_dim])

# Critic parameters: X_dim -> h_dim -> 1.
D_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

# Variables updated (and clipped) for the critic.
theta_D = [D_W1, D_W2, D_b1, D_b2]


# Noise placeholder for the generator.
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Generator parameters: z_dim -> h_dim -> X_dim.
G_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

G_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
G_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Variables updated by the generator optimizer.
theta_G = [G_W1, G_W2, G_b1, G_b2]


def sample_z(m, n):
    """Return m noise vectors of length n, uniform on [-1, 1)."""
    dims = [m, n]
    noise = np.random.uniform(-1., 1., size=dims)
    return noise


def generator(z):
    """Map noise `z` through a ReLU hidden layer and a sigmoid output layer."""
    hidden = tf.nn.relu(tf.matmul(z, G_W1) + G_b1)
    pre_sigmoid = tf.matmul(hidden, G_W2) + G_b2
    return tf.nn.sigmoid(pre_sigmoid)


def discriminator(x):
    """Critic: return one unbounded score per row of `x` (no sigmoid)."""
    hidden = tf.nn.relu(tf.matmul(x, D_W1) + D_b1)
    score = tf.matmul(hidden, D_W2) + D_b2
    return score


# Build the graph: generated samples and critic scores for both batches.
G_sample = generator(z)
D_real = discriminator(X)
D_fake = discriminator(G_sample)

# D_loss is the critic's objective to MAXIMIZE (hence minimize(-D_loss));
# the generator maximizes the critic's score on its own samples.
D_loss = tf.reduce_mean(D_real) - tf.reduce_mean(D_fake)
G_loss = -tf.reduce_mean(D_fake)

D_solver = (tf.train.RMSPropOptimizer(learning_rate=1e-4)
            .minimize(-D_loss, var_list=theta_D))
G_solver = (tf.train.RMSPropOptimizer(learning_rate=1e-4)
            .minimize(G_loss, var_list=theta_G))

# Ops that clip every critic variable to [-0.01, 0.01].
clip_D = [p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in theta_D]

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Sample images are written to ./out
if not os.path.exists('out/'):
    os.makedirs('out/')

# Running index used to name the saved sample images.
i = 0

for it in range(1000000):
    # Five critic updates are performed for every generator update.
    for _ in range(5):
        X_mb, _ = mnist.train.next_batch(mb_size)

        _, D_loss_curr = sess.run(
            [D_solver, D_loss],
            feed_dict={X: X_mb, z: sample_z(mb_size, z_dim)}
        )
        # Clip in a separate run: ops fetched in a single sess.run() have no
        # guaranteed order, so clipping alongside D_solver could run before
        # the update and leave the critic weights outside the clip range.
        sess.run(clip_D)

    _, G_loss_curr = sess.run(
        [G_solver, G_loss],
        feed_dict={z: sample_z(mb_size, z_dim)}
    )

    # Log every 100 iterations; save a sample grid every 1000 iterations.
    if it % 100 == 0:
        print('Iter: {}; D loss: {:.4}; G_loss: {:.4}'
              .format(it, D_loss_curr, G_loss_curr))

        if it % 1000 == 0:
            samples = sess.run(G_sample, feed_dict={z: sample_z(16, z_dim)})

            fig = plot(samples)
            plt.savefig('out/{}.png'
                        .format(str(i).zfill(3)), bbox_inches='tight')
            i += 1
            plt.close(fig)


================================================
FILE: HelmholtzMachine/README.md
================================================
# Helmholtz Machines

Implementation of (Binary) Helmholtz Machines.

## Disclaimer
Currently the results are not that good. However, the code may still be useful for gaining intuition about the Wake-Sleep algorithm.


================================================
FILE: HelmholtzMachine/vanilla_HM/helmholtz.py
================================================
"""
One layer Binary Helmholtz Machine
==================================
"""
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from tensorflow.examples.tutorials.mnist import input_data


# Output images are written to ./out
if not os.path.exists('out/'):
    os.makedirs('out/')

# Load MNIST (labels one-hot encoded) and read the data dimensions from it.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
X_dim = mnist.train.images.shape[1]
y_dim = mnist.train.labels.shape[1]

mb_size = 16  # mini-batch size
h_dim = 36  # number of hidden units

# All parameters start as small Gaussian noise (std 0.001).
# Recognition/inference weight
R = np.random.randn(X_dim, h_dim) * 0.001
# Generative weight
W = np.random.randn(h_dim, X_dim) * 0.001
# Generative bias of hidden variables
B = np.random.randn(h_dim) * 0.001


def sigm(x):
    """Elementwise logistic sigmoid: 1 / (1 + exp(-x))."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator


def infer(X):
    """Recognition pass: hidden-unit probabilities sigm(X @ R).

    Shapes: mb_size x x_dim -> mb_size x h_dim.
    """
    pre_activation = X @ R
    return sigm(pre_activation)


def generate(H):
    """Generative pass: visible-unit probabilities sigm(H @ W).

    Shapes: mb_size x h_dim -> mb_size x x_dim.
    """
    pre_activation = H @ W
    return sigm(pre_activation)


# Wake-Sleep Algorithm
# --------------------
# Each iteration alternates two phases:
#   wake:  recognize hidden states for real data, then move the generative
#          parameters (B, W) toward reproducing them;
#   sleep: dream (hidden, visible) pairs from the generative model, then
#          move the recognition weights R toward recovering the hidden state.
# The step size decays as alpha / t.
alpha = 0.1

for t in range(1, 1001):
    # ----------
    # Wake phase
    # ----------

    # Upward pass
    # (np.float was removed in NumPy 1.24; np.float64 is the same dtype.)
    X_mb = (mnist.train.next_batch(mb_size)[0] > 0.5).astype(np.float64)
    H = np.random.binomial(n=1, p=infer(X_mb))

    # Downward pass
    H_prime = sigm(B)
    V = generate(H)

    # Compute gradient
    dB = H - H_prime
    dW = np.array([np.outer(H[i], X_mb[i] - V[i]) for i in range(mb_size)])

    # Update generative weight
    B += (alpha/t) * np.mean(dB, axis=0)
    W += (alpha/t) * np.mean(dW, axis=0)

    # -----------
    # Sleep phase
    # -----------

    # Downward pass
    # Draw a full mini-batch of dreams. Sampling only a single hidden vector
    # of shape (h_dim,) would make H_mb[i] below a scalar, and the update
    # would then broadcast the same column onto every hidden unit of R.
    H_mb = np.random.binomial(n=1, p=sigm(B), size=(mb_size, h_dim))
    V = np.random.binomial(n=1, p=generate(H_mb))

    # Upward pass
    H = infer(V)

    # Compute gradient
    # One (X_dim, h_dim) outer product per dreamed sample.
    dR = np.array([np.outer(V[i], H_mb[i] - H[i]) for i in range(mb_size)])

    # Update recognition weight
    R += (alpha/t) * np.mean(dR, axis=0)


# Visualization
# -------------

def plot(samples, size, name):
    """Save `samples` as a 4x4 grid of square grayscale images.

    Each sample is reshaped to (size, size), with `size` truncated to an int,
    and the figure is written to 'out/<name>.png' and then closed.
    """
    side = int(size)
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for cell, sample in enumerate(samples):
        axes = plt.subplot(grid[cell])
        plt.axis('off')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        axes.set_aspect('equal')
        image = sample.reshape(side, side)
        plt.imshow(image, cmap='Greys_r')

    target = 'out/{}.png'.format(name)
    plt.savefig(target, bbox_inches='tight')
    plt.close(figure)


# Binarize one test mini-batch.
# (np.float was removed in NumPy 1.24; np.float64 is the same dtype.)
X = (mnist.test.next_batch(mb_size)[0] > 0.5).astype(np.float64)

# Sample hidden states for the batch and save them as 'out/H.png'.
H = np.random.binomial(n=1, p=infer(X))
plot(H, np.sqrt(h_dim), 'H')

# Sample reconstructions from those hidden states and save as 'out/V.png'.
X_recon = np.random.binomial(n=1, p=generate(H))
plot(X_recon, np.sqrt(X_dim), 'V')


================================================
FILE: LICENSE
================================================
This is free and unencumbered software released into the public domain.

Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.

In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.

For more information, please refer to <http://unlicense.org>


================================================
FILE: RBM/README.md
================================================
# Restricted Boltzmann Machines

Implementation of (Binary) Restricted Boltzmann Machines (RBM). Both hidden and visible variables are Bernoulli.

## Algorithm List
1. RBM with Contrastive Divergence (CD)
2. RBM with Persistent Contrastive Divergence (PCD)


================================================
FILE: RBM/rbm_binary_cd.py
================================================
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from tensorflow.examples.tutorials.mnist import input_data


# Output images are written to ./out
if not os.path.exists('out/'):
    os.makedirs('out/')

# Load MNIST (labels one-hot encoded) and read the data dimensions from it.
mnist = input_data.read_data_sets('../MNIST_data', one_hot=True)
X_dim = mnist.train.images.shape[1]
y_dim = mnist.train.labels.shape[1]

mb_size = 16  # mini-batch size
h_dim = 36  # number of hidden units

# Parameters start as small Gaussian noise (std 0.001):
# W couples visible and hidden units; a has one entry per hidden unit and
# b one entry per visible unit.
W = np.random.randn(X_dim, h_dim) * 0.001
a = np.random.randn(h_dim) * 0.001
b = np.random.randn(X_dim) * 0.001


def sigm(x):
    """Logistic sigmoid applied elementwise."""
    neg_exp = np.exp(-x)
    return 1 / (1 + neg_exp)


def infer(X):
    """Return p(h = 1 | v) for visible vector(s) X.

    Shapes: mb_size x x_dim -> mb_size x h_dim; a single 1-D visible vector
    is accepted as well.

    The hidden bias `a` is included in the conditional. It is updated by the
    training loop, so leaving it out would make it a dead parameter.
    """
    return sigm(X @ W + a)


def generate(H):
    """Return p(v = 1 | h) for hidden vector(s) H.

    Shapes: mb_size x h_dim -> mb_size x x_dim; a single 1-D hidden vector
    is accepted as well.

    The visible bias `b` is included in the conditional. It is updated by the
    training loop, so leaving it out would make it a dead parameter.
    """
    return sigm(H @ W.T + b)


# Contrastive Divergence
# ----------------------
# Approximate the log partition gradient using K steps of Gibbs sampling
# started from each data point.

alpha = 0.1
K = 10  # Num. of Gibbs sampling step

for t in range(1, 1001):
    # Binarize the mini-batch.
    # (np.float was removed in NumPy 1.24; np.float64 is the same dtype.)
    X_mb = (mnist.train.next_batch(mb_size)[0] > 0.5).astype(np.float64)
    # Gradient accumulators for W, a and b over the mini-batch.
    g = 0
    g_a = 0
    g_b = 0

    for v in X_mb:
        # E[h|v,W]
        h = infer(v)

        # Gibbs sampling steps
        # --------------------
        v_prime = np.copy(v)

        for k in range(K):
            # h ~ p(h|v,W)
            h_prime = np.random.binomial(n=1, p=infer(v_prime))
            # v ~ p(v|h,W)
            v_prime = np.random.binomial(n=1, p=generate(h_prime))

        # E[h|v',W]
        h_prime = infer(v_prime)

        # Compute data gradient
        # Positive (data) statistics minus negative (model sample) ones.
        grad_w = np.outer(v, h) - np.outer(v_prime, h_prime)
        grad_a = h - h_prime
        grad_b = v - v_prime

        # Accumulate minibatch gradient
        g += grad_w
        g_a += grad_a
        g_b += grad_b

    # Monte carlo gradient
    g *= 1 / mb_size
    g_a *= 1 / mb_size
    g_b *= 1 / mb_size

    # Update to maximize
    W += alpha * g
    a += alpha * g_a
    b += alpha * g_b


# Visualization
# -------------

def plot(samples, size, name):
    """Write a 4x4 grid of samples to 'out/<name>.png'.

    Every sample is reshaped to a square image of side int(size) and drawn
    in grayscale without axes; the figure is closed after saving.
    """
    edge = int(size)
    canvas = plt.figure(figsize=(4, 4))
    layout = gridspec.GridSpec(4, 4)
    layout.update(wspace=0.05, hspace=0.05)

    for slot, flat in enumerate(samples):
        tile = plt.subplot(layout[slot])
        plt.axis('off')
        tile.set_xticklabels([])
        tile.set_yticklabels([])
        tile.set_aspect('equal')
        plt.imshow(flat.reshape(edge, edge), cmap='Greys_r')

    plt.savefig('out/{}.png'.format(name), bbox_inches='tight')
    plt.close(canvas)


# Binarize one test mini-batch.
# (np.float was removed in NumPy 1.24; np.float64 is the same dtype.)
X = (mnist.test.next_batch(mb_size)[0] > 0.5).astype(np.float64)

# Sample hidden states for the batch and save them as 'out/H.png'.
H = np.random.binomial(n=1, p=infer(X))
plot(H, np.sqrt(h_dim), 'H')

# Threshold the reconstruction probabilities at 0.5; save as 'out/V.png'.
X_recon = (generate(H) > 0.5).astype(np.float64)
plot(X_recon, np.sqrt(X_dim), 'V')


================================================
FILE: RBM/rbm_binary_pcd.py
================================================
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from tensorflow.examples.tutorials.mnist import input_data


# Output images are written to ./out
if not os.path.exists('out/'):
    os.makedirs('out/')

# Load MNIST (labels one-hot encoded) and read the data dimensions from it.
mnist = input_data.read_data_sets('../MNIST_data', one_hot=True)
X_dim = mnist.train.images.shape[1]
y_dim = mnist.train.labels.shape[1]

mb_size = 16  # mini-batch size (also the number of persistent chains)
h_dim = 36  # number of hidden units

# Visible-hidden coupling weights, initialized as small Gaussian noise.
W = np.random.randn(X_dim, h_dim) * 0.001


def sigm(x):
    """Squash x elementwise into (0, 1) with the logistic function."""
    return 1. / (1. + np.exp(-x))


def infer(X):
    """Hidden-unit probabilities sigm(X @ W).

    Shapes: mb_size x x_dim -> mb_size x h_dim.
    """
    activation = X @ W
    return sigm(activation)


def generate(H):
    """Visible-unit probabilities sigm(H @ W.T).

    Shapes: mb_size x h_dim -> mb_size x x_dim.
    """
    activation = H @ W.T
    return sigm(activation)


# Persistent Contrastive Divergence
# ---------------------------------
# Approximate the log partition gradient with Gibbs chains that persist
# across parameter updates: every update advances each chain by K steps
# from where the previous update left it.

alpha = 0.1
K = 10  # Num. of MC iteration

# Initialize the markov chain
V_s = sigm(np.random.randn(mb_size, X_dim))
H_s = np.random.binomial(n=1, p=0.5, size=[mb_size, h_dim])

for t in range(1, 1001):
    # Binarize the mini-batch.
    # (np.float was removed in NumPy 1.24; np.float64 is the same dtype.)
    X_mb = (mnist.train.next_batch(mb_size)[0] > 0.5).astype(np.float64)

    # Data-dependent hidden expectations for the positive statistics.
    Mu = infer(X_mb)

    # Gibbs sampling step
    # -------------------
    for i, v_s in enumerate(V_s):
        # Each step must start from the previous step's visible sample.
        # Resampling from the unchanged v_s every time would make the K
        # iterations equivalent to a single Gibbs step.
        v_prime = v_s
        for k in range(K):
            # h ~ p(h|v,W)
            h_prime = np.random.binomial(n=1, p=infer(v_prime))
            # v ~ p(v|h,W)
            v_prime = np.random.binomial(n=1, p=generate(h_prime))

        # Replace with new sample
        V_s[i] = v_prime
        H_s[i] = h_prime

    # Compute average gradient
    # Positive statistics from the data, negative ones from the chains.
    left = np.array([np.outer(X_mb[i], Mu[i]) for i in range(mb_size)])
    right = np.array([np.outer(V_s[i], H_s[i]) for i in range(mb_size)])
    g = np.mean(left, axis=0) - np.mean(right, axis=0)

    # Update
    W += alpha * g  # Maximize likelihood


# Visualization
# -------------

def plot(samples, size, name):
    """Lay out `samples` on a 4x4 grid and save it as 'out/<name>.png'.

    `size` is converted with int() and used as the side length when each
    sample is reshaped into a square grayscale image.
    """
    n_pixels = int(size)
    fig = plt.figure(figsize=(4, 4))
    cells = gridspec.GridSpec(4, 4)
    cells.update(wspace=0.05, hspace=0.05)

    for position, vector in enumerate(samples):
        panel = plt.subplot(cells[position])
        plt.axis('off')
        panel.set_xticklabels([])
        panel.set_yticklabels([])
        panel.set_aspect('equal')
        picture = vector.reshape(n_pixels, n_pixels)
        plt.imshow(picture, cmap='Greys_r')

    plt.savefig('out/{}.png'.format(name), bbox_inches='tight')
    plt.close(fig)


# Binarize one test mini-batch.
# (np.float was removed in NumPy 1.24; np.float64 is the same dtype.)
X = (mnist.test.next_batch(mb_size)[0] > 0.5).astype(np.float64)

# Sample hidden states for the batch and save them as 'out/H.png'.
H = np.random.binomial(n=1, p=infer(X))
plot(H, np.sqrt(h_dim), 'H')

# Threshold the reconstruction probabilities at 0.5; save as 'out/V.png'.
X_recon = (generate(H) > 0.5).astype(np.float64)
plot(X_recon, np.sqrt(X_dim), 'V')


================================================
FILE: README.md
================================================
# Generative Models
Collection of generative models, e.g. GAN, VAE in Pytorch and Tensorflow.
Also present here are RBM and Helmholtz Machine.

## Note:
Generated samples will be stored in the `GAN/{gan_model}/out` (or `VAE/{vae_model}/out`, etc.) directory during training.

## What's in it?

#### Generative Adversarial Nets (GAN)
  1. [Vanilla GAN](https://arxiv.org/abs/1406.2661)
  2. [Conditional GAN](https://arxiv.org/abs/1411.1784)
  3. [InfoGAN](https://arxiv.org/abs/1606.03657)
  4. [Wasserstein GAN](https://arxiv.org/abs/1701.07875)
  5. [Mode Regularized GAN](https://arxiv.org/abs/1612.02136)
  6. [Coupled GAN](https://arxiv.org/abs/1606.07536)
  7. [Auxiliary Classifier GAN](https://arxiv.org/abs/1610.09585)
  8. [Least Squares GAN](https://arxiv.org/abs/1611.04076v2)
  9. [Boundary Seeking GAN](https://arxiv.org/abs/1702.08431)
  10. [Energy Based GAN](https://arxiv.org/abs/1609.03126)
  11. [f-GAN](https://arxiv.org/abs/1606.00709)
  12. [Generative Adversarial Parallelization](https://arxiv.org/abs/1612.04021)
  13. [DiscoGAN](https://arxiv.org/abs/1703.05192)
  14. [Adversarial Feature Learning](https://arxiv.org/abs/1605.09782) & [Adversarially Learned Inference](https://arxiv.org/abs/1606.00704)
  15. [Boundary Equilibrium GAN](https://arxiv.org/abs/1703.10717)
  16. [Improved Training for Wasserstein GAN](https://arxiv.org/abs/1704.00028)
  17. [DualGAN](https://arxiv.org/abs/1704.02510)
  18. [MAGAN: Margin Adaptation for GAN](https://arxiv.org/abs/1704.03817)
  19. [Softmax GAN](https://arxiv.org/abs/1704.06191)
  20. [GibbsNet](https://papers.nips.cc/paper/7094-gibbsnet-iterative-adversarial-inference-for-deep-graphical-models.pdf)

#### Variational Autoencoder (VAE)
  1. [Vanilla VAE](https://arxiv.org/abs/1312.6114)
  2. [Conditional VAE](https://arxiv.org/abs/1406.5298)
  3. [Denoising VAE](https://arxiv.org/abs/1511.06406)
  4. [Adversarial Autoencoder](https://arxiv.org/abs/1511.05644)
  5. [Adversarial Variational Bayes](https://arxiv.org/abs/1701.04722)

#### Restricted Boltzmann Machine (RBM)
  1. [Binary RBM with Contrastive Divergence](http://www.cs.toronto.edu/~fritz/absps/cdmiguel.pdf)
  2. [Binary RBM with Persistent Contrastive Divergence](http://www.cs.toronto.edu/~tijmen/pcd/pcd.pdf)

#### Helmholtz Machine
  1. [Binary Helmholtz Machine with Wake-Sleep Algorithm](http://www.cs.toronto.edu/~fritz/absps/ws.pdf)

## Dependencies

1. Install miniconda <http://conda.pydata.org/miniconda.html>
2. Do `conda env create`
3. Enter the env `source activate generative-models`
4. Install [Tensorflow](https://www.tensorflow.org/get_started/os_setup)
5. Install [Pytorch](https://github.com/pytorch/pytorch#installation)


================================================
FILE: VAE/adversarial_autoencoder/aae_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Load MNIST (labels one-hot encoded) and derive the layer sizes from it.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32  # mini-batch size
z_dim = 5  # dimensionality of the latent code
X_dim = mnist.train.images.shape[1]  # number of features per image
y_dim = mnist.train.labels.shape[1]  # width of the one-hot label vector
h_dim = 128  # hidden layer width of all three networks
cnt = 0  # running index used to name the saved sample images
lr = 1e-3  # learning rate passed to all Adam optimizers


# Encoder
# Data -> hidden (ReLU) -> latent code (linear output).
Q = torch.nn.Sequential(
    torch.nn.Linear(X_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, z_dim)
)

# Decoder
# Latent code -> hidden (ReLU) -> data space squashed by a sigmoid.
P = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)

# Discriminator
# Operates on latent codes and outputs one sigmoid probability per code.
D = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1),
    torch.nn.Sigmoid()
)


def reset_grad():
    """Zero the gradients of the encoder, decoder and discriminator."""
    for net in (Q, P, D):
        net.zero_grad()


def sample_X(size, include_y=False):
    """Draw the next MNIST training mini-batch as torch Variables.

    Args:
        size: number of examples to draw.
        include_y: when True, also return the integer class labels.

    Returns:
        X, or the pair (X, y) when `include_y` is True. y holds the argmax
        of each one-hot label row as int64.
    """
    X, y = mnist.train.next_batch(size)
    X = Variable(torch.from_numpy(X))

    if include_y:
        # np.int was removed in NumPy 1.24; use an explicit int64 dtype.
        y = np.argmax(y, axis=1).astype(np.int64)
        y = Variable(torch.from_numpy(y))
        return X, y

    return X


# One Adam optimizer per network.
Q_solver = optim.Adam(Q.parameters(), lr=lr)
P_solver = optim.Adam(P.parameters(), lr=lr)
D_solver = optim.Adam(D.parameters(), lr=lr)


for it in range(1000000):
    X = sample_X(mb_size)

    """ Reconstruction phase """
    # Encode, decode, and update both encoder and decoder on the
    # reconstruction cross-entropy.
    z_sample = Q(X)
    X_sample = P(z_sample)

    recon_loss = nn.binary_cross_entropy(X_sample, X)

    recon_loss.backward()
    P_solver.step()
    Q_solver.step()
    reset_grad()

    """ Regularization phase """
    # Discriminator
    # Codes drawn from a standard normal count as "real"; encoder outputs
    # count as "fake".
    z_real = Variable(torch.randn(mb_size, z_dim))
    z_fake = Q(X)

    D_real = D(z_real)
    D_fake = D(z_fake)

    D_loss = -torch.mean(torch.log(D_real) + torch.log(1 - D_fake))

    D_loss.backward()
    D_solver.step()
    reset_grad()

    # Generator
    # Only the encoder is stepped here, to make its codes fool D.
    z_fake = Q(X)
    D_fake = D(z_fake)

    G_loss = -torch.mean(torch.log(D_fake))

    G_loss.backward()
    Q_solver.step()
    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        # .item() extracts the Python float from a scalar loss; indexing a
        # 0-dim tensor with [0] raises IndexError on PyTorch >= 0.5.
        print('Iter-{}; D_loss: {:.4}; G_loss: {:.4}; recon_loss: {:.4}'
              .format(it, D_loss.item(), G_loss.item(), recon_loss.item()))

        # Decode the standard-normal codes of this iteration; keep 16.
        samples = P(z_real).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        # One grid cell per sample, drawn as a 28x28 image without axes.
        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        # Files are named by the zero-padded running counter cnt.
        plt.savefig('out/{}.png'
                    .format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: VAE/adversarial_autoencoder/aae_tensorflow.py
================================================
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from tensorflow.examples.tutorials.mnist import input_data


# Load MNIST (labels one-hot encoded) and derive the layer sizes from it.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32  # mini-batch size
z_dim = 10  # dimensionality of the latent code
X_dim = mnist.train.images.shape[1]  # number of features per image
y_dim = mnist.train.labels.shape[1]  # width of the one-hot label vector
h_dim = 128  # hidden layer width of all three networks
c = 0
lr = 1e-3


def plot(samples):
    """Arrange `samples` as 28x28 grayscale images on a 4x4 grid.

    Returns the matplotlib figure; saving and closing is left to the caller.
    """
    fig = plt.figure(figsize=(4, 4))
    spec = gridspec.GridSpec(4, 4)
    spec.update(wspace=0.05, hspace=0.05)

    for k, row in enumerate(samples):
        panel = plt.subplot(spec[k])
        plt.axis('off')
        panel.set_xticklabels([])
        panel.set_yticklabels([])
        panel.set_aspect('equal')
        digit = row.reshape(28, 28)
        plt.imshow(digit, cmap='Greys_r')

    return fig


def xavier_init(size):
    """Draw a normal tensor of shape `size`; std is 1 / sqrt(size[0] / 2)."""
    rows = size[0]
    spread = 1. / tf.sqrt(rows / 2.)
    return tf.random_normal(shape=size, stddev=spread)


""" Q(z|X) """
# Placeholders: data batch and externally sampled latent codes.
X = tf.placeholder(tf.float32, shape=[None, X_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Encoder parameters: X_dim -> h_dim -> z_dim.
Q_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
Q_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

Q_W2 = tf.Variable(xavier_init([h_dim, z_dim]))
Q_b2 = tf.Variable(tf.zeros(shape=[z_dim]))

theta_Q = [Q_W1, Q_W2, Q_b1, Q_b2]


def Q(X):
    """Encoder: map data X to a latent code (ReLU hidden layer, linear output)."""
    hidden = tf.nn.relu(tf.matmul(X, Q_W1) + Q_b1)
    code = tf.matmul(hidden, Q_W2) + Q_b2
    return code


""" P(X|z) """
# Decoder parameters: z_dim -> h_dim -> X_dim.
P_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
P_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

P_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
P_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

theta_P = [P_W1, P_W2, P_b1, P_b2]


def P(z):
    """Decoder: map latent codes to data space.

    Returns a pair (probabilities, logits): the sigmoid output and the raw
    pre-sigmoid values it was computed from.
    """
    hidden = tf.nn.relu(tf.matmul(z, P_W1) + P_b1)
    pre_sigmoid = tf.matmul(hidden, P_W2) + P_b2
    return tf.nn.sigmoid(pre_sigmoid), pre_sigmoid


""" D(z) """
# Discriminator parameters: z_dim -> h_dim -> 1.
D_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

theta_D = [D_W1, D_W2, D_b1, D_b2]


def D(z):
    """Discriminator on latent codes: return one sigmoid probability per row."""
    hidden = tf.nn.relu(tf.matmul(z, D_W1) + D_b1)
    score = tf.matmul(hidden, D_W2) + D_b2
    return tf.nn.sigmoid(score)


""" Training """
# Encode the data batch and decode it again; only the logits are needed
# for the reconstruction loss.
z_sample = Q(X)
_, logits = P(z_sample)

# Sample from random z
X_samples, _ = P(z)

# E[log P(X|z)]
recon_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=X))

# Adversarial loss to approx. Q(z|X)
# Codes fed through the z placeholder count as "real"; encoder outputs
# count as "fake".
D_real = D(z)
D_fake = D(z_sample)

D_loss = -tf.reduce_mean(tf.log(D_real) + tf.log(1. - D_fake))
G_loss = -tf.reduce_mean(tf.log(D_fake))

# Three optimizers: autoencoder (P and Q), discriminator, and the encoder
# acting as generator.
AE_solver = tf.train.AdamOptimizer().minimize(recon_loss, var_list=theta_P + theta_Q)
D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_Q)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Sample images are written to ./out
if not os.path.exists('out/'):
    os.makedirs('out/')

# Running index used to name the saved sample images.
i = 0

for it in range(1000000):
    X_mb, _ = mnist.train.next_batch(mb_size)
    # Standard-normal codes serve as the prior samples.
    z_mb = np.random.randn(mb_size, z_dim)

    # Reconstruction step, then discriminator step, then generator step.
    _, recon_loss_curr = sess.run([AE_solver, recon_loss], feed_dict={X: X_mb})
    _, D_loss_curr = sess.run([D_solver, D_loss], feed_dict={X: X_mb, z: z_mb})
    _, G_loss_curr = sess.run([G_solver, G_loss], feed_dict={X: X_mb})

    # Every 1000 iterations: log the losses and save a grid of 16 samples.
    if it % 1000 == 0:
        print('Iter: {}; D_loss: {:.4}; G_loss: {:.4}; Recon_loss: {:.4}'
              .format(it, D_loss_curr, G_loss_curr, recon_loss_curr))

        samples = sess.run(X_samples, feed_dict={z: np.random.randn(16, z_dim)})

        fig = plot(samples)
        plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)


================================================
FILE: VAE/adversarial_vb/avb_pytorch.py
================================================
import torch
import torch.nn
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Dataset and hyperparameters shared by the rest of the script.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32  # minibatch size
z_dim = 10  # width of the latent code
eps_dim = 4  # width of the noise vector concatenated to the encoder input
X_dim = mnist.train.images.shape[1]  # number of features per image
y_dim = mnist.train.labels.shape[1]  # number of columns in the one-hot labels
h_dim = 128  # hidden units in each network
cnt = 0  # index used to name the saved sample images
lr = 1e-3  # learning rate of all three Adam optimizers


def log(x):
    """Return the elementwise natural log of `x`, offset by 1e-8 so zeros stay finite."""
    shifted = x + 1e-8
    return torch.log(shifted)


# Encoder: q(z|x,eps)
# Maps the image concatenated with a noise vector straight to a z_dim-wide
# code; there are no separate mean/variance outputs.
Q = torch.nn.Sequential(
    torch.nn.Linear(X_dim + eps_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, z_dim)
)

# Decoder: p(x|z)
# The final sigmoid keeps outputs in (0, 1) so the training loop can score
# them with binary cross-entropy.
P = torch.nn.Sequential(
    torch.nn.Linear(z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, X_dim),
    torch.nn.Sigmoid()
)

# Discriminator: T(X, z)
# Emits one raw score per (X, z) pair; callers apply the sigmoid when they
# need a probability.
T = torch.nn.Sequential(
    torch.nn.Linear(X_dim + z_dim, h_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(h_dim, 1)
)


def reset_grad():
    """Clear the accumulated gradients of the encoder, decoder and discriminator."""
    for net in (Q, P, T):
        net.zero_grad()


def sample_X(size, include_y=False):
    """Fetch the next minibatch of MNIST training images.

    Args:
        size: number of examples to request from `mnist.train`.
        include_y: when True, also return the class index of every image.

    Returns:
        A Variable wrapping the image batch, or an `(X, y)` pair in which `y`
        holds int64 class indices when `include_y` is set.
    """
    X, y = mnist.train.next_batch(size)
    X = Variable(torch.from_numpy(X))

    if include_y:
        # Labels are loaded one-hot; collapse them to class indices.
        # np.int64 replaces the `np.int` alias, which NumPy deprecated in
        # 1.20 and removed in 1.24 (it raised AttributeError there).
        y = np.argmax(y, axis=1).astype(np.int64)
        y = Variable(torch.from_numpy(y))
        return X, y

    return X


# One Adam optimizer per network, so each phase below can step only the
# networks it is meant to update.
Q_solver = optim.Adam(Q.parameters(), lr=lr)
P_solver = optim.Adam(P.parameters(), lr=lr)
T_solver = optim.Adam(T.parameters(), lr=lr)


for it in range(1000000):
    X = sample_X(mb_size)
    eps = Variable(torch.randn(mb_size, eps_dim))  # noise input for the encoder
    z = Variable(torch.randn(mb_size, z_dim))  # standard-normal latent samples

    # Optimize VAE
    z_sample = Q(torch.cat([X, eps], 1))
    X_sample = P(z_sample)
    T_sample = T(torch.cat([X, z_sample], 1))

    # disc is the batch mean of -T(X, z_sample); loglike is the negated
    # summed binary cross-entropy divided by the batch size.
    disc = torch.mean(-T_sample)
    loglike = -nn.binary_cross_entropy(X_sample, X, size_average=False) / mb_size

    # Despite the name, this is the negated objective so that it can be minimized.
    elbo = -(disc + loglike)

    elbo.backward()
    Q_solver.step()
    P_solver.step()
    reset_grad()

    # Discriminator T(X, z)
    # Re-encode after the update above, then train T to separate encoder
    # codes (target 1) from the random `z` samples (target 0).
    z_sample = Q(torch.cat([X, eps], 1))
    T_q = nn.sigmoid(T(torch.cat([X, z_sample], 1)))
    T_prior = nn.sigmoid(T(torch.cat([X, z], 1)))

    T_loss = -torch.mean(log(T_q) + log(1. - T_prior))

    T_loss.backward()
    T_solver.step()
    # Also discards the gradients this backward pass left on Q.
    reset_grad()

    # Print and plot every now and then
    if it % 1000 == 0:
        # Both values are printed negated, i.e. as the quantities being maximized.
        # NOTE(review): indexing a scalar loss with `.data[0]` looks tied to an
        # old PyTorch release - confirm the targeted version.
        print('Iter-{}; ELBO: {:.4}; T_loss: {:.4}'
              .format(it, -elbo.data[0], -T_loss.data[0]))

        # Decode the first 16 random latent samples of this iteration.
        samples = P(z).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        plt.savefig('out/{}.png'
                    .format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: VAE/adversarial_vb/avb_tensorflow.py
================================================
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Dataset and hyperparameters shared by the rest of the script.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 32  # minibatch size
z_dim = 10  # width of the latent code
eps_dim = 4  # width of the noise vector concatenated to the encoder input
X_dim = mnist.train.images.shape[1]  # number of features per image
y_dim = mnist.train.labels.shape[1]  # number of columns in the one-hot labels
h_dim = 128  # hidden units in each network
c = 0  # not referenced again in this script
lr = 1e-3  # not passed to the optimizers below, which are built with default arguments


def log(x):
    """Return the elementwise natural log of `x`, offset by 1e-8 so zeros stay finite."""
    shifted = x + 1e-8
    return tf.log(shifted)


def plot(samples):
    """Draw flattened 28x28 images on a 4x4 grid and return the figure.

    Args:
        samples: sequence of arrays, each reshapeable to (28, 28); the grid
            has 16 cells.

    Returns:
        The matplotlib figure with one borderless grayscale axes per sample.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for idx, image in enumerate(samples):
        axes = plt.subplot(grid[idx])
        # The subplot just created is the current axes, so this hides its frame.
        axes.axis('off')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        axes.set_aspect('equal')
        plt.imshow(image.reshape(28, 28), cmap='Greys_r')

    return figure


def xavier_init(size):
    """Sample an initial weight tensor of shape `size`.

    Values come from a zero-mean normal whose standard deviation is
    1 / sqrt(size[0] / 2).
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)


""" Q(z|X,eps) """
# Graph inputs: data batch, latent samples, and encoder noise.
X = tf.placeholder(tf.float32, shape=[None, X_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])
eps = tf.placeholder(tf.float32, shape=[None, eps_dim])

# Encoder weights; the first layer takes X and eps concatenated.
Q_W1 = tf.Variable(xavier_init([X_dim + eps_dim, h_dim]))
Q_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
Q_W2 = tf.Variable(xavier_init([h_dim, z_dim]))
Q_b2 = tf.Variable(tf.zeros(shape=[z_dim]))

# Encoder variables, used in the var_list of the VAE optimizer.
theta_Q = [Q_W1, Q_W2, Q_b1, Q_b2]


def Q(X, eps):
    """Encode `X` together with noise `eps` into a latent code.

    The two inputs are concatenated along axis 1, passed through a ReLU
    hidden layer, and mapped linearly to z_dim outputs.
    """
    joined = tf.concat(axis=1, values=[X, eps])
    hidden = tf.nn.relu(tf.matmul(joined, Q_W1) + Q_b1)
    return tf.matmul(hidden, Q_W2) + Q_b2


""" P(X|z) """
# Decoder weights: z_dim -> h_dim -> X_dim.
P_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
P_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
P_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
P_b2 = tf.Variable(tf.zeros(shape=[X_dim]))

# Decoder variables, used in the var_list of the VAE optimizer.
theta_P = [P_W1, P_W2, P_b1, P_b2]


def P(z):
    """Decode latent codes `z`.

    Returns:
        A `(prob, logits)` pair: the sigmoid of the output layer and the raw
        output-layer values it was computed from.
    """
    hidden = tf.nn.relu(tf.matmul(z, P_W1) + P_b1)
    raw = tf.matmul(hidden, P_W2) + P_b2
    return tf.nn.sigmoid(raw), raw


""" D(z) """
# Discriminator weights. Although the section label above reads "D(z)", the
# discriminator defined below scores (X, z) pairs, which is why the first
# layer takes X_dim + z_dim inputs.
D_W1 = tf.Variable(xavier_init([X_dim + z_dim, h_dim]))
D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
D_W2 = tf.Variable(xavier_init([h_dim, 1]))
D_b2 = tf.Variable(tf.zeros(shape=[1]))

# Discriminator variables, used in the var_list of its optimizer.
theta_D = [D_W1, D_W2, D_b1, D_b2]


def D(X, z):
    """Return the discriminator's raw score for each (X, z) pair.

    `X` and `z` are concatenated along axis 1 and passed through a ReLU
    hidden layer and a linear output layer; no sigmoid is applied here.
    """
    pair = tf.concat([X, z], axis=1)
    hidden = tf.nn.relu(tf.matmul(pair, D_W1) + D_b1)
    score = tf.matmul(hidden, D_W2) + D_b2
    return score


""" Training """
# Build the training graph, then alternate a VAE step and a discriminator
# step on every iteration.
z_sample = Q(X, eps)
_, X_logits = P(z_sample)
D_sample = D(X, z_sample)

# Sigmoid scores for encoder codes and for the codes fed through `z`.
D_q = tf.nn.sigmoid(D(X, z_sample))
D_prior = tf.nn.sigmoid(D(X, z))

# Sample from random z
X_samples, _ = P(z)

# disc is the batch mean of -D(X, z_sample); nll is the per-example
# cross-entropy summed over features.
disc = tf.reduce_mean(-D_sample)
nll = tf.reduce_sum(
    tf.nn.sigmoid_cross_entropy_with_logits(logits=X_logits, labels=X),
    axis=1
)
loglike = -tf.reduce_mean(nll)

elbo = disc + loglike
D_loss = tf.reduce_mean(log(D_q) + log(1. - D_prior))

# Both objectives are maximized by minimizing their negation.
VAE_solver = tf.train.AdamOptimizer().minimize(-elbo, var_list=theta_P+theta_Q)
D_solver = tf.train.AdamOptimizer().minimize(-D_loss, var_list=theta_D)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out/'):
    os.makedirs('out/')

# Index used to name the saved sample images.
i = 0

for it in range(1000000):
    X_mb, _ = mnist.train.next_batch(mb_size)
    eps_mb = np.random.randn(mb_size, eps_dim)
    z_mb = np.random.randn(mb_size, z_dim)

    _, elbo_curr = sess.run([VAE_solver, elbo],
                            feed_dict={X: X_mb, eps: eps_mb, z: z_mb})

    _, D_loss_curr = sess.run([D_solver, D_loss],
                              feed_dict={X: X_mb, eps: eps_mb, z: z_mb})

    # Every 1000 iterations: report both objectives and save a 4x4 grid of decoded samples.
    if it % 1000 == 0:
        print('Iter: {}; ELBO: {:.4}; D_Loss: {:.4}'
              .format(it, elbo_curr, D_loss_curr))

        samples = sess.run(X_samples, feed_dict={z: np.random.randn(16, z_dim)})

        fig = plot(samples)
        plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)


================================================
FILE: VAE/conditional_vae/cvae_pytorch.py
================================================
import torch
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Dataset and hyperparameters shared by the rest of the script.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 64  # minibatch size
Z_dim = 100  # width of the latent code
X_dim = mnist.train.images.shape[1]  # number of features per image
y_dim = mnist.train.labels.shape[1]  # number of columns in the one-hot labels
h_dim = 128  # hidden units in encoder and decoder
cnt = 0  # index used to name the saved sample images
lr = 1e-3  # Adam learning rate


def xavier_init(size):
    """Create a trainable weight of shape `size`.

    Entries are standard-normal draws scaled by 1 / sqrt(size[0] / 2); the
    result is wrapped in a Variable with requires_grad=True.
    """
    fan_in = size[0]
    scale = 1. / np.sqrt(fan_in / 2.)
    weights = torch.randn(*size) * scale
    return Variable(weights, requires_grad=True)


# =============================== Q(z|X) ======================================

# Encoder parameters. The hidden layer takes the image concatenated with its
# label; two separate linear heads produce the two outputs of Q.
Wxh = xavier_init(size=[X_dim + y_dim, h_dim])
bxh = Variable(torch.zeros(h_dim), requires_grad=True)

Whz_mu = xavier_init(size=[h_dim, Z_dim])
bhz_mu = Variable(torch.zeros(Z_dim), requires_grad=True)

# Named "var", but sample_z and the KL term treat this head as a log-variance.
Whz_var = xavier_init(size=[h_dim, Z_dim])
bhz_var = Variable(torch.zeros(Z_dim), requires_grad=True)


def Q(X, c):
    """Encode images `X` conditioned on labels `c`.

    Returns:
        A `(z_mu, z_var)` pair produced by two linear heads on a shared ReLU
        hidden layer; biases are tiled to one row per example before adding.
    """
    joined = torch.cat([X, c], 1)
    rows = joined.size(0)
    hidden = nn.relu(joined @ Wxh + bxh.repeat(rows, 1))
    mean = hidden @ Whz_mu + bhz_mu.repeat(rows, 1)
    spread = hidden @ Whz_var + bhz_var.repeat(rows, 1)
    return mean, spread


def sample_z(mu, log_var):
    """Draw a reparameterized latent sample.

    Args:
        mu: mean of the latent distribution, one row per example.
        log_var: elementwise log-variance, same shape as `mu`.

    Returns:
        `mu + exp(log_var / 2) * eps`, where `eps` is standard normal noise
        with the same shape as `mu`.
    """
    # Size the noise from `mu` instead of the global (mb_size, Z_dim), so the
    # function works for any batch size or latent width.
    eps = Variable(torch.randn(mu.size()))
    return mu + torch.exp(log_var / 2) * eps


# =============================== P(X|z) ======================================

# Decoder parameters. The hidden layer takes the latent code concatenated
# with the label.
Wzh = xavier_init(size=[Z_dim + y_dim, h_dim])
bzh = Variable(torch.zeros(h_dim), requires_grad=True)

Whx = xavier_init(size=[h_dim, X_dim])
bhx = Variable(torch.zeros(X_dim), requires_grad=True)


def P(z, c):
    """Decode latent codes `z` conditioned on labels `c`.

    Returns the sigmoid of the output layer, computed from a ReLU hidden
    layer over the concatenation of `z` and `c`.
    """
    joined = torch.cat([z, c], 1)
    rows = joined.size(0)
    hidden = nn.relu(joined @ Wzh + bzh.repeat(rows, 1))
    return nn.sigmoid(hidden @ Whx + bhx.repeat(rows, 1))


# =============================== TRAINING ====================================

# All encoder and decoder parameters are optimized jointly by one Adam instance.
params = [Wxh, bxh, Whz_mu, bhz_mu, Whz_var, bhz_var,
          Wzh, bzh, Whx, bhx]

solver = optim.Adam(params, lr=lr)

for it in range(100000):
    X, c = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))
    # Labels are cast to float32 before being concatenated with X and z.
    c = Variable(torch.from_numpy(c.astype('float32')))

    # Forward
    z_mu, z_var = Q(X, c)
    z = sample_z(z_mu, z_var)
    X_sample = P(z, c)

    # Loss
    # Summed binary cross-entropy divided by the batch size, plus the batch
    # mean of the KL term (z_var is used as a log-variance here).
    recon_loss = nn.binary_cross_entropy(X_sample, X, size_average=False) / mb_size
    kl_loss = torch.mean(0.5 * torch.sum(torch.exp(z_var) + z_mu**2 - 1. - z_var, 1))
    loss = recon_loss + kl_loss

    # Backward
    loss.backward()

    # Update
    solver.step()

    # Housekeeping
    # Replace every gradient with a zero tensor of the same size so the next
    # backward pass starts from zero.
    for p in params:
        if p.grad is not None:
            data = p.grad.data
            p.grad = Variable(data.new().resize_as_(data).zero_())

    # Print and plot every now and then
    if it % 1000 == 0:
        # NOTE(review): indexing a scalar loss with `.data[0]` looks tied to an
        # old PyTorch release - confirm the targeted version.
        print('Iter-{}; Loss: {:.4}'.format(it, loss.data[0]))

        # Condition the whole batch on one randomly chosen label column and
        # decode fresh standard-normal codes; the first 16 images are plotted.
        c = np.zeros(shape=[mb_size, y_dim], dtype='float32')
        c[:, np.random.randint(0, 10)] = 1.
        c = Variable(torch.from_numpy(c))
        z = Variable(torch.randn(mb_size, Z_dim))
        samples = P(z, c).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        plt.savefig('out/{}.png'.format(str(cnt).zfill(3)), bbox_inches='tight')
        cnt += 1
        plt.close(fig)


================================================
FILE: VAE/conditional_vae/cvae_tensorflow.py
================================================
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Dataset and hyperparameters shared by the rest of the script.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 64  # minibatch size
z_dim = 100  # width of the latent code
X_dim = mnist.train.images.shape[1]  # number of features per image
y_dim = mnist.train.labels.shape[1]  # number of columns in the one-hot labels
h_dim = 128  # hidden units in encoder and decoder
c = 0  # rebound to the label placeholder further down
lr = 1e-3  # not passed to the optimizer below, which is built with default arguments


def plot(samples):
    """Draw flattened 28x28 images on a 4x4 grid and return the figure.

    Args:
        samples: sequence of arrays, each reshapeable to (28, 28); the grid
            has 16 cells.

    Returns:
        The matplotlib figure with one borderless grayscale axes per sample.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for idx, image in enumerate(samples):
        axes = plt.subplot(grid[idx])
        # The subplot just created is the current axes, so this hides its frame.
        axes.axis('off')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        axes.set_aspect('equal')
        plt.imshow(image.reshape(28, 28), cmap='Greys_r')

    return figure


def xavier_init(size):
    """Sample an initial weight tensor of shape `size`.

    Values come from a zero-mean normal whose standard deviation is
    1 / sqrt(size[0] / 2).
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)


# =============================== Q(z|X) ======================================

# Graph inputs: data batch, condition (label) batch, and latent samples.
X = tf.placeholder(tf.float32, shape=[None, X_dim])
c = tf.placeholder(tf.float32, shape=[None, y_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Encoder hidden layer; its input is X concatenated with c.
Q_W1 = tf.Variable(xavier_init([X_dim + y_dim, h_dim]))
Q_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

# Head producing the latent mean.
Q_W2_mu = tf.Variable(xavier_init([h_dim, z_dim]))
Q_b2_mu = tf.Variable(tf.zeros(shape=[z_dim]))

# Head producing the latent log-variance (named "sigma" here).
Q_W2_sigma = tf.Variable(xavier_init([h_dim, z_dim]))
Q_b2_sigma = tf.Variable(tf.zeros(shape=[z_dim]))


def Q(X, c):
    """Encode images `X` conditioned on labels `c`.

    Returns:
        A `(z_mu, z_logvar)` pair produced by two linear heads on a shared
        ReLU hidden layer over the concatenation of `X` and `c`.
    """
    joined = tf.concat(axis=1, values=[X, c])
    hidden = tf.nn.relu(tf.matmul(joined, Q_W1) + Q_b1)
    mean = tf.matmul(hidden, Q_W2_mu) + Q_b2_mu
    log_variance = tf.matmul(hidden, Q_W2_sigma) + Q_b2_sigma
    return mean, log_variance


def sample_z(mu, log_var):
    """Return `mu + exp(log_var / 2) * eps` with `eps` standard normal, shaped like `mu`."""
    noise = tf.random_normal(shape=tf.shape(mu))
    std = tf.exp(log_var / 2)
    return mu + std * noise


# =============================== P(X|z) ======================================

# Decoder parameters. The hidden layer takes the latent code concatenated
# with the label.
P_W1 = tf.Variable(xavier_init([z_dim + y_dim, h_dim]))
P_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

P_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
P_b2 = tf.Variable(tf.zeros(shape=[X_dim]))


def P(z, c):
    """Decode latent codes `z` conditioned on labels `c`.

    Returns:
        A `(prob, logits)` pair: the sigmoid of the output layer and the raw
        output-layer values it was computed from.
    """
    joined = tf.concat(axis=1, values=[z, c])
    hidden = tf.nn.relu(tf.matmul(joined, P_W1) + P_b1)
    raw = tf.matmul(hidden, P_W2) + P_b2
    return tf.nn.sigmoid(raw), raw


# =============================== TRAINING ====================================

# Build the conditional VAE training graph and run the optimization loop.
z_mu, z_logvar = Q(X, c)
z_sample = sample_z(z_mu, z_logvar)
_, logits = P(z_sample, c)

# Sampling from random z
X_samples, _ = P(z, c)

# Reconstruction term: per-example cross-entropy summed over features,
# i.e. the negated E[log P(X|z)]
recon_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=X), 1)
# D_KL(Q(z|X) || P(z)) against a standard normal prior; calculate in closed
# form as both dist. are Gaussian
kl_loss = 0.5 * tf.reduce_sum(tf.exp(z_logvar) + z_mu**2 - 1. - z_logvar, 1)
# VAE loss
vae_loss = tf.reduce_mean(recon_loss + kl_loss)

# No var_list is given, so every trainable variable in the graph is updated.
solver = tf.train.AdamOptimizer().minimize(vae_loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out/'):
    os.makedirs('out/')

# Index used to name the saved sample images.
i = 0

for it in range(1000000):
    X_mb, y_mb = mnist.train.next_batch(mb_size)

    _, loss = sess.run([solver, vae_loss], feed_dict={X: X_mb, c: y_mb})

    # Every 1000 iterations: report the loss and save a 4x4 grid of samples.
    if it % 1000 == 0:
        print('Iter: {}'.format(it))
        print('Loss: {:.4}'. format(loss))
        print()

        # Condition all 16 samples on one randomly chosen label column.
        y = np.zeros(shape=[16, y_dim])
        y[:, np.random.randint(0, y_dim)] = 1.

        samples = sess.run(X_samples,
                           feed_dict={z: np.random.randn(16, z_dim), c: y})

        fig = plot(samples)
        plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)


================================================
FILE: VAE/denoising_vae/dvae_pytorch.py
================================================
import torch
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Dataset and hyperparameters shared by the rest of the script.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 64  # minibatch size
Z_dim = 100  # width of the latent code
X_dim = mnist.train.images.shape[1]  # number of features per image
y_dim = mnist.train.labels.shape[1]  # number of columns in the one-hot labels
h_dim = 128  # hidden units in encoder and decoder
c = 0  # index used to name the saved sample images
lr = 1e-3  # Adam learning rate
noise_factor = .25  # scale of the Gaussian noise added to the encoder input


def xavier_init(size):
    """Create a trainable weight of shape `size`.

    Entries are standard-normal draws scaled by 1 / sqrt(size[0] / 2); the
    result is wrapped in a Variable with requires_grad=True.
    """
    fan_in = size[0]
    scale = 1. / np.sqrt(fan_in / 2.)
    weights = torch.randn(*size) * scale
    return Variable(weights, requires_grad=True)


""" Q(z|X) """
# Encoder parameters: a shared hidden layer and two separate linear heads.
Wxh = xavier_init(size=[X_dim, h_dim])
bxh = Variable(torch.zeros(h_dim), requires_grad=True)

Whz_mu = xavier_init(size=[h_dim, Z_dim])
bhz_mu = Variable(torch.zeros(Z_dim), requires_grad=True)

# Named "var", but sample_z and the KL term treat this head as a log-variance.
Whz_var = xavier_init(size=[h_dim, Z_dim])
bhz_var = Variable(torch.zeros(Z_dim), requires_grad=True)


def Q(X):
    """Encode `X`.

    Returns:
        A `(z_mu, z_var)` pair produced by two linear heads on a shared ReLU
        hidden layer; biases are tiled to one row per example before adding.
    """
    rows = X.size(0)
    hidden = nn.relu(X @ Wxh + bxh.repeat(rows, 1))
    mean = hidden @ Whz_mu + bhz_mu.repeat(rows, 1)
    spread = hidden @ Whz_var + bhz_var.repeat(rows, 1)
    return mean, spread


def sample_z(mu, log_var):
    """Draw a reparameterized latent sample.

    Args:
        mu: mean of the latent distribution, one row per example.
        log_var: elementwise log-variance, same shape as `mu`.

    Returns:
        `mu + exp(log_var / 2) * eps`, where `eps` is standard normal noise
        with the same shape as `mu`.
    """
    # Size the noise from `mu` instead of the global (mb_size, Z_dim), so the
    # function works for any batch size or latent width.
    eps = Variable(torch.randn(mu.size()))
    return mu + torch.exp(log_var / 2) * eps


""" P(X|z) """
# Decoder parameters: Z_dim -> h_dim -> X_dim.
Wzh = xavier_init(size=[Z_dim, h_dim])
bzh = Variable(torch.zeros(h_dim), requires_grad=True)

Whx = xavier_init(size=[h_dim, X_dim])
bhx = Variable(torch.zeros(X_dim), requires_grad=True)


def P(z):
    """Decode latent codes `z` into sigmoid outputs via one ReLU hidden layer."""
    rows = z.size(0)
    hidden = nn.relu(z @ Wzh + bzh.repeat(rows, 1))
    return nn.sigmoid(hidden @ Whx + bhx.repeat(rows, 1))


""" Training """
# All encoder and decoder parameters are optimized jointly by one Adam instance.
params = [Wxh, bxh, Whz_mu, bhz_mu, Whz_var, bhz_var,
          Wzh, bzh, Whx, bhx]

solver = optim.Adam(params, lr=lr)

for it in range(100000):
    X, _ = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))

    # Add noise
    # The encoder sees a corrupted copy clamped back into [0, 1]; the clean X
    # remains the reconstruction target.
    X_noise = X + noise_factor * Variable(torch.randn(X.size()))
    X_noise.data.clamp_(0., 1.)

    # Forward
    z_mu, z_var = Q(X_noise)
    z = sample_z(z_mu, z_var)
    X_sample = P(z)

    # Loss
    # (A stray no-op expression `torch.nn.BCELoss` used to sit here; removed.)
    # Summed binary cross-entropy divided by the batch size, plus the batch
    # mean of the KL term (z_var is used as a log-variance here).
    recon_loss = nn.binary_cross_entropy(X_sample, X, size_average=False) / mb_size
    kl_loss = torch.mean(0.5 * torch.sum(torch.exp(z_var) + z_mu**2 - 1. - z_var, 1))
    loss = recon_loss + kl_loss

    # Backward
    loss.backward()

    # Update
    solver.step()

    # Housekeeping
    # Replace every gradient with a zero tensor of the same size so the next
    # backward pass starts from zero.
    for p in params:
        if p.grad is not None:
            data = p.grad.data
            p.grad = Variable(data.new().resize_as_(data).zero_())

    # Print and plot every now and then
    if it % 1000 == 0:
        # NOTE(review): indexing a scalar loss with `.data[0]` looks tied to an
        # old PyTorch release - confirm the targeted version.
        print('Iter-{}; Loss: {:.4}'.format(it, loss.data[0]))

        # Decode fresh standard-normal codes; the first 16 images are plotted.
        z = Variable(torch.randn(mb_size, Z_dim))
        samples = P(z).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        plt.savefig('out/{}.png'.format(str(c).zfill(3)), bbox_inches='tight')
        c += 1
        plt.close(fig)


================================================
FILE: VAE/denoising_vae/dvae_tensorflow.py
================================================
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Dataset and hyperparameters shared by the rest of the script.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 64  # minibatch size
z_dim = 100  # width of the latent code
X_dim = mnist.train.images.shape[1]  # number of features per image
y_dim = mnist.train.labels.shape[1]  # number of columns in the one-hot labels
h_dim = 128  # hidden units in encoder and decoder
c = 0  # not referenced again in this script
lr = 1e-3  # not passed to the optimizer below, which is built with default arguments
noise_factor = 0.25  # scale of the Gaussian noise added to the encoder input


def plot(samples):
    """Draw flattened 28x28 images on a 4x4 grid and return the figure.

    Args:
        samples: sequence of arrays, each reshapeable to (28, 28); the grid
            has 16 cells.

    Returns:
        The matplotlib figure with one borderless grayscale axes per sample.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for idx, image in enumerate(samples):
        axes = plt.subplot(grid[idx])
        # The subplot just created is the current axes, so this hides its frame.
        axes.axis('off')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        axes.set_aspect('equal')
        plt.imshow(image.reshape(28, 28), cmap='Greys_r')

    return figure


def xavier_init(size):
    """Sample an initial weight tensor of shape `size`.

    Values come from a zero-mean normal whose standard deviation is
    1 / sqrt(size[0] / 2).
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)


""" Q(X|z) """
# Graph inputs and encoder parameters. The section label above reads
# "Q(X|z)", but the encoder defined below maps X to the parameters of z,
# i.e. it models Q(z|X).
X = tf.placeholder(tf.float32, shape=[None, X_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Shared hidden layer.
Q_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
Q_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

# Head producing the latent mean.
Q_W2_mu = tf.Variable(xavier_init([h_dim, z_dim]))
Q_b2_mu = tf.Variable(tf.zeros(shape=[z_dim]))

# Head producing the latent log-variance (named "sigma" here).
Q_W2_sigma = tf.Variable(xavier_init([h_dim, z_dim]))
Q_b2_sigma = tf.Variable(tf.zeros(shape=[z_dim]))


def Q(X):
    """Encode `X`.

    Returns:
        A `(z_mu, z_logvar)` pair produced by two linear heads on a shared
        ReLU hidden layer.
    """
    hidden = tf.nn.relu(tf.matmul(X, Q_W1) + Q_b1)
    mean = tf.matmul(hidden, Q_W2_mu) + Q_b2_mu
    log_variance = tf.matmul(hidden, Q_W2_sigma) + Q_b2_sigma
    return mean, log_variance


def sample_z(mu, log_var):
    """Return `mu + exp(log_var / 2) * eps` with `eps` standard normal, shaped like `mu`."""
    noise = tf.random_normal(shape=tf.shape(mu))
    std = tf.exp(log_var / 2)
    return mu + std * noise


""" P(X|z) """
# Decoder parameters: z_dim -> h_dim -> X_dim.
P_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
P_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

P_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
P_b2 = tf.Variable(tf.zeros(shape=[X_dim]))


def P(z):
    """Decode latent codes `z`.

    Returns:
        A `(prob, logits)` pair: the sigmoid of the output layer and the raw
        output-layer values it was computed from.
    """
    hidden = tf.nn.relu(tf.matmul(z, P_W1) + P_b1)
    raw = tf.matmul(hidden, P_W2) + P_b2
    return tf.nn.sigmoid(raw), raw


""" Training """
# Add noise to X
# The encoder sees a corrupted copy clipped back into [0, 1]; the clean X
# remains the reconstruction target.
X_noise = X + noise_factor * tf.random_normal(tf.shape(X))
X_noise = tf.clip_by_value(X_noise, 0., 1.)

z_mu, z_logvar = Q(X_noise)
z_sample = sample_z(z_mu, z_logvar)
_, logits = P(z_sample)

# Sample from random z
X_samples, _ = P(z)

# Reconstruction term: per-example cross-entropy summed over features,
# i.e. the negated E[log P(X|z)]
recon_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=X), 1)
# D_KL(Q(z|X_noise) || P(z)) against a standard normal prior; calculate in
# closed form as both dist. are Gaussian
kl_loss = 0.5 * tf.reduce_sum(tf.exp(z_logvar) + z_mu**2 - 1. - z_logvar, 1)
# VAE loss
vae_loss = tf.reduce_mean(recon_loss + kl_loss)

# No var_list is given, so every trainable variable in the graph is updated.
solver = tf.train.AdamOptimizer().minimize(vae_loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

if not os.path.exists('out/'):
    os.makedirs('out/')

# Index used to name the saved sample images.
i = 0

for it in range(1000000):
    X_mb, _ = mnist.train.next_batch(mb_size)

    _, loss = sess.run([solver, vae_loss], feed_dict={X: X_mb})

    # Every 1000 iterations: report the loss and save a 4x4 grid of decoded samples.
    if it % 1000 == 0:
        print('Iter: {}; Loss: {:.4}'.format(it, loss))

        samples = sess.run(X_samples, feed_dict={z: np.random.randn(16, z_dim)})

        fig = plot(samples)
        plt.savefig('out/{}.png'.format(str(i).zfill(3)), bbox_inches='tight')
        i += 1
        plt.close(fig)


================================================
FILE: VAE/vanilla_vae/vae_pytorch.py
================================================
import torch
import torch.nn.functional as nn
import torch.autograd as autograd
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from torch.autograd import Variable
from tensorflow.examples.tutorials.mnist import input_data


# Dataset and hyperparameters shared by the rest of the script.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 64  # minibatch size
Z_dim = 100  # width of the latent code
X_dim = mnist.train.images.shape[1]  # number of features per image
y_dim = mnist.train.labels.shape[1]  # number of columns in the one-hot labels
h_dim = 128  # hidden units in encoder and decoder
c = 0  # index used to name the saved sample images
lr = 1e-3  # Adam learning rate


def xavier_init(size):
    """Create a trainable weight of shape `size`.

    Entries are standard-normal draws scaled by 1 / sqrt(size[0] / 2); the
    result is wrapped in a Variable with requires_grad=True.
    """
    fan_in = size[0]
    scale = 1. / np.sqrt(fan_in / 2.)
    weights = torch.randn(*size) * scale
    return Variable(weights, requires_grad=True)


# =============================== Q(z|X) ======================================

# Encoder parameters: a shared hidden layer and two separate linear heads.
Wxh = xavier_init(size=[X_dim, h_dim])
bxh = Variable(torch.zeros(h_dim), requires_grad=True)

Whz_mu = xavier_init(size=[h_dim, Z_dim])
bhz_mu = Variable(torch.zeros(Z_dim), requires_grad=True)

# Named "var", but sample_z and the KL term treat this head as a log-variance.
Whz_var = xavier_init(size=[h_dim, Z_dim])
bhz_var = Variable(torch.zeros(Z_dim), requires_grad=True)


def Q(X):
    """Encode `X`.

    Returns:
        A `(z_mu, z_var)` pair produced by two linear heads on a shared ReLU
        hidden layer; biases are tiled to one row per example before adding.
    """
    rows = X.size(0)
    hidden = nn.relu(X @ Wxh + bxh.repeat(rows, 1))
    mean = hidden @ Whz_mu + bhz_mu.repeat(rows, 1)
    spread = hidden @ Whz_var + bhz_var.repeat(rows, 1)
    return mean, spread


def sample_z(mu, log_var):
    """Draw a reparameterized latent sample.

    Args:
        mu: mean of the latent distribution, one row per example.
        log_var: elementwise log-variance, same shape as `mu`.

    Returns:
        `mu + exp(log_var / 2) * eps`, where `eps` is standard normal noise
        with the same shape as `mu`.
    """
    # Size the noise from `mu` instead of the global (mb_size, Z_dim), so the
    # function works for any batch size or latent width.
    eps = Variable(torch.randn(mu.size()))
    return mu + torch.exp(log_var / 2) * eps


# =============================== P(X|z) ======================================

# Decoder parameters: Z_dim -> h_dim -> X_dim.
Wzh = xavier_init(size=[Z_dim, h_dim])
bzh = Variable(torch.zeros(h_dim), requires_grad=True)

Whx = xavier_init(size=[h_dim, X_dim])
bhx = Variable(torch.zeros(X_dim), requires_grad=True)


def P(z):
    """Decode latent codes `z` into sigmoid outputs via one ReLU hidden layer."""
    rows = z.size(0)
    hidden = nn.relu(z @ Wzh + bzh.repeat(rows, 1))
    return nn.sigmoid(hidden @ Whx + bhx.repeat(rows, 1))


# =============================== TRAINING ====================================

# All encoder and decoder parameters are optimized jointly by one Adam instance.
params = [Wxh, bxh, Whz_mu, bhz_mu, Whz_var, bhz_var,
          Wzh, bzh, Whx, bhx]

solver = optim.Adam(params, lr=lr)

for it in range(100000):
    X, _ = mnist.train.next_batch(mb_size)
    X = Variable(torch.from_numpy(X))

    # Forward
    z_mu, z_var = Q(X)
    z = sample_z(z_mu, z_var)
    X_sample = P(z)

    # Loss
    # Summed binary cross-entropy divided by the batch size, plus the batch
    # mean of the KL term (z_var is used as a log-variance here).
    recon_loss = nn.binary_cross_entropy(X_sample, X, size_average=False) / mb_size
    kl_loss = torch.mean(0.5 * torch.sum(torch.exp(z_var) + z_mu**2 - 1. - z_var, 1))
    loss = recon_loss + kl_loss

    # Backward
    loss.backward()

    # Update
    solver.step()

    # Housekeeping
    # Replace every gradient with a zero tensor of the same size so the next
    # backward pass starts from zero.
    for p in params:
        if p.grad is not None:
            data = p.grad.data
            p.grad = Variable(data.new().resize_as_(data).zero_())

    # Print and plot every now and then
    if it % 1000 == 0:
        # NOTE(review): indexing a scalar loss with `.data[0]` looks tied to an
        # old PyTorch release - confirm the targeted version.
        print('Iter-{}; Loss: {:.4}'.format(it, loss.data[0]))

        # Decode fresh standard-normal codes rather than the posterior sample
        # of the current training batch, so the saved grid shows generated
        # digits (as the other VAE scripts do) instead of reconstructions.
        z = Variable(torch.randn(mb_size, Z_dim))
        samples = P(z).data.numpy()[:16]

        fig = plt.figure(figsize=(4, 4))
        gs = gridspec.GridSpec(4, 4)
        gs.update(wspace=0.05, hspace=0.05)

        for i, sample in enumerate(samples):
            ax = plt.subplot(gs[i])
            plt.axis('off')
            ax.set_xticklabels([])
            ax.set_yticklabels([])
            ax.set_aspect('equal')
            plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

        if not os.path.exists('out/'):
            os.makedirs('out/')

        plt.savefig('out/{}.png'.format(str(c).zfill(3)), bbox_inches='tight')
        c += 1
        plt.close(fig)


================================================
FILE: VAE/vanilla_vae/vae_tensorflow.py
================================================
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import os
from tensorflow.examples.tutorials.mnist import input_data


# Dataset and hyperparameters shared by the rest of the script.
mnist = input_data.read_data_sets('../../MNIST_data', one_hot=True)
mb_size = 64  # minibatch size
z_dim = 100  # width of the latent code
X_dim = mnist.train.images.shape[1]  # number of features per image
y_dim = mnist.train.labels.shape[1]  # number of columns in the one-hot labels
h_dim = 128  # hidden units in encoder and decoder
c = 0  # not referenced again in this script
lr = 1e-3  # not passed to the optimizer below, which is built with default arguments


def plot(samples):
    """Draw flattened 28x28 images on a 4x4 grid and return the figure.

    Args:
        samples: sequence of arrays, each reshapeable to (28, 28); the grid
            has 16 cells.

    Returns:
        The matplotlib figure with one borderless grayscale axes per sample.
    """
    figure = plt.figure(figsize=(4, 4))
    grid = gridspec.GridSpec(4, 4)
    grid.update(wspace=0.05, hspace=0.05)

    for idx, image in enumerate(samples):
        axes = plt.subplot(grid[idx])
        # The subplot just created is the current axes, so this hides its frame.
        axes.axis('off')
        axes.set_xticklabels([])
        axes.set_yticklabels([])
        axes.set_aspect('equal')
        plt.imshow(image.reshape(28, 28), cmap='Greys_r')

    return figure


def xavier_init(size):
    """Sample an initial weight tensor of shape `size`.

    Values come from a zero-mean normal whose standard deviation is
    1 / sqrt(size[0] / 2).
    """
    fan_in = size[0]
    stddev = 1. / tf.sqrt(fan_in / 2.)
    return tf.random_normal(shape=size, stddev=stddev)


# =============================== Q(z|X) ======================================

# Graph inputs: data batch and latent samples.
X = tf.placeholder(tf.float32, shape=[None, X_dim])
z = tf.placeholder(tf.float32, shape=[None, z_dim])

# Encoder hidden layer.
Q_W1 = tf.Variable(xavier_init([X_dim, h_dim]))
Q_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

# Head producing the latent mean.
Q_W2_mu = tf.Variable(xavier_init([h_dim, z_dim]))
Q_b2_mu = tf.Variable(tf.zeros(shape=[z_dim]))

# Head producing the latent log-variance (named "sigma" here).
Q_W2_sigma = tf.Variable(xavier_init([h_dim, z_dim]))
Q_b2_sigma = tf.Variable(tf.zeros(shape=[z_dim]))


def Q(X):
    """Encode `X`.

    Returns:
        A `(z_mu, z_logvar)` pair produced by two linear heads on a shared
        ReLU hidden layer.
    """
    hidden = tf.nn.relu(tf.matmul(X, Q_W1) + Q_b1)
    mean = tf.matmul(hidden, Q_W2_mu) + Q_b2_mu
    log_variance = tf.matmul(hidden, Q_W2_sigma) + Q_b2_sigma
    return mean, log_variance


def sample_z(mu, log_var):
    """Return `mu + exp(log_var / 2) * eps` with `eps` standard normal, shaped like `mu`."""
    noise = tf.random_normal(shape=tf.shape(mu))
    std = tf.exp(log_var / 2)
    return mu + std * noise


# =============================== P(X|z) ======================================

# Decoder parameters: z_dim -> h_dim -> X_dim.
P_W1 = tf.Variable(xavier_init([z_dim, h_dim]))
P_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

P_W2 = tf.Variable(xavier_init([h_dim, X_dim]))
P_b2 = tf.Variable(tf.zeros(shape=[X_dim]))


def P(z):
    """Decoder: map latent codes z to (probabilities, logits) over X.

    The probabilities are the element-wise sigmoid of the logits; the logits
    are returned as well so the loss can be computed from them directly.
    """
    hidden = tf.nn.relu(tf.matmul(z, P_W1) + P_b1)
    pre_activation = tf.matmul(hidden, P_W2) + P_b2
    return tf.nn.sigmoid(pre_activation), pre_activation


# =============================== TRAINING ====================================

# Encode the input batch, draw z via sample_z, and decode it back to logits.
z_mu, z_logvar = Q(X)
z_sample = sample_z(z_mu, z_logvar)
_, logits = P(z_sample)

# Sampling from random z
X_samples, _ = P(z)

# -E[log P(X|z)]: sigmoid cross-entropy between the decoder logits and X,
# summed over the feature axis to give one value per example.
recon_loss = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=X), 1)
# D_KL(Q(z|X) || P(z)); calculate in closed form as both dist. are Gaussian
kl_loss = 0.5 * tf.reduce_sum(tf.exp(z_logvar) + z_mu**2 - 1. - z_logvar, 1)
# VAE loss: batch mean of reconstruction term plus KL term
vae_loss = tf.reduce_mean(recon_loss + kl_loss)

# Use the configured `lr` explicitly so that editing the hyper-parameter
# actually changes the optimizer's step size.
solver = tf.train.AdamOptimizer(learning_rate=lr).minimize(vae_loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

# Sample images produced during training are written to out/.
if not os.path.exists('out/'):
    os.makedirs('out/')

# Running index used to number the sample images written to out/.
i = 0

for it in range(1000000):
    # Labels returned by next_batch are discarded; only the images are used.
    X_mb, _ = mnist.train.next_batch(mb_size)

    _, loss = sess.run([solver, vae_loss], feed_dict={X: X_mb})

    # Report progress and save a sample grid only once every 1000 iterations.
    if it % 1000 != 0:
        continue

    print('Iter: {}'.format(it))
    print('Loss: {:.4}'.format(loss))
    print()

    # Decode 16 latent codes drawn from a standard normal.
    prior_codes = np.random.randn(16, z_dim)
    samples = sess.run(X_samples, feed_dict={z: prior_codes})

    fig = plot(samples)
    out_path = 'out/{}.png'.format(str(i).zfill(3))
    plt.savefig(out_path, bbox_inches='tight')
    i += 1
    plt.close(fig)


================================================
FILE: environment.yml
================================================
name: generative-models
dependencies:
- python=3.5.1
- numpy=1.11.0
- scikit-learn=0.17.1
- scipy=0.17.1
- matplotlib=1.5.3
- pip:
    - keras==1.1.1
Download .txt
gitextract_9grku3me/

├── .gitignore
├── GAN/
│   ├── ali_bigan/
│   │   ├── ali_bigan_pytorch.py
│   │   └── ali_bigan_tensorflow.py
│   ├── auxiliary_classifier_gan/
│   │   ├── ac_gan_pytorch.py
│   │   └── ac_gan_tensorflow.py
│   ├── boundary_equilibrium_gan/
│   │   ├── began_pytorch.py
│   │   └── began_tensorflow.py
│   ├── boundary_seeking_gan/
│   │   ├── bgan_pytorch.py
│   │   └── bgan_tensorflow.py
│   ├── conditional_gan/
│   │   ├── cgan_pytorch.py
│   │   └── cgan_tensorflow.py
│   ├── coupled_gan/
│   │   ├── cogan_pytorch.py
│   │   └── cogan_tensorflow.py
│   ├── disco_gan/
│   │   ├── discogan_pytorch.py
│   │   └── discogan_tensorflow.py
│   ├── dual_gan/
│   │   ├── dualgan_pytorch.py
│   │   └── dualgan_tensorflow.py
│   ├── ebgan/
│   │   ├── ebgan_pytorch.py
│   │   └── ebgan_tensorflow.py
│   ├── f_gan/
│   │   ├── f_gan_pytorch.py
│   │   └── f_gan_tensorflow.py
│   ├── generative_adversarial_parallelization/
│   │   └── gap_pytorch.py
│   ├── gibbsnet/
│   │   └── gibbsnet_pytorch.py
│   ├── improved_wasserstein_gan/
│   │   └── wgan_gp_tensorflow.py
│   ├── infogan/
│   │   ├── infogan_pytorch.py
│   │   └── infogan_tensorflow.py
│   ├── least_squares_gan/
│   │   ├── lsgan_pytorch.py
│   │   └── lsgan_tensorflow.py
│   ├── magan/
│   │   ├── magan_pytorch.py
│   │   └── magan_tensorflow.py
│   ├── mode_regularized_gan/
│   │   ├── mode_reg_gan_pytorch.py
│   │   └── mode_reg_gan_tensorflow.py
│   ├── softmax_gan/
│   │   ├── softmax_gan_pytorch.py
│   │   └── softmax_gan_tensorflow.py
│   ├── vanilla_gan/
│   │   ├── gan_pytorch.py
│   │   └── gan_tensorflow.py
│   └── wasserstein_gan/
│       ├── wgan_pytorch.py
│       └── wgan_tensorflow.py
├── HelmholtzMachine/
│   ├── README.md
│   └── vanilla_HM/
│       └── helmholtz.py
├── LICENSE
├── RBM/
│   ├── README.md
│   ├── rbm_binary_cd.py
│   └── rbm_binary_pcd.py
├── README.md
├── VAE/
│   ├── adversarial_autoencoder/
│   │   ├── aae_pytorch.py
│   │   └── aae_tensorflow.py
│   ├── adversarial_vb/
│   │   ├── avb_pytorch.py
│   │   └── avb_tensorflow.py
│   ├── conditional_vae/
│   │   ├── cvae_pytorch.py
│   │   └── cvae_tensorflow.py
│   ├── denoising_vae/
│   │   ├── dvae_pytorch.py
│   │   └── dvae_tensorflow.py
│   └── vanilla_vae/
│       ├── vae_pytorch.py
│       └── vae_tensorflow.py
└── environment.yml
Download .txt
SYMBOL INDEX (216 symbols across 50 files)

FILE: GAN/ali_bigan/ali_bigan_pytorch.py
  function log (line 25) | def log(x):
  function D (line 52) | def D(X, z):
  function reset_grad (line 56) | def reset_grad():

FILE: GAN/ali_bigan/ali_bigan_tensorflow.py
  function plot (line 19) | def plot(samples):
  function xavier_init (line 35) | def xavier_init(size):
  function log (line 41) | def log(x):
  function sample_z (line 67) | def sample_z(m, n):
  function Q (line 71) | def Q(X):
  function P (line 77) | def P(z):
  function D (line 83) | def D(X, z):

FILE: GAN/auxiliary_classifier_gan/ac_gan_pytorch.py
  function G (line 32) | def G(z, c):
  function D (line 52) | def D(X):
  function reset_grad (line 64) | def reset_grad():

FILE: GAN/auxiliary_classifier_gan/ac_gan_tensorflow.py
  function plot (line 21) | def plot(samples):
  function xavier_init (line 37) | def xavier_init(size):
  function generator (line 53) | def generator(z, c):
  function discriminator (line 69) | def discriminator(X):
  function sample_z (line 80) | def sample_z(m, n):
  function cross_entropy (line 84) | def cross_entropy(logit, y):

FILE: GAN/boundary_equilibrium_gan/began_pytorch.py
  function D (line 44) | def D(X):
  function reset_grad (line 50) | def reset_grad():

FILE: GAN/boundary_equilibrium_gan/began_tensorflow.py
  function plot (line 22) | def plot(samples):
  function xavier_init (line 38) | def xavier_init(size):
  function sample_z (line 62) | def sample_z(m, n):
  function G (line 66) | def G(z):
  function D (line 73) | def D(X):

FILE: GAN/boundary_seeking_gan/bgan_pytorch.py
  function log (line 24) | def log(x):
  function reset_grad (line 44) | def reset_grad():

FILE: GAN/boundary_seeking_gan/bgan_tensorflow.py
  function plot (line 19) | def plot(samples):
  function xavier_init (line 35) | def xavier_init(size):
  function log (line 41) | def log(x):
  function sample_z (line 62) | def sample_z(m, n):
  function generator (line 66) | def generator(z):
  function discriminator (line 73) | def discriminator(x):

FILE: GAN/conditional_gan/cgan_pytorch.py
  function xavier_init (line 23) | def xavier_init(size):
  function G (line 38) | def G(z, c):
  function D (line 54) | def D(X, c):
  function reset_grad (line 69) | def reset_grad():

FILE: GAN/conditional_gan/cgan_tensorflow.py
  function xavier_init (line 17) | def xavier_init(size):
  function discriminator (line 36) | def discriminator(x, y):
  function generator (line 57) | def generator(z, y):
  function sample_Z (line 66) | def sample_Z(m, n):
  function plot (line 70) | def plot(samples):

FILE: GAN/coupled_gan/cogan_pytorch.py
  function G1 (line 45) | def G1(z):
  function G2 (line 51) | def G2(z):
  function D1 (line 76) | def D1(X):
  function D2 (line 82) | def D2(X):
  function reset_grad (line 95) | def reset_grad():
  function sample_x (line 117) | def sample_x(X, size):

FILE: GAN/coupled_gan/cogan_tensorflow.py
  function plot (line 22) | def plot(samples):
  function xavier_init (line 38) | def xavier_init(size):
  function G (line 58) | def G(z):
  function D (line 75) | def D(X1, X2):
  function sample_X (line 144) | def sample_X(X, size):
  function sample_z (line 149) | def sample_z(m, n):

FILE: GAN/disco_gan/discogan_pytorch.py
  function log (line 25) | def log(x):
  function plot (line 29) | def plot(samples):
  function reset_grad (line 78) | def reset_grad():
  function sample_x (line 102) | def sample_x(X, size):

FILE: GAN/disco_gan/discogan_tensorflow.py
  function plot (line 20) | def plot(samples):
  function xavier_init (line 36) | def xavier_init(size):
  function log (line 42) | def log(x):
  function D_A (line 75) | def D_A(X):
  function D_B (line 80) | def D_B(X):
  function G_AB (line 85) | def G_AB(X):
  function G_BA (line 90) | def G_BA(X):
  function sample_X (line 150) | def sample_X(X, size):

FILE: GAN/dual_gan/dualgan_pytorch.py
  function log (line 28) | def log(x):
  function reset_grad (line 59) | def reset_grad():
  function sample_x (line 84) | def sample_x(X, size):

FILE: GAN/dual_gan/dualgan_tensorflow.py
  function plot (line 23) | def plot(samples):
  function xavier_init (line 39) | def xavier_init(size):
  function G1 (line 60) | def G1(X1, z):
  function G2 (line 66) | def G2(X2, z):
  function D1 (line 83) | def D1(X):
  function D2 (line 88) | def D2(X):
  function sample_X (line 149) | def sample_X(X, size):
  function sample_z (line 154) | def sample_z(m, n):

FILE: GAN/ebgan/ebgan_pytorch.py
  function D (line 42) | def D(X):
  function reset_grad (line 47) | def reset_grad():

FILE: GAN/ebgan/ebgan_tensorflow.py
  function plot (line 19) | def plot(samples):
  function xavier_init (line 35) | def xavier_init(size):
  function sample_z (line 58) | def sample_z(m, n):
  function generator (line 62) | def generator(z):
  function discriminator (line 69) | def discriminator(X):

FILE: GAN/f_gan/f_gan_pytorch.py
  function log (line 24) | def log(x):
  function reset_grad (line 43) | def reset_grad():

FILE: GAN/f_gan/f_gan_tensorflow.py
  function plot (line 19) | def plot(samples):
  function xavier_init (line 35) | def xavier_init(size):
  function sample_z (line 58) | def sample_z(m, n):
  function generator (line 62) | def generator(z):
  function discriminator (line 69) | def discriminator(x):

FILE: GAN/generative_adversarial_parallelization/gap_pytorch.py
  function log (line 26) | def log(x):
  function reset_grad (line 63) | def reset_grad():

FILE: GAN/gibbsnet/gibbsnet_pytorch.py
  function log (line 26) | def log(x):
  function D (line 53) | def D(X, z):
  function reset_grad (line 57) | def reset_grad():

FILE: GAN/improved_wasserstein_gan/wgan_gp_tensorflow.py
  function plot (line 20) | def plot(samples):
  function xavier_init (line 36) | def xavier_init(size):
  function sample_z (line 64) | def sample_z(m, n):
  function G (line 68) | def G(z):
  function D (line 75) | def D(X):

FILE: GAN/infogan/infogan_pytorch.py
  function xavier_init (line 23) | def xavier_init(size):
  function G (line 38) | def G(z, c):
  function D (line 54) | def D(X):
  function Q (line 69) | def Q(X):
  function reset_grad (line 84) | def reset_grad():
  function sample_c (line 96) | def sample_c(size):

FILE: GAN/infogan/infogan_tensorflow.py
  function xavier_init (line 9) | def xavier_init(size):
  function sample_Z (line 47) | def sample_Z(m, n):
  function sample_c (line 51) | def sample_c(m):
  function generator (line 55) | def generator(z, c):
  function discriminator (line 64) | def discriminator(x):
  function Q (line 72) | def Q(x):
  function plot (line 79) | def plot(samples):

FILE: GAN/least_squares_gan/lsgan_pytorch.py
  function reset_grad (line 40) | def reset_grad():

FILE: GAN/least_squares_gan/lsgan_tensorflow.py
  function plot (line 19) | def plot(samples):
  function xavier_init (line 35) | def xavier_init(size):
  function sample_z (line 58) | def sample_z(m, n):
  function generator (line 62) | def generator(z):
  function discriminator (line 69) | def discriminator(x):

FILE: GAN/magan/magan_pytorch.py
  function D (line 45) | def D(X):
  function reset_grad (line 50) | def reset_grad():

FILE: GAN/magan/magan_tensorflow.py
  function plot (line 21) | def plot(samples):
  function xavier_init (line 37) | def xavier_init(size):
  function sample_z (line 61) | def sample_z(m, n):
  function G (line 65) | def G(z):
  function D (line 72) | def D(X):

FILE: GAN/mode_regularized_gan/mode_reg_gan_pytorch.py
  function log (line 26) | def log(x):
  function reset_grad (line 51) | def reset_grad():
  function sample_X (line 57) | def sample_X(size, include_y=False):

FILE: GAN/mode_regularized_gan/mode_reg_gan_tensorflow.py
  function plot (line 19) | def plot(samples):
  function xavier_init (line 35) | def xavier_init(size):
  function log (line 41) | def log(x):
  function sample_z (line 69) | def sample_z(m, n):
  function encoder (line 73) | def encoder(x):
  function generator (line 79) | def generator(z):
  function discriminator (line 86) | def discriminator(x):

FILE: GAN/softmax_gan/softmax_gan_pytorch.py
  function log (line 24) | def log(x):
  function reset_grad (line 43) | def reset_grad():

FILE: GAN/softmax_gan/softmax_gan_tensorflow.py
  function plot (line 19) | def plot(samples):
  function xavier_init (line 35) | def xavier_init(size):
  function log (line 41) | def log(x):
  function sample_z (line 62) | def sample_z(m, n):
  function G (line 66) | def G(z):
  function D (line 73) | def D(X):

FILE: GAN/vanilla_gan/gan_pytorch.py
  function xavier_init (line 23) | def xavier_init(size):
  function G (line 38) | def G(z):
  function D (line 53) | def D(X):
  function reset_grad (line 67) | def reset_grad():

FILE: GAN/vanilla_gan/gan_tensorflow.py
  function xavier_init (line 9) | def xavier_init(size):
  function sample_Z (line 37) | def sample_Z(m, n):
  function generator (line 41) | def generator(z):
  function discriminator (line 49) | def discriminator(x):
  function plot (line 57) | def plot(samples):

FILE: GAN/wasserstein_gan/wgan_pytorch.py
  function reset_grad (line 39) | def reset_grad():

FILE: GAN/wasserstein_gan/wgan_tensorflow.py
  function plot (line 17) | def plot(samples):
  function xavier_init (line 33) | def xavier_init(size):
  function sample_z (line 61) | def sample_z(m, n):
  function generator (line 65) | def generator(z):
  function discriminator (line 72) | def discriminator(x):

FILE: HelmholtzMachine/vanilla_HM/helmholtz.py
  function sigm (line 30) | def sigm(x):
  function infer (line 34) | def infer(X):
  function generate (line 39) | def generate(H):
  function plot (line 90) | def plot(samples, size, name):

FILE: RBM/rbm_binary_cd.py
  function sigm (line 23) | def sigm(x):
  function infer (line 27) | def infer(X):
  function generate (line 32) | def generate(H):
  function plot (line 91) | def plot(samples, size, name):

FILE: RBM/rbm_binary_pcd.py
  function sigm (line 21) | def sigm(x):
  function infer (line 25) | def infer(X):
  function generate (line 30) | def generate(H):
  function plot (line 77) | def plot(samples, size, name):

FILE: VAE/adversarial_autoencoder/aae_pytorch.py
  function reset_grad (line 48) | def reset_grad():
  function sample_X (line 54) | def sample_X(size, include_y=False):

FILE: VAE/adversarial_autoencoder/aae_tensorflow.py
  function plot (line 19) | def plot(samples):
  function xavier_init (line 35) | def xavier_init(size):
  function Q (line 54) | def Q(X):
  function P (line 70) | def P(z):
  function D (line 87) | def D(z):

FILE: VAE/adversarial_vb/avb_pytorch.py
  function log (line 25) | def log(x):
  function reset_grad (line 52) | def reset_grad():
  function sample_X (line 58) | def sample_X(size, include_y=False):

FILE: VAE/adversarial_vb/avb_tensorflow.py
  function log (line 21) | def log(x):
  function plot (line 25) | def plot(samples):
  function xavier_init (line 41) | def xavier_init(size):
  function Q (line 60) | def Q(X, eps):
  function P (line 76) | def P(z):
  function D (line 92) | def D(X, z):

FILE: VAE/conditional_vae/cvae_pytorch.py
  function xavier_init (line 23) | def xavier_init(size):
  function Q (line 41) | def Q(X, c):
  function sample_z (line 49) | def sample_z(mu, log_var):
  function P (line 63) | def P(z, c):

FILE: VAE/conditional_vae/cvae_tensorflow.py
  function plot (line 20) | def plot(samples):
  function xavier_init (line 36) | def xavier_init(size):
  function Q (line 58) | def Q(X, c):
  function sample_z (line 66) | def sample_z(mu, log_var):
  function P (line 80) | def P(z, c):

FILE: VAE/denoising_vae/dvae_pytorch.py
  function xavier_init (line 24) | def xavier_init(size):
  function Q (line 41) | def Q(X):
  function sample_z (line 48) | def sample_z(mu, log_var):
  function P (line 61) | def P(z):

FILE: VAE/denoising_vae/dvae_tensorflow.py
  function plot (line 21) | def plot(samples):
  function xavier_init (line 37) | def xavier_init(size):
  function Q (line 57) | def Q(X):
  function sample_z (line 64) | def sample_z(mu, log_var):
  function P (line 77) | def P(z):

FILE: VAE/vanilla_vae/vae_pytorch.py
  function xavier_init (line 23) | def xavier_init(size):
  function Q (line 41) | def Q(X):
  function sample_z (line 48) | def sample_z(mu, log_var):
  function P (line 62) | def P(z):

FILE: VAE/vanilla_vae/vae_tensorflow.py
  function plot (line 19) | def plot(samples):
  function xavier_init (line 35) | def xavier_init(size):
  function Q (line 56) | def Q(X):
  function sample_z (line 63) | def sample_z(mu, log_var):
  function P (line 77) | def P(z):
Condensed preview — 56 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (190K chars).
[
  {
    "path": ".gitignore",
    "chars": 1218,
    "preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
  },
  {
    "path": "GAN/ali_bigan/ali_bigan_pytorch.py",
    "chars": 2748,
    "preview": "import torch\nimport torch.nn\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as op"
  },
  {
    "path": "GAN/ali_bigan/ali_bigan_tensorflow.py",
    "chars": 3308,
    "preview": "import tensorflow as tf\nfrom tensorflow.examples.tutorials.mnist import input_data\nimport numpy as np\nimport matplotlib."
  },
  {
    "path": "GAN/auxiliary_classifier_gan/ac_gan_pytorch.py",
    "chars": 3636,
    "preview": "import torch\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as optim\nimport numpy"
  },
  {
    "path": "GAN/auxiliary_classifier_gan/ac_gan_tensorflow.py",
    "chars": 3900,
    "preview": "import tensorflow as tf\nfrom tensorflow.examples.tutorials.mnist import input_data\nimport numpy as np\nimport matplotlib."
  },
  {
    "path": "GAN/boundary_equilibrium_gan/began_pytorch.py",
    "chars": 2602,
    "preview": "import torch\nimport torch.nn\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as op"
  },
  {
    "path": "GAN/boundary_equilibrium_gan/began_tensorflow.py",
    "chars": 3134,
    "preview": "import tensorflow as tf\nfrom tensorflow.examples.tutorials.mnist import input_data\nimport numpy as np\nimport matplotlib."
  },
  {
    "path": "GAN/boundary_seeking_gan/bgan_pytorch.py",
    "chars": 2366,
    "preview": "import torch\nimport torch.nn\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as op"
  },
  {
    "path": "GAN/boundary_seeking_gan/bgan_tensorflow.py",
    "chars": 3093,
    "preview": "import tensorflow as tf\nfrom tensorflow.examples.tutorials.mnist import input_data\nimport numpy as np\nimport matplotlib."
  },
  {
    "path": "GAN/conditional_gan/cgan_pytorch.py",
    "chars": 3923,
    "preview": "import torch\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as optim\nimport numpy"
  },
  {
    "path": "GAN/conditional_gan/cgan_tensorflow.py",
    "chars": 3786,
    "preview": "import tensorflow as tf\nfrom tensorflow.examples.tutorials.mnist import input_data\nimport numpy as np\nimport matplotlib."
  },
  {
    "path": "GAN/coupled_gan/cogan_pytorch.py",
    "chars": 4595,
    "preview": "import torch\nimport torch.nn\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as op"
  },
  {
    "path": "GAN/coupled_gan/cogan_tensorflow.py",
    "chars": 4974,
    "preview": "import tensorflow as tf\nfrom tensorflow.examples.tutorials.mnist import input_data\nimport numpy as np\nimport matplotlib."
  },
  {
    "path": "GAN/disco_gan/discogan_pytorch.py",
    "chars": 4471,
    "preview": "import torch\nimport torch.nn\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as op"
  },
  {
    "path": "GAN/disco_gan/discogan_tensorflow.py",
    "chars": 5092,
    "preview": "import tensorflow as tf\nfrom tensorflow.examples.tutorials.mnist import input_data\nimport numpy as np\nimport matplotlib."
  },
  {
    "path": "GAN/dual_gan/dualgan_pytorch.py",
    "chars": 4667,
    "preview": "import torch\nimport torch.nn\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as op"
  },
  {
    "path": "GAN/dual_gan/dualgan_tensorflow.py",
    "chars": 5231,
    "preview": "import tensorflow as tf\nfrom tensorflow.examples.tutorials.mnist import input_data\nimport numpy as np\nimport matplotlib."
  },
  {
    "path": "GAN/ebgan/ebgan_pytorch.py",
    "chars": 2505,
    "preview": "import torch\nimport torch.nn\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as op"
  },
  {
    "path": "GAN/ebgan/ebgan_tensorflow.py",
    "chars": 3012,
    "preview": "import tensorflow as tf\nfrom tensorflow.examples.tutorials.mnist import input_data\nimport numpy as np\nimport matplotlib."
  },
  {
    "path": "GAN/f_gan/f_gan_pytorch.py",
    "chars": 3350,
    "preview": "import torch\nimport torch.nn\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as op"
  },
  {
    "path": "GAN/f_gan/f_gan_tensorflow.py",
    "chars": 3798,
    "preview": "import tensorflow as tf\nfrom tensorflow.examples.tutorials.mnist import input_data\nimport numpy as np\nimport matplotlib."
  },
  {
    "path": "GAN/generative_adversarial_parallelization/gap_pytorch.py",
    "chars": 3368,
    "preview": "import torch\nimport torch.nn\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as op"
  },
  {
    "path": "GAN/gibbsnet/gibbsnet_pytorch.py",
    "chars": 2943,
    "preview": "import torch\nimport torch.nn\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as op"
  },
  {
    "path": "GAN/improved_wasserstein_gan/wgan_gp_tensorflow.py",
    "chars": 3322,
    "preview": "import tensorflow as tf\nfrom tensorflow.examples.tutorials.mnist import input_data\nimport numpy as np\nimport matplotlib."
  },
  {
    "path": "GAN/infogan/infogan_pytorch.py",
    "chars": 4601,
    "preview": "import torch\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as optim\nimport numpy"
  },
  {
    "path": "GAN/infogan/infogan_tensorflow.py",
    "chars": 4025,
    "preview": "import tensorflow as tf\nfrom tensorflow.examples.tutorials.mnist import input_data\nimport numpy as np\nimport matplotlib."
  },
  {
    "path": "GAN/least_squares_gan/lsgan_pytorch.py",
    "chars": 2430,
    "preview": "import torch\nimport torch.nn\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as op"
  },
  {
    "path": "GAN/least_squares_gan/lsgan_tensorflow.py",
    "chars": 3174,
    "preview": "import tensorflow as tf\nfrom tensorflow.examples.tutorials.mnist import input_data\nimport numpy as np\nimport matplotlib."
  },
  {
    "path": "GAN/magan/magan_pytorch.py",
    "chars": 3513,
    "preview": "import torch\nimport torch.nn\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as op"
  },
  {
    "path": "GAN/magan/magan_tensorflow.py",
    "chars": 4136,
    "preview": "import tensorflow as tf\nfrom tensorflow.examples.tutorials.mnist import input_data\nimport numpy as np\nimport matplotlib."
  },
  {
    "path": "GAN/mode_regularized_gan/mode_reg_gan_pytorch.py",
    "chars": 3606,
    "preview": "import torch\nimport torch.nn\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as op"
  },
  {
    "path": "GAN/mode_regularized_gan/mode_reg_gan_tensorflow.py",
    "chars": 3871,
    "preview": "import tensorflow as tf\nfrom tensorflow.examples.tutorials.mnist import input_data\nimport numpy as np\nimport matplotlib."
  },
  {
    "path": "GAN/softmax_gan/softmax_gan_pytorch.py",
    "chars": 2466,
    "preview": "import torch\nimport torch.nn\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as op"
  },
  {
    "path": "GAN/softmax_gan/softmax_gan_tensorflow.py",
    "chars": 3123,
    "preview": "import tensorflow as tf\nfrom tensorflow.examples.tutorials.mnist import input_data\nimport numpy as np\nimport matplotlib."
  },
  {
    "path": "GAN/vanilla_gan/gan_pytorch.py",
    "chars": 3585,
    "preview": "import torch\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as optim\nimport numpy"
  },
  {
    "path": "GAN/vanilla_gan/gan_tensorflow.py",
    "chars": 3411,
    "preview": "import tensorflow as tf\nfrom tensorflow.examples.tutorials.mnist import input_data\nimport numpy as np\nimport matplotlib."
  },
  {
    "path": "GAN/wasserstein_gan/wgan_pytorch.py",
    "chars": 2709,
    "preview": "import torch\nimport torch.nn\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as op"
  },
  {
    "path": "GAN/wasserstein_gan/wgan_tensorflow.py",
    "chars": 3148,
    "preview": "import tensorflow as tf\nfrom tensorflow.examples.tutorials.mnist import input_data\nimport numpy as np\nimport matplotlib."
  },
  {
    "path": "HelmholtzMachine/README.md",
    "chars": 206,
    "preview": "# Helmholtz Machines\n\nImplementation of (Binary) Helmholtz Machines.\n\n## Disclaimer\nCurrently the results is not that go"
  },
  {
    "path": "HelmholtzMachine/vanilla_HM/helmholtz.py",
    "chars": 2583,
    "preview": "\"\"\"\nOne layer Binary Helmholtz Machine\n==================================\n\"\"\"\nimport numpy as np\nimport matplotlib.pyplo"
  },
  {
    "path": "LICENSE",
    "chars": 1210,
    "preview": "This is free and unencumbered software released into the public domain.\n\nAnyone is free to copy, modify, publish, use, c"
  },
  {
    "path": "RBM/README.md",
    "chars": 251,
    "preview": "# Restricted Boltzmann Machines\n\nImplementation of (Binary) Restricted Boltzmann Machines (RBM). Hidden and visible vari"
  },
  {
    "path": "RBM/rbm_binary_cd.py",
    "chars": 2603,
    "preview": "import numpy as np\nimport matplotlib.pyplot as plt\nimport matplotlib.gridspec as gridspec\nimport os\nfrom tensorflow.exam"
  },
  {
    "path": "RBM/rbm_binary_pcd.py",
    "chars": 2509,
    "preview": "import numpy as np\nimport matplotlib.pyplot as plt\nimport matplotlib.gridspec as gridspec\nimport os\nfrom tensorflow.exam"
  },
  {
    "path": "README.md",
    "chars": 2684,
    "preview": "# Generative Models\nCollection of generative models, e.g. GAN, VAE in Pytorch and Tensorflow.\nAlso present here are RBM "
  },
  {
    "path": "VAE/adversarial_autoencoder/aae_pytorch.py",
    "chars": 3035,
    "preview": "import torch\nimport torch.nn\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as op"
  },
  {
    "path": "VAE/adversarial_autoencoder/aae_tensorflow.py",
    "chars": 3635,
    "preview": "import tensorflow as tf\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport matplotlib.gridspec as gridspec\nimport"
  },
  {
    "path": "VAE/adversarial_vb/avb_pytorch.py",
    "chars": 3131,
    "preview": "import torch\nimport torch.nn\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as op"
  },
  {
    "path": "VAE/adversarial_vb/avb_tensorflow.py",
    "chars": 3859,
    "preview": "import tensorflow as tf\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport matplotlib.gridspec as gridspec\nimport"
  },
  {
    "path": "VAE/conditional_vae/cvae_pytorch.py",
    "chars": 3756,
    "preview": "import torch\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as optim\nimport numpy"
  },
  {
    "path": "VAE/conditional_vae/cvae_tensorflow.py",
    "chars": 3693,
    "preview": "import tensorflow as tf\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport matplotlib.gridspec as gridspec\nimport"
  },
  {
    "path": "VAE/denoising_vae/dvae_pytorch.py",
    "chars": 3380,
    "preview": "import torch\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as optim\nimport numpy"
  },
  {
    "path": "VAE/denoising_vae/dvae_tensorflow.py",
    "chars": 3290,
    "preview": "import tensorflow as tf\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport matplotlib.gridspec as gridspec\nimport"
  },
  {
    "path": "VAE/vanilla_vae/vae_pytorch.py",
    "chars": 3377,
    "preview": "import torch\nimport torch.nn.functional as nn\nimport torch.autograd as autograd\nimport torch.optim as optim\nimport numpy"
  },
  {
    "path": "VAE/vanilla_vae/vae_tensorflow.py",
    "chars": 3337,
    "preview": "import tensorflow as tf\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport matplotlib.gridspec as gridspec\nimport"
  },
  {
    "path": "environment.yml",
    "chars": 150,
    "preview": "name: generative-models\ndependencies:\n- python=3.5.1\n- numpy=1.11.0\n- scikit-learn=0.17.1\n- scipy=0.17.1\n- matplotlib=1."
  }
]

About this extraction

This page contains the full source code of the wiseodd/generative-models GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 56 files (175.3 KB), approximately 56.0k tokens, and a symbol index with 216 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!