Full Code of yujiali/gmmn for AI

master 6cab7eb72dbe cached
15 files
131.9 KB
37.0k tokens
286 symbols
1 requests
Download .txt
Repository: yujiali/gmmn
Branch: master
Commit: 6cab7eb72dbe
Files: 15
Total size: 131.9 KB

Directory structure:
gitextract_4jb9m0vo/

├── .gitignore
├── README.md
├── core/
│   ├── __init__.py
│   ├── generative.py
│   ├── kernels.py
│   └── util.py
├── dataio/
│   ├── __init__.py
│   ├── mnist.py
│   └── tfd.py
├── eval_mmd_generative_model.py
├── generate_sample_figures.py
├── test.py
├── train.py
├── vistools.py
└── visualize.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
*.pyc
*.swp


================================================
FILE: README.md
================================================
# Generative Moment Matching Networks (GMMNs)
This is the code we used for the following paper:
* Yujia Li, Kevin Swersky, Richard Zemel.  *Generative moment matching networks*.  In International Conference on Machine Learning (ICML), 2015.

If you use this code in your research you should cite the above paper.

### Dependencies
To use the code you need to install some dependencies first:
* Standard python packages like **numpy, scipy, matplotlib**.  matplotlib is only needed for visualization.  You may also need sklearn for some features.
* [**gnumpy**](http://www.cs.toronto.edu/~tijmen/gnumpy.html).  If you have an NVIDIA GPU, gnumpy can speed up your computation significantly.  To use GPUs you need to install [**cudamat**](https://github.com/cudamat/cudamat) first.  If you don't have a GPU you can use [**npmat**](http://www.cs.toronto.edu/~ilya/npmat.py) as a replacement for cudamat; all computations will then be done on the CPU.
* The authors' lightweight neural network and optimization packages [**pynn**](https://github.com/yujiali/pynn) and [**pyopt**](https://github.com/yujiali/pyopt).

Once you get all dependencies ready, try to run `python test.py`.  If you are running this with npmat then all tests should pass.  If you are running this on a GPU with cudamat then some tests will fail - this is expected because of the low numeric precision supported by cudamat (`float32` everywhere), but all tests should still run and finish properly.

### Prepare data
Prepare the MNIST and TFD data, then go into the `dataio` directory, change paths to the datasets in `mnist.py` and `tfd.py`.

### Train the models
Use `python train.py -m <mode>` to train the corresponding model.  `<mode>` can be `mnistinput`, `mnistcode`, `tfdinput`, `tfdcode`, corresponding to the input space model and autoencoder code space model for the two datasets.

##### Other resources
There is a tensorflow implementation of GMMN provided by Siddharth Agrawal: https://github.com/siddharth-agrawal/Generative-Moment-Matching-Networks


================================================
FILE: core/__init__.py
================================================


================================================
FILE: core/generative.py
================================================
"""
Generative model using MMD objective.

Yujia Li, 09/2014
"""

import pynn.nn as nn
import pynn.loss as ls
import pynn.learner as learner
import gnumpy as gnp
import numpy as np
import math
import util
import time
import scipy.optimize as spopt

class UnsupervisedMmdLoss(ls.Loss):
    """
    Single-scale MMD loss for unsupervised learning.

    Compares a batch of model outputs (pred) against a batch of target data
    using the Gaussian kernel k(x,y) = exp(-|x-y|^2 / (2 sigma)).  Only the
    gradient with respect to pred is returned.
    """
    def __init__(self, **kwargs):
        super(UnsupervisedMmdLoss, self).__init__(**kwargs)
        # kernel bandwidth; defaults to 1 when not given
        self.sigma = kwargs.get('sigma', 1)

    def load_target(self, target, **kwargs):
        """
        Store the target data distribution, an n_cases * n_dims matrix.  The
        data is wrapped in a garray unless it already is one.
        """
        self.target = target if isinstance(target, gnp.garray) else gnp.garray(target)
        self.n_target = target.shape[0]

    def _make_s_mat(self, n_pred, n_target):
        """
        Build the (n_pred + n_target) x 2 S matrix used in the loss.

        Column 0 holds 1/n_pred on the pred rows, column 1 holds 1/n_target on
        the target rows, and 1/(n_pred + n_target) is subtracted everywhere.
        """
        n_total = n_pred + n_target
        s_mat = gnp.zeros((n_total, 2))
        s_mat[:n_pred, 0] = 1.0 / n_pred
        s_mat[n_pred:, 1] = 1.0 / n_target
        s_mat -= 1.0 / n_total
        return s_mat

    def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
        """
        Return (loss, grad) where grad has one row per row of pred.  The
        compute_grad flag is not consulted; the gradient is always computed.
        """
        pred = pred if isinstance(pred, gnp.garray) else gnp.garray(pred)
        n_pred = pred.shape[0]

        S = self._make_s_mat(n_pred, self.n_target)
        data = gnp.concatenate((pred, self.target), axis=0)

        gram = data.dot(data.T)
        if gram.shape[0] > 4000:
            # gnumpy's diag() misbehaves on large matrices, go through numpy
            sq_norm = gnp.garray(np.diag(gram.asarray()))
        else:
            sq_norm = gram.diag()

        # pairwise squared distances, then the Gaussian kernel matrix
        sq_dist = -2 * gram + sq_norm + sq_norm[:,gnp.newaxis]
        kernel = gnp.exp(-1.0 / (2 * self.sigma) * sq_dist)
        weighted = S.dot(S.T) * kernel

        total = weighted.sum()
        row_sum = weighted.sum(axis=1)
        full_grad = 2.0 / self.sigma * (weighted.dot(data) - data * row_sum[:,gnp.newaxis])

        # the first n_pred rows belong to pred, the rest to the fixed target
        return total, full_grad[:n_pred,:]

    def get_name(self):
        return 'mmdgen'

    def get_id(self):
        return 201

    def __repr__(self):
        fields = (self.get_name(), self.weight, self.sigma)
        return 'Loss <%s> w=%g, sigma=%g' % fields

ls.register_loss(UnsupervisedMmdLoss())

class UnsupervisedMmdLossMultiScale(ls.Loss):
    """
    MMD loss for unsupervised learning, summed over several kernel scales.

    For each bandwidth in sigma the Gaussian kernel exp(-|x-y|^2 / (2 sigma))
    is evaluated on the stacked data (pred rows first, target rows after) and
    the per-scale terms are combined using scale_weight.
    """
    def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
        super(UnsupervisedMmdLossMultiScale, self).__init__(**kwargs)
        self.sigma = list(map(float, sigma))
        self.n_scales = len(sigma)

        if scale_weight is not None:
            assert(len(scale_weight) == len(sigma))
            self.scale_weight = list(map(float, scale_weight))
        else:
            # without explicit weights every scale counts equally
            self.scale_weight = [1.0] * self.n_scales

    def load_target(self, target, **kwargs):
        """
        Store the target data distribution, an n_cases * n_dims matrix.  The
        data is wrapped in a garray unless it already is one.
        """
        self.target = target if isinstance(target, gnp.garray) else gnp.garray(target)
        self.n_target = target.shape[0]

    def _make_s_mat(self, n_pred, n_target):
        """
        Build the (n_pred + n_target) x 2 S matrix used in the loss.
        """
        n_total = n_pred + n_target
        s_mat = gnp.zeros((n_total, 2))
        s_mat[:n_pred, 0] = 1.0 / n_pred
        s_mat[n_pred:, 1] = 1.0 / n_target
        s_mat -= 1.0 / n_total
        return s_mat

    def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
        """
        Return (loss, grad) accumulated over all scales; grad covers the pred
        rows only.  The compute_grad flag is not consulted.
        """
        pred = pred if isinstance(pred, gnp.garray) else gnp.garray(pred)
        n_pred = pred.shape[0]

        S = self._make_s_mat(n_pred, self.n_target)
        X = gnp.concatenate((pred, self.target), axis=0)

        gram = X.dot(X.T)
        if gram.shape[0] > 4000:
            # gnumpy's diag() misbehaves on large matrices, go through numpy
            sq_norm = gnp.garray(np.diag(gram.asarray()))
        else:
            sq_norm = gram.diag()

        # equals -0.5 * |x_i - x_j|^2, shared by every scale
        prod_mat = gram - 0.5 * sq_norm - 0.5 * sq_norm[:,gnp.newaxis]
        ww = S.dot(S.T)

        loss = 0
        grad = None
        for i in range(self.n_scales):
            sigma_i = self.sigma[i]
            A = self.scale_weight[i] * ww * gnp.exp(1.0 / sigma_i * prod_mat)
            loss += A.sum()

            row_sum = A.sum(axis=1)
            scale_grad = 2.0 / sigma_i * (A.dot(X) - X * row_sum[:,gnp.newaxis])
            if grad is None:
                grad = scale_grad
            else:
                grad += scale_grad

        return loss, grad[:n_pred,:]

    def get_name(self):
        return 'mmdgen_multiscale'

    def get_id(self):
        return 202

    def __repr__(self):
        fields = (self.get_name(), self.weight, str(self.sigma), str(self.scale_weight))
        return 'Loss <%s> w=%g, sigma=%s, scale_weight=%s' % fields

ls.register_loss(UnsupervisedMmdLossMultiScale())

class LinearTimeUnsupervisedMmdLoss(ls.Loss):
    """
    MMD loss for unsupervised learning.

    This loss measures the discrepancy between a distribution given by a 
    neural net model with a data distribution.

    This is the linear time estimator proposed by Gretton et al.  The batch
    is split into two halves and kernels are evaluated only between matching
    rows of the halves, so the cost grows linearly with the batch size.

    Keyword arguments read here:
        sigma: bandwidth of the kernel exp(-|x-y|^2 / (2 sigma)), default 1.
        use_modified_loss: when True, add a second term that also pairs
            X with Y and X_N with Y_N.  Default False.
        use_absolute_value: when True, a negative loss estimate is negated
            together with its gradient.  Default True.
    """
    def __init__(self, **kwargs):
        super(LinearTimeUnsupervisedMmdLoss, self).__init__(**kwargs)
        self.use_modified_loss = kwargs.get('use_modified_loss', False)
        self.use_absolute_value = kwargs.get('use_absolute_value', True)
        self.sigma = kwargs.get('sigma', 1)

    def load_target(self, target, **kwargs):
        """
        target is the target data distribution, n_cases * n_dims matrix.
        """
        if isinstance(target, gnp.garray):
            self.target = target
        else:
            self.target = gnp.garray(target)

        # read the size from the converted array, so inputs that gnp.garray
        # accepts but that have no .shape attribute also work
        self.n_target = self.target.shape[0]

    def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
        """
        Return (loss, grad) with grad of the same shape as pred.

        pred must have as many rows as the loaded target and an even number
        of rows; both conditions are checked with assertions.  The
        compute_grad flag is not consulted.
        """
        if not isinstance(pred, gnp.garray):
            pred = gnp.garray(pred)

        n_pred = pred.shape[0]
        assert n_pred == self.n_target
        assert n_pred % 2 == 0

        # floor division keeps this an int under both Python 2 and Python 3,
        # which the slices below require
        n_half = n_pred // 2

        X = pred[:n_half]
        X_N = pred[n_half:]
        Y = self.target[:n_half]
        Y_N = self.target[n_half:]

        diff_x_xn = X - X_N
        diff_x_yn = X - Y_N
        diff_xn_y = X_N - Y
        diff_y_yn = Y - Y_N

        factor = -0.5 / self.sigma

        # one kernel value per row pair
        k_x_xn = gnp.exp(factor * (diff_x_xn**2).sum(axis=1))
        k_y_yn = gnp.exp(factor * (diff_y_yn**2).sum(axis=1))
        k_x_yn = gnp.exp(factor * (diff_x_yn**2).sum(axis=1))
        k_xn_y = gnp.exp(factor * (diff_xn_y**2).sum(axis=1))

        loss = 1.0 / n_pred * (k_x_xn.sum() + k_y_yn.sum() - k_x_yn.sum() - k_xn_y.sum())
        grad_x = 1.0 / (n_pred * self.sigma) * (k_x_yn[:,gnp.newaxis] * diff_x_yn - k_x_xn[:,gnp.newaxis] * diff_x_xn)
        grad_xn = 1.0 / (n_pred * self.sigma) * (k_xn_y[:,gnp.newaxis] * diff_xn_y + k_x_xn[:,gnp.newaxis] * diff_x_xn)

        if self.use_modified_loss:
            diff_x_y = X - Y
            diff_xn_yn = X_N - Y_N
            k_x_y = gnp.exp(factor * (diff_x_y**2).sum(axis=1))
            k_xn_yn = gnp.exp(factor * (diff_xn_yn**2).sum(axis=1))

            loss += 1.0 / n_pred * (k_x_xn.sum() + k_y_yn.sum() - k_x_y.sum() - k_xn_yn.sum())
            grad_x += 1.0 / (n_pred * self.sigma) * (k_x_y[:,gnp.newaxis] * diff_x_y - k_x_xn[:,gnp.newaxis] * diff_x_xn)
            grad_xn += 1.0 / (n_pred * self.sigma) * (k_xn_yn[:,gnp.newaxis] * diff_xn_yn + k_x_xn[:,gnp.newaxis] * diff_x_xn)

        # restore the original row order of pred: first half, then second half
        grad = gnp.concatenate([grad_x, grad_xn], axis=0)

        if self.use_absolute_value and loss < 0:
            loss = -loss
            grad = -grad

        return loss, grad

    def get_name(self):
        return 'linear_time_mmdgen'

    def get_id(self):
        return 203

    def __repr__(self):
        return 'Loss <%s> w=%g, sigma=%g' % (
                self.get_name(), self.weight, self.sigma)

ls.register_loss(LinearTimeUnsupervisedMmdLoss())

class LinearTimeMinibatchUnsupervisedMmdLoss(ls.Loss):
    """
    MMD loss for unsupervised learning.

    This loss measures the discrepancy between a distribution given by a 
    neural net model with a data distribution.

    This is a version where the full MMD is only computed on minibatches,
    therefore the time complexity for a set of N pairs of data points and
    minibatch size M is O(N/M * M^2) = O(NM)

    Keyword arguments read here:
        sigma: bandwidth of the kernel exp(-|x-y|^2 / (2 sigma)), default 1.
        minibatch_size: number of pred/target rows per minibatch, default 100.
    """
    def __init__(self, **kwargs):
        super(LinearTimeMinibatchUnsupervisedMmdLoss, self).__init__(**kwargs)
        self.sigma = kwargs.get('sigma', 1)
        self.minibatch_size = kwargs.get('minibatch_size', 100)

    def load_target(self, target, **kwargs):
        """
        target is the target data distribution, n_cases * n_dims matrix.
        """
        if isinstance(target, gnp.garray):
            self.target = target
        else:
            self.target = gnp.garray(target)

        # read the size from the converted array, so inputs that gnp.garray
        # accepts but that have no .shape attribute also work
        self.n_target = self.target.shape[0]

    def _make_s_mat(self, n_pred, n_target):
        """
        Create the S matrix that will be used in loss computation.
        """
        s = gnp.zeros((n_pred + n_target, 2))
        s[:n_pred, 0] = 1.0 / n_pred
        s[n_pred:, 1] = 1.0 / n_target
        s -= 1.0 / (n_pred + n_target)
        return s

    def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
        """
        Return (loss, grad), both averaged over the minibatches; grad has the
        same number of rows as pred.

        pred must have as many rows as the loaded target (asserted).  Row i of
        pred is grouped with row i of the target.  The compute_grad flag is
        not consulted.
        """
        if not isinstance(pred, gnp.garray):
            pred = gnp.garray(pred)

        n_pred = pred.shape[0]
        assert n_pred == self.n_target

        loss = 0
        grad = []

        # ceiling division; '//' keeps the count an int on Python 3 as well,
        # which range() requires
        n_batches = (n_pred + self.minibatch_size - 1) // self.minibatch_size
        for i_batch in range(n_batches):
            i_start = i_batch * self.minibatch_size
            # the last minibatch may be shorter than minibatch_size
            i_end = min(i_start + self.minibatch_size, n_pred)
            n_rows = i_end - i_start

            X = gnp.concatenate((pred[i_start:i_end], self.target[i_start:i_end]), axis=0)
            W = self._make_s_mat(n_rows, n_rows)

            XX = X.dot(X.T)
            if XX.shape[0] > 4000:  # this special case is due to a weird bug in gnumpy
                x = gnp.garray(np.diag(XX.asarray()))
            else:
                x = XX.diag()

            K = gnp.exp(-1.0 / (2 * self.sigma) * (-2 * XX + x + x[:,gnp.newaxis]))
            A = W.dot(W.T) * K

            loss += A.sum()
            a = A.sum(axis=1)
            # keep only the rows that belong to pred in this minibatch
            grad.append((2.0 / self.sigma * (A.dot(X) - X * a[:,gnp.newaxis]))[:n_rows])

        return loss / n_batches, gnp.concatenate(grad, axis=0) / n_batches

    def get_name(self):
        return 'linear_time_minibatch_mmdgen'

    def get_id(self):
        return 204

    def __repr__(self):
        return 'Loss <%s> w=%g, sigma=%g, minibatch_size=%d' % (
                self.get_name(), self.weight, self.sigma, self.minibatch_size)

ls.register_loss(LinearTimeMinibatchUnsupervisedMmdLoss())

class RandomFeatureMmdLoss(ls.Loss):
    """
    MMD loss for unsupervised learning.

    This loss measures the discrepancy between a distribution given by a 
    neural net model with a data distribution.

    This is a version where the kernel k(x,y) is estimated by product of random
    features.  For every scale a random projection matrix is drawn when the
    target is loaded, and the loss is the squared distance between the mean
    cos/sin features of pred and those of the target.

    sigma: list of kernel bandwidths; the square root of each is used to
        scale the random projections.
    scale_weight: optional list of per-scale weights, defaults to all ones.
    n_features: number of random projections per scale.
    """
    def __init__(self, sigma=[1.0], scale_weight=None, n_features=1024, **kwargs):
        super(RandomFeatureMmdLoss, self).__init__(**kwargs)
        # kept only for display in __repr__
        self.original_sigma = sigma
        self.sigma = [np.sqrt(float(s)) for s in sigma]
        self.n_scales = len(sigma)

        if scale_weight is None:
            self.scale_weight = [1.0] * self.n_scales
        else:
            assert(len(scale_weight) == len(sigma))
            self.scale_weight = [float(w) for w in scale_weight]

        self.n_features = n_features

    def _generate_random_matrix(self, n_features, n_dims, sigma):
        """
        return a list of random matrices each of size n_features x n_dims,
        one per entry of sigma, drawn in the order of sigma
        """
        return [gnp.randn(n_features, n_dims) / s for s in sigma]

    def _generate_random_features(self, x, w):
        """
        Return the (cos, sin) feature matrices of x under the projection w,
        each divided by sqrt(n_features).
        """
        # the projection is shared by both feature maps, compute it once
        proj = x.dot(w.T)
        norm = np.sqrt(self.n_features)
        return gnp.cos(proj) / norm, gnp.sin(proj) / norm

    def load_target(self, target, **kwargs):
        """
        target is the target data distribution, n_cases * n_dims matrix.

        Draws fresh random projections and caches the mean target features
        for every scale.
        """
        # actually target does not need to be stored
        if isinstance(target, gnp.garray):
            self.target = target
        else:
            self.target = gnp.garray(target)

        # everything below uses the converted garray, so non-garray input is
        # never multiplied with the garray projection matrices directly
        self.n_target = self.target.shape[0]
        self.w = self._generate_random_matrix(self.n_features, self.target.shape[1], self.sigma)

        self.v_target = []
        for w in self.w:
            t_c, t_s = self._generate_random_features(self.target, w)
            self.v_target.append((t_c.mean(axis=0), t_s.mean(axis=0)))

    def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
        """
        Return (loss, grad) with grad of the same shape as pred.  The
        compute_grad flag is not consulted.
        """
        if not isinstance(pred, gnp.garray):
            pred = gnp.garray(pred)

        loss = 0
        grad = None
        for i in range(self.n_scales):
            w = self.w[i]
            x_c, x_s = self._generate_random_features(pred, w)
            # difference of mean features between pred and target
            d_c = x_c.mean(axis=0) - self.v_target[i][0]
            d_s = x_s.mean(axis=0) - self.v_target[i][1]

            loss += ((d_c**2).sum() + (d_s**2).sum()) * self.scale_weight[i]
            s_c = 2.0 / pred.shape[0] * d_c
            s_s = 2.0 / pred.shape[0] * d_s

            # chain rule through cos (-> -sin) and sin (-> cos), then through
            # the linear projection w
            g = (-x_s * s_c + x_c * s_s).dot(w) * self.scale_weight[i]

            if grad is None:
                grad = g
            else:
                grad += g

        return loss, grad

    def get_name(self):
        return 'random_feature_mmdgen'

    def get_id(self):
        return 205

    def __repr__(self):
        return 'Loss <%s> w=%g, nf=%d, sigma=%s, scale_weight=%s' % (
                self.get_name(), self.weight, self.n_features, str(self.original_sigma),
                str(self.scale_weight))

ls.register_loss(RandomFeatureMmdLoss())

class PairMmdLossMultiScale(ls.Loss):
    """
    Multi-scale MMD loss between exactly two sample sets.

    The model samples (pred) and the target data are stacked, a Gaussian
    kernel exp(-|x-y|^2 / (2 sigma)) is evaluated for every sigma, and the
    per-scale terms are combined with scale_weight.  The S matrix here is a
    single column, in contrast to the two-column form used for a set of
    multiple distributions.
    """
    def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
        super(PairMmdLossMultiScale, self).__init__(**kwargs)
        self.sigma = list(map(float, sigma))
        self.n_scales = len(sigma)

        if scale_weight is not None:
            assert(len(scale_weight) == len(sigma))
            self.scale_weight = list(map(float, scale_weight))
        else:
            # without explicit weights every scale counts equally
            self.scale_weight = [1.0] * self.n_scales

    def load_target(self, target, **kwargs):
        """
        Store the target data distribution, an n_cases * n_dims matrix.  The
        data is wrapped in a garray unless it already is one.
        """
        self.target = target if isinstance(target, gnp.garray) else gnp.garray(target)
        self.n_target = target.shape[0]

    def _make_s_mat(self, n_pred, n_target):
        """
        Build the S column vector: 1/n_pred on the pred rows and -1/n_target
        on the target rows.
        """
        is_pred = gnp.zeros((n_pred + n_target, 1))
        is_pred[:n_pred] = 1
        return is_pred / n_pred - (1 - is_pred) / n_target

    def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
        """
        Return (loss, grad) accumulated over all scales; grad covers the pred
        rows only.  The compute_grad flag is not consulted.
        """
        pred = pred if isinstance(pred, gnp.garray) else gnp.garray(pred)
        n_pred = pred.shape[0]

        s_vec = self._make_s_mat(n_pred, self.n_target)
        X = gnp.concatenate((pred, self.target), axis=0)

        gram = X.dot(X.T)
        if gram.shape[0] > 4000:
            # gnumpy's diag() misbehaves on large matrices, go through numpy
            sq_norm = gnp.garray(np.diag(gram.asarray()))
        else:
            sq_norm = gram.diag()

        # equals -0.5 * |x_i - x_j|^2, shared by every scale
        prod_mat = gram - 0.5 * sq_norm - 0.5 * sq_norm[:,gnp.newaxis]
        ww = s_vec.dot(s_vec.T)

        loss = 0
        grad = None
        for i in range(self.n_scales):
            sigma_i = self.sigma[i]
            A = self.scale_weight[i] * ww * gnp.exp(1.0 / sigma_i * prod_mat)
            loss += A.sum()

            row_sum = A.sum(axis=1)
            scale_grad = 2.0 / sigma_i * (A.dot(X) - X * row_sum[:,gnp.newaxis])
            if grad is None:
                grad = scale_grad
            else:
                grad += scale_grad

        return loss, grad[:n_pred,:]

    def get_name(self):
        return 'mmdgen_multiscale_pair'

    def get_id(self):
        return 206

    def __repr__(self):
        fields = (self.get_name(), self.weight, str(self.sigma), str(self.scale_weight))
        return 'Loss <%s> w=%g, sigma=%s, scale_weight=%s' % fields

ls.register_loss(PairMmdLossMultiScale())

############################################################
# Some extensions to the loss
############################################################

class DifferentiableKernelMmdLoss(ls.Loss):
    """
    Common base for MMD losses whose kernel can be backpropagated through.

    Subclasses supply compute_not_weighted_loss_and_grad; this class only
    handles target storage and construction of the S vector.
    """
    def __init__(self, **kwargs):
        super(DifferentiableKernelMmdLoss, self).__init__(**kwargs)

    def load_target(self, target, **kwargs):
        """
        Store the target data batch that the model should match, converted
        with util.to_garray, together with its number of rows.
        """
        converted = util.to_garray(target)
        self.target = converted
        self.n_target = converted.shape[0]

    def _make_s_mat(self, n_pred, n_target):
        """
        Build the S matrix, which is a single column here because only two
        domains are compared.

        Rows are ordered with the samples (pred) first and the real data
        (target) after them: pred rows get 1/n_pred, target rows -1/n_target.
        """
        is_pred = gnp.zeros((n_pred + n_target, 1))
        is_pred[:n_pred] = 1
        return is_pred / n_pred - (1 - is_pred) / n_target

    def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
        """
        Return the loss and its gradient.  Must be overridden by subclasses.
        """
        raise NotImplementedError()

class MultiScaleDifferentiableKernelMmdLoss(DifferentiableKernelMmdLoss):
    """
    Base class for MMD loss with kernels on multiple scales.

    sigma: a single bandwidth or a list/tuple of bandwidths, one per scale.
    scale_weight: a single weight or a list/tuple of weights matching sigma
        in length; defaults to 1.0 for every scale.
    """
    def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
        super(MultiScaleDifferentiableKernelMmdLoss, self).__init__(**kwargs)
        # a tuple is a sequence of scales too; wrapping it as one element
        # would make the float() conversion below fail
        if not isinstance(sigma, (list, tuple)):
            sigma = [sigma]
        self.sigma = [float(s) for s in sigma]
        self.n_scales = len(sigma)

        if scale_weight is None:
            self.scale_weight = [1.0] * self.n_scales
        else:
            if not isinstance(scale_weight, (list, tuple)):
                scale_weight = [scale_weight]
            assert(len(scale_weight) == len(sigma))
            self.scale_weight = [float(w) for w in scale_weight]

class GaussianKernelMmdLoss(MultiScaleDifferentiableKernelMmdLoss):
    """
    Multi-scale MMD loss using the Gaussian kernel

        k(x,y) = exp(-|x-y|^2 / (2 sigma))

    This is essentially a reimplementation of PairMmdLossMultiScale /
    UnsupervisedMmdLoss on top of the differentiable-kernel base classes.
    """
    def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
        super(GaussianKernelMmdLoss, self).__init__(
                sigma=sigma, scale_weight=scale_weight, **kwargs)

    def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
        """
        Return (loss, grad) accumulated over all scales; grad covers the pred
        rows only.  The compute_grad flag is not consulted.
        """
        pred = util.to_garray(pred)
        n_pred = pred.shape[0]

        s_vec = self._make_s_mat(n_pred, self.n_target)
        X = gnp.concatenate((pred, self.target), axis=0)

        gram = X.dot(X.T)
        if gram.shape[0] > 4000:
            # gnumpy's diag() misbehaves on large matrices, go through numpy
            sq_norm = gnp.garray(np.diag(gram.asarray()))
        else:
            sq_norm = gram.diag()

        # equals -0.5 * |x_i - x_j|^2, shared by every scale
        prod_mat = gram - 0.5 * sq_norm - 0.5 * sq_norm[:,gnp.newaxis]
        ww = s_vec.dot(s_vec.T)

        loss = 0
        grad = None
        for i in range(self.n_scales):
            sigma_i = self.sigma[i]
            A = self.scale_weight[i] * ww * gnp.exp(1.0 / sigma_i * prod_mat)
            loss += A.sum()

            row_sum = A.sum(axis=1)
            scale_grad = 2.0 / sigma_i * (A.dot(X) - X * row_sum[:,gnp.newaxis])
            if grad is None:
                grad = scale_grad
            else:
                grad += scale_grad

        return loss, grad[:n_pred,:]

    def get_name(self):
        return 'mmdgen_gaussian'

    def get_id(self):
        return 301

    def __repr__(self):
        fields = (self.get_name(), self.weight, str(self.sigma), str(self.scale_weight))
        return 'Loss <%s> w=%g, sigma=%s, scale_weight=%s' % fields

ls.register_loss(GaussianKernelMmdLoss())

class LaplacianKernelMmdLoss(MultiScaleDifferentiableKernelMmdLoss):
    """
    k(x,y) = exp(-|x-y|_2/sigma)

    Multi-scale MMD loss with a kernel that decays with the (non-squared)
    Euclidean distance.  The loss is summed over all scales in self.sigma,
    each weighted by the matching entry of self.scale_weight.
    """
    def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
        super(LaplacianKernelMmdLoss, self).__init__(sigma=sigma, 
                scale_weight=scale_weight, **kwargs)

    def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
        """
        Return (loss, grad); grad covers the pred rows only.  The compute_grad
        flag is not consulted, the gradient is always computed.
        """
        pred = util.to_garray(pred)
        n_pred = pred.shape[0]

        # stack pred on top of target; W weights the rows of the two groups
        W = self._make_s_mat(n_pred, self.n_target)
        X = gnp.concatenate((pred, self.target), axis=0)

        ww = W.dot(W.T)

        # x holds the diagonal of the Gram matrix, i.e. the squared row norms
        XX = X.dot(X.T)
        if XX.shape[0] > 4000:  # this special case is due to a weird bug in gnumpy
            x = gnp.garray(np.diag(XX.asarray()))
        else:
            x = XX.diag()

        # index vector and zero vector used to overwrite matrix diagonals
        idx = np.arange(X.shape[0])
        zv = gnp.zeros(idx.size)

        # handle numeric problems: _R holds the pairwise squared distances.
        # When its smallest entry is below 1e-4, every entry is shifted so the
        # smallest becomes 1e-4 (keeping the sqrt argument positive), and the
        # diagonal is then reset to zero.
        _R = x + x[:,gnp.newaxis] - 2 * XX
        _R_min = _R.min()
        if _R_min < 1e-4:
            _R = _R - _R_min + 1e-4
            _R[idx,idx] = zv

        # pairwise Euclidean distances
        R = gnp.sqrt(_R)

        loss = 0
        grad = None

        for i in range(self.n_scales):
            K = gnp.exp(-1.0 / self.sigma[i] * R)
            L = self.scale_weight[i] * ww * K
            loss += L.sum()
            # differentiating the distance contributes a 1 / R factor; adding
            # the identity changes only the diagonal of the divisor, and the
            # diagonal of A is zeroed right after, so those entries never
            # contribute to the gradient
            A = L / (R + gnp.eye(L.shape[0]))
            A[idx,idx] = zv
            a = A.sum(axis=1)
            if grad is None:
                grad = 2.0 / self.sigma[i] * (A.dot(X) - X * a[:,gnp.newaxis])
            else:
                grad += 2.0 / self.sigma[i] * (A.dot(X) - X * a[:,gnp.newaxis])
        
        # only the pred rows receive a gradient
        return loss, grad[:n_pred,:]

    def get_name(self):
        return 'mmdgen_laplacian'

    def get_id(self):
        return 302

    def __repr__(self):
        return 'Loss <%s> w=%g, sigma=%s, scale_weight=%s' % (
                self.get_name(), self.weight, str(self.sigma), str(self.scale_weight))

# register a default-constructed instance with the loss module
ls.register_loss(LaplacianKernelMmdLoss())

class LaplacianL1KernelMmdLoss(MultiScaleDifferentiableKernelMmdLoss):
    """
    Multi-scale MMD loss using the L1 Laplacian kernel

        k(x,y) = exp(-|x-y|/sigma)

    The kernel is evaluated one row at a time against the whole stacked data
    set instead of forming a full pairwise distance matrix.
    """
    def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
        super(LaplacianL1KernelMmdLoss, self).__init__(
                sigma=sigma, scale_weight=scale_weight, **kwargs)

    def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
        """
        Return (loss, grad); grad covers the pred rows only.  The compute_grad
        flag is not consulted.
        """
        pred = util.to_garray(pred)
        n_pred = pred.shape[0]

        s_vec = self._make_s_mat(n_pred, self.n_target)
        X = gnp.concatenate((pred, self.target), axis=0)
        ww = s_vec.dot(s_vec.T)

        loss = 0
        grad = None

        for row in range(X.shape[0]):
            # offsets of every point from the current anchor row
            diff = X - X[row]
            row_weight = ww[row]
            l1_dist = diff.abs().sum(axis=1)

            for i_scale in range(self.n_scales):
                sigma = self.sigma[i_scale]
                scale_w = self.scale_weight[i_scale]

                k = gnp.exp(-l1_dist / sigma)
                loss += scale_w * (row_weight * k).sum()

                # derivative of the L1 distance is the negated sign of diff
                coeff = scale_w * row_weight * k / sigma
                g = coeff[:,gnp.newaxis] * ((diff < 0) - (diff > 0))
                # the anchor row moves opposite to all the other rows combined
                g[row] = -g.sum(axis=0)

                if grad is None:
                    grad = g
                else:
                    grad += g

        return loss, grad[:n_pred,:]

    def get_name(self):
        return 'mmdgen_laplacian_l1'

    def get_id(self):
        return 303

    def __repr__(self):
        fields = (self.get_name(), self.weight, str(self.sigma), str(self.scale_weight))
        return 'Loss <%s> w=%g, sigma=%s, scale_weight=%s' % fields

ls.register_loss(LaplacianL1KernelMmdLoss())

class SqrtGaussianKernelMmdLoss(GaussianKernelMmdLoss):
    """
    Square root of the multi-scale Gaussian-kernel MMD loss.

    The kernel is the one used by GaussianKernelMmdLoss,
    k(x,y) = exp(-|x-y|^2 / (2 sigma)); the square root is applied to the
    loss value returned by that class, not to the kernel itself.
    """
    def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
        super(SqrtGaussianKernelMmdLoss, self).__init__(
                sigma=sigma, scale_weight=scale_weight, **kwargs)

    def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
        """
        Return (sqrt(loss), grad) where loss and its gradient come from the
        parent class and the gradient is rescaled by the chain rule.
        """
        parent = super(SqrtGaussianKernelMmdLoss, self)
        sq_loss, sq_grad = parent.compute_not_weighted_loss_and_grad(pred, compute_grad=compute_grad)
        root = math.sqrt(sq_loss)
        # d sqrt(u) = du / (2 sqrt(u)); the small constant guards against a
        # zero denominator
        denom = 2 * root + 1e-10
        return root, sq_grad / denom

    def get_name(self):
        return 'mmdgen_sqrt_gaussian'

    def get_id(self):
        return 304

    def __repr__(self):
        fields = (self.get_name(), self.weight, str(self.sigma), str(self.scale_weight))
        return 'Loss <%s> w=%g, sigma=%s, scale_weight=%s' % fields

ls.register_loss(SqrtGaussianKernelMmdLoss())

class CpuDifferentiableKernelMmdLoss(ls.Loss):
    """
    Common base for numpy-based MMD losses whose kernel can be
    backpropagated through.

    Subclasses supply compute_not_weighted_loss_and_grad; this class only
    handles target storage and construction of the S vector.
    """
    def __init__(self, **kwargs):
        super(CpuDifferentiableKernelMmdLoss, self).__init__(**kwargs)

    def load_target(self, target, **kwargs):
        """
        Store the target data batch that the model should match, converted
        with util.to_nparray, together with its number of rows.
        """
        converted = util.to_nparray(target)
        self.target = converted
        self.n_target = converted.shape[0]

    def _make_s_mat(self, n_pred, n_target):
        """
        Build the S matrix, which is a single float32 column here because
        only two domains are compared.

        Rows are ordered with the samples (pred) first and the real data
        (target) after them: pred rows get 1/n_pred, target rows -1/n_target.
        """
        is_pred = np.zeros((n_pred + n_target, 1), dtype=np.float32)
        is_pred[:n_pred] = 1
        return is_pred / n_pred - (1 - is_pred) / n_target

    def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
        """
        Return the loss and its gradient.  Must be overridden by subclasses.
        """
        raise NotImplementedError()

class CpuMultiScaleDifferentiableKernelMmdLoss(CpuDifferentiableKernelMmdLoss):
    """
    Base class for MMD loss with kernels on multiple scales.

    sigma: a single bandwidth or a list/tuple of bandwidths, one per scale.
    scale_weight: a single weight or a list/tuple of weights matching sigma
        in length; defaults to 1.0 for every scale.
    """
    def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
        super(CpuMultiScaleDifferentiableKernelMmdLoss, self).__init__(**kwargs)
        # a tuple is a sequence of scales too; wrapping it as one element
        # would make the float() conversion below fail
        if not isinstance(sigma, (list, tuple)):
            sigma = [sigma]
        self.sigma = [float(s) for s in sigma]
        self.n_scales = len(sigma)

        if scale_weight is None:
            self.scale_weight = [1.0] * self.n_scales
        else:
            if not isinstance(scale_weight, (list, tuple)):
                scale_weight = [scale_weight]
            assert(len(scale_weight) == len(sigma))
            self.scale_weight = [float(w) for w in scale_weight]

class CpuGaussianKernelMmdLoss(CpuMultiScaleDifferentiableKernelMmdLoss):
    """
    Multi-scale MMD loss using the Gaussian kernel

        k(x,y) = exp(-|x-y|^2 / (2 sigma))

    computed with numpy.  Essentially a reimplementation of
    PairMmdLossMultiScale / UnsupervisedMmdLoss; the gradient is converted
    with util.to_garray before it is returned.
    """
    def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
        super(CpuGaussianKernelMmdLoss, self).__init__(
                sigma=sigma, scale_weight=scale_weight, **kwargs)

    def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
        """
        Return (loss, grad); grad covers the pred rows only.  The compute_grad
        flag is not consulted.
        """
        pred = util.to_nparray(pred)
        n_pred = pred.shape[0]

        s_vec = self._make_s_mat(n_pred, self.n_target)
        data = np.concatenate((pred, self.target), axis=0)

        gram = data.dot(data.T)
        sq_norm = np.diag(gram)

        # equals -0.5 * |x_i - x_j|^2, shared by every scale
        prod_mat = gram - 0.5 * sq_norm - 0.5 * sq_norm[:,np.newaxis]
        ww = s_vec.dot(s_vec.T)

        # Sum the weighted kernels over scales once for the loss, and once
        # divided by each sigma for the gradient.
        kernel_sum = self.scale_weight[0] * np.exp(1.0 / self.sigma[0] * prod_mat)
        kernel_over_sigma = kernel_sum / self.sigma[0]
        for i in range(1, self.n_scales):
            term = self.scale_weight[i] * np.exp(1.0 / self.sigma[i] * prod_mat)
            kernel_sum += term
            kernel_over_sigma += term / self.sigma[i]

        loss = (ww * kernel_sum).sum()

        A = ww * kernel_over_sigma
        row_sum = A.sum(axis=1)

        # restrict to the pred rows before the matrix product
        pred_rows = slice(None, n_pred)
        grad = 2.0 * (A[pred_rows,:].dot(data) - data[pred_rows,:] * row_sum[pred_rows,np.newaxis])

        return loss, util.to_garray(grad)

    def get_name(self):
        return 'cpu_mmdgen_gaussian'

    def get_id(self):
        return 305

    def __repr__(self):
        fields = (self.get_name(), self.weight, str(self.sigma), str(self.scale_weight))
        return 'Loss <%s> w=%g, sigma=%s, scale_weight=%s' % fields

ls.register_loss(CpuGaussianKernelMmdLoss())

class CpuSqrtGaussianKernelMmdLoss(CpuGaussianKernelMmdLoss):
    """
    Square root of the multi-scale Gaussian kernel MMD loss:

        loss = sqrt(L),  L being the loss of CpuGaussianKernelMmdLoss

    The kernel itself is the same as in the parent class; the square root is
    applied to the loss value, and the gradient follows from the chain rule.
    """
    def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
        super(CpuSqrtGaussianKernelMmdLoss, self).__init__(sigma=sigma, 
                scale_weight=scale_weight, **kwargs)

    def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
        """
        Return (sqrt(L), dL/dpred / (2 * sqrt(L))), where L and dL/dpred are
        the loss and gradient computed by the parent class.
        """
        loss, grad = super(CpuSqrtGaussianKernelMmdLoss, self).compute_not_weighted_loss_and_grad(pred, compute_grad=compute_grad)
        # Floating point rounding can leave a tiny negative loss when pred and
        # target (nearly) coincide; clamp so math.sqrt does not raise.
        sqrt_loss = math.sqrt(max(loss, 0.0))
        # 1e-10 guards against division by zero when the loss vanishes.
        return sqrt_loss, grad / (2 * sqrt_loss + 1e-10)

    def get_name(self):
        return 'cpu_mmdgen_sqrt_gaussian'

    def get_id(self):
        return 306

    def __repr__(self):
        return 'Loss <%s> w=%g, sigma=%s, scale_weight=%s' % (
                self.get_name(), self.weight, str(self.sigma), str(self.scale_weight))

ls.register_loss(CpuSqrtGaussianKernelMmdLoss())

class CpuPerExampleSqrtGaussianKernelMmdLoss(ls.Loss):
    """
    Each batch contains multiple examples, MMD is applied on a per example basis.

    Predictions are split into consecutive groups of pred_per_example rows;
    group i is compared against the i-th target matrix with a
    CpuSqrtGaussianKernelMmdLoss, and the results are averaged over groups.
    """
    def __init__(self, sigma=[1.0], scale_weight=None, pred_per_example=1, **kwargs):
        super(CpuPerExampleSqrtGaussianKernelMmdLoss, self).__init__(**kwargs)
        # The same kwargs are given to the inner loss that does the real work.
        self.mmd_loss = CpuSqrtGaussianKernelMmdLoss(sigma=sigma, scale_weight=scale_weight, **kwargs)
        self.pred_per_example = pred_per_example

    def load_target(self, target, **kwargs):
        """
        target is the target data batch that we want our model to match.

        target is a list of target matrices, each correspond to the targets for
        one prediction/one group of predictions.  1-D targets are promoted to
        a single-row matrix.
        """
        self.target = [util.to_nparray(t) if len(t.shape) > 1 else util.to_nparray(t)[np.newaxis,:] for t in target]

    def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
        """
        Return loss and gradient, both averaged over the groups.

        compute_grad is accepted for interface compatibility; the gradient is
        always computed.
        """
        pred = util.to_nparray(pred)

        loss = 0
        grad = gnp.zeros(pred.shape)

        assert pred.shape[0] % self.pred_per_example == 0
        # Floor division keeps n_groups an integer even under true division
        # (Python 3 or "from __future__ import division"), as range() requires.
        n_groups = pred.shape[0] // self.pred_per_example
        assert n_groups == len(self.target)

        for i_group in range(n_groups):
            i_start = i_group * self.pred_per_example
            i_end = i_start + self.pred_per_example

            # The inner loss is re-targeted for every group.
            self.mmd_loss.load_target(self.target[i_group])
            t_loss, grad[i_start:i_end] = self.mmd_loss.compute_not_weighted_loss_and_grad(pred[i_start:i_end], compute_grad=True)
            loss += t_loss

        return loss / n_groups, grad / n_groups

    def get_name(self):
        return 'cpu_per_example_mmdgen_sqrt_gaussian'

    def get_id(self):
        return 307

    def __repr__(self):
        # Weight and scales are reported from the inner loss.
        return 'Loss <%s> w=%g, sigma=%s, scale_weight=%s, pred_per_example=%s' % (
                self.get_name(), self.mmd_loss.weight, str(self.mmd_loss.sigma), str(self.mmd_loss.scale_weight), str(self.pred_per_example))

ls.register_loss(CpuPerExampleSqrtGaussianKernelMmdLoss())

############################################################
# Learners, samplers, and others
############################################################

class StochasticGenerativeNet(nn.NeuralNet):
    """
    A generative feed-forward neural net with a layer of stochastic hidden 
    units at the top (or bottom depending on how you orient the network), and
    a deterministic top-down mapping given by the neural net.

    The hidden units are the input of the net and are fixed to have a uniform
    distribution over the space [-1,1]^in_dim.
    """
    def __init__(self, in_dim=0, out_dim=0):
        super(StochasticGenerativeNet, self).__init__(in_dim, out_dim)

    def sample_hiddens(self, n_samples):
        """
        Generate specified number of samples of hidden units.

        Return an n_samples x in_dim matrix with entries in [-1,1].
        """
        # gnp.rand values are rescaled to [-1,1]; gnp.randn would be the
        # drop-in choice for a Gaussian prior instead.
        return 2 * gnp.rand(n_samples, self.in_dim) - 1

    def generate_samples(self, z=None, n_samples=100, sample_batch_size=1000):
        """
        Generate samples of visibles units. The provided z will be used for
        propagating samples if given, otherwise new samples of z will be
        generated using sample_hiddens.

        When z is given, n_samples and sample_batch_size are ignored and one
        sample is produced per row of z.  Otherwise n_samples samples are
        generated in batches of at most sample_batch_size.
        """
        if z is not None:
            return self.forward_prop(z, add_noise=False, compute_loss=False)

        samples = gnp.zeros((n_samples, self.out_dim))
        # Ceiling division; "//" keeps the count an integer even under true
        # division (Python 3 or "from __future__ import division").
        n_batches = (n_samples + sample_batch_size - 1) // sample_batch_size

        for i_batch in range(n_batches):
            i_start = i_batch * sample_batch_size
            # The last batch may be smaller than sample_batch_size.
            i_end = (i_start + sample_batch_size) if i_batch + 1 < n_batches else n_samples
            n_samples_in_batch = i_end - i_start

            z = self.sample_hiddens(n_samples_in_batch)
            samples[i_start:i_end] = self.forward_prop(z, add_noise=False, compute_loss=False)

        return samples

class StochasticGenerativeNetWithAutoencoder(StochasticGenerativeNet):
    """
    A StochasticGenerativeNet that lives in the code layer of an autoencoder:
    the net generates codes, and the autoencoder's decoder maps them back to
    the data space.
    """
    def __init__(self, in_dim=0, out_dim=0, autoencoder=None):
        super(StochasticGenerativeNetWithAutoencoder, self).__init__(in_dim, out_dim)
        self.autoencoder = autoencoder

    def _generate_code_samples(self, z=None, n_samples=100, sample_batch_size=1000):
        """
        Generate samples in the code space, i.e. the plain output of the
        parent class's generate_samples.
        """
        parent = super(StochasticGenerativeNetWithAutoencoder, self)
        return parent.generate_samples(
                z=z, n_samples=n_samples, sample_batch_size=sample_batch_size)

    def generate_samples(self, z=None, n_samples=100, sample_batch_size=1000):
        """
        Generate code samples and push them through the decoder.
        """
        codes = self._generate_code_samples(
                z=z, n_samples=n_samples, sample_batch_size=sample_batch_size)
        return self.autoencoder.decoder.forward_prop(codes)

    def load_target(self, target, *args, **kwargs):
        """
        The loss is defined in the code space, so the target is encoded with
        the autoencoder before being handed to the parent class.
        """
        encoded_target = self.autoencoder.encode(target)
        super(StochasticGenerativeNetWithAutoencoder, self).load_target(
                encoded_target, *args, **kwargs)

class StochasticGenerativeNetWithAutoencoderContainer(object):
    """
    Pairs an already trained net with an autoencoder so that samples can be
    generated in the data space: the net produces codes and the autoencoder's
    decoder turns them into samples.
    """
    def __init__(self, net, autoencoder):
        self.net = net
        self.autoencoder = autoencoder

    def generate_samples(self, z=None, n_samples=100, sample_batch_size=1000):
        """
        Generate samples with the net (arguments are passed on positionally)
        and decode them.
        """
        codes = self.net.generate_samples(z, n_samples, sample_batch_size)
        decoder = self.autoencoder.decoder
        return decoder.forward_prop(codes)

class SampleFilter(object):
    """
    Base class of the sample filters; subclasses implement filter.
    """
    def __init__(self):
        """Nothing to set up for the base class."""

    def filter(self, x):
        """
        Select the 'good' samples out of x.

        x: n x D matrix, one example per row.

        Return: n' x D matrix with n' <= n holding the examples that were
        kept.  Must be overridden by subclasses.
        """
        raise NotImplementedError()

class BlankSampleFilter(SampleFilter):
    """
    Debugging place holder: a filter that lets every sample through.
    """
    def filter(self, x):
        """Return x itself, unchanged."""
        unfiltered = x
        return unfiltered

class ClassifierSampleFilter(SampleFilter):
    """
    Keeps the samples that a classifier judges to be good, using a hard
    threshold on the predicted probability.
    """
    def __init__(self, classifier, threshold, prev=None):
        """
        classifier: makes probabilistic predictions through a function
            predict_proba, which outputs a prediction matrix with elements
            between 0 and 1.  p[i][0] close to 1 indicates that a sample is
            good, close to 0 indicates a sample is bad. p[i][1] should always
            be 1-p[i][0].
        threshold: a sample is kept when p[i][0] > threshold.
        prev: optional filter applied before this one, which allows multiple
            filters to be chained together.
        """
        self.classifier = classifier
        self.threshold = threshold
        self.prev = prev

    def filter(self, x):
        """
        Return the rows of x accepted by the classifier, in the same array
        type (garray or numpy array) as the input to this filter.
        """
        if self.prev is not None:
            x = self.prev.filter(x)

        # The classifier works on numpy arrays; remember whether to convert
        # the result back afterwards.
        is_garray = isinstance(x, gnp.garray)
        if is_garray:
            x = x.asarray()

        p = self.classifier.predict_proba(x)
        accepted = p[:,0] > self.threshold
        idx = np.arange(p.shape[0])[accepted]
        x = x[idx]

        if not is_garray:
            return x
        return gnp.garray(x)

class ClassifierSampleStochasticFilter(SampleFilter):
    """
    Like ClassifierSampleFilter, but samples are filtered out
    probabilistically instead of with a hard threshold: sample i is kept with
    probability p[i][0].
    """
    def __init__(self, classifier, prev=None):
        """
        classifier: should support probabilistic outputs (predict_proba).
        prev: optional filter applied before this one.
        """
        self.classifier = classifier
        self.prev = prev

    def filter(self, x):
        """
        Return the rows of x that survive the random acceptance test, in the
        same array type (garray or numpy array) as the input to this filter.
        """
        if self.prev is not None:
            x = self.prev.filter(x)

        is_garray = isinstance(x, gnp.garray)
        if is_garray:
            x = x.asarray()

        p = self.classifier.predict_proba(x)

        # One uniform draw per sample; keep the sample when p[i][0] beats it.
        accepted = p[:,0] > np.random.rand(p.shape[0])
        idx = np.arange(p.shape[0])[accepted]
        x = x[idx]

        if not is_garray:
            return x
        return gnp.garray(x)


class StochasticGenerativeNetWithFilter(object):
    """
    A sample generator made of a net and a sample filter.  It is used purely
    for generating samples, so the only method it is required to have is
    generate_samples.

    StochasticGenerativeNet can be used as a subclass of this one.
    """
    def __init__(self, net, sample_filter):
        """
        net can be StochasticGenerativeNet, or StochasticGenerativeNetWithFilter,
        which allows multiple filtered nets to be chained together.
        """
        self.net = net
        self.sample_filter = sample_filter

    def generate_samples(self, z=None, n_samples=100):
        """
        Generate samples from the net, throw away the ones rejected by the
        sample filter, and keep generating until n_samples samples passed.

        Every round asks the net for twice the number of samples still
        missing.
        """
        # Kept constant: growing it every round explodes in high threshold
        # settings.
        factor = 2

        candidates = self.net.generate_samples(z, n_samples * factor)
        x = self.sample_filter.filter(candidates)[:n_samples]
        gnp.free_reuse_cache()

        is_garray = isinstance(x, gnp.garray)
        while x.shape[0] < n_samples:
            n_missing = n_samples - x.shape[0]
            y = self.sample_filter.filter(self.net.generate_samples(z, n_missing * factor))
            extra = y[:n_missing]
            if is_garray:
                x = gnp.concatenate([x, extra], axis=0)
            else:
                x = np.r_[x, extra]
            gnp.free_reuse_cache()

        return x

class StochasticGenerativeNetLearner(learner.Learner):
    """
    Used for learning the StochasticGenerativeNet model.

    Training options understood by this learner (consumed and removed from
    the keyword arguments by _process_options):

    n_samples_per_update: number of hidden samples drawn per update.
    n_sample_update_iters: number of objective evaluations between two
        redraws of the hidden samples.
    minibatch_size: size of the target minibatches, 100 by default.
    minibatch_load_target: whether a new target minibatch is loaded whenever
        the hidden samples are redrawn, True by default.
    """
    def __init__(self, net):
        super(StochasticGenerativeNetLearner, self).__init__(net)
        self.n_samples_per_update = 100
        self.n_sample_update_iters = 1
        self.i_sample_update_iter = 0

        self.set_output_dir('.')

    def load_data(self, x_train):
        self.x_train = util.to_garray(x_train)

    def load_train_target(self):
        """Use the full training set as the target of the net."""
        self.net.load_target(self.x_train)

    def sample_hiddens(self):
        """Draw the hidden samples used by the next objective evaluations."""
        self.z = self.net.sample_hiddens(self.n_samples_per_update)

    def f_and_fprime(self, w):
        """
        Objective and gradient at parameter vector w, both divided by the
        number of hidden samples.  The hidden samples are redrawn every
        n_sample_update_iters calls.
        """
        self.net.set_param_from_vec(w)
        self.net.clear_gradient()
        if self.i_sample_update_iter % self.n_sample_update_iters == 0:
            self.sample_hiddens()
        self.i_sample_update_iter = (self.i_sample_update_iter + 1) % self.n_sample_update_iters
        self.net.forward_prop(self.z, add_noise=True, compute_loss=True)
        loss = self.net.get_loss() / self.z.shape[0]
        self.net.backward_prop()
        grad = self.net.get_grad_vec() / self.z.shape[0]
        return loss, grad

    def create_minibatch_generator(self, minibatch_size):
        self.minibatch_generator = learner.MiniBatchGenerator(
                self.x_train, minibatch_size=minibatch_size, random_order=True)

    def f_and_fprime_minibatch(self, w):
        """
        Same as f_and_fprime, except that a new target minibatch is loaded
        together with each redraw of the hidden samples (when
        minibatch_load_target is set).
        """
        self.net.set_param_from_vec(w)
        self.net.clear_gradient()

        if self.i_sample_update_iter % self.n_sample_update_iters == 0:
            if self.minibatch_load_target:
                x = self.minibatch_generator.next()
                self.net.load_target(x)
            self.sample_hiddens()

        self.i_sample_update_iter = (self.i_sample_update_iter + 1) % self.n_sample_update_iters

        self.net.forward_prop(self.z, add_noise=True, compute_loss=True)
        loss = self.net.get_loss() / self.z.shape[0]
        self.net.backward_prop()
        grad = self.net.get_grad_vec() / self.z.shape[0]

        return loss, grad

    def train_stochastic_lbfgs(self, **kwargs):
        """
        Train with scipy's L-BFGS-B on the minibatch objective.

        The options listed in the class docstring are consumed here; all the
        remaining keyword arguments are passed on to fmin_l_bfgs_b.
        """
        self._prepare_for_training()
        # _process_options consumes 'minibatch_size' and builds the minibatch
        # generator itself.  Removing the option from kwargs beforehand made
        # it fall back to the default size and discard the requested one.
        self._process_options(kwargs)
        self.best_w, self.best_obj, d = spopt.fmin_l_bfgs_b(self.f_and_fprime_minibatch, self.init_w, **kwargs)
        self.best_grad = d['grad']
        return self.f_post_training()

    def f_info(self, w):
        """
        Progress message for parameter vector w.  No evaluation is implemented
        for the generative model, a fixed place holder string is returned.
        """
        return '<place holder>'

    def _process_options(self, kwargs):
        """
        Read the options of this learner from kwargs and remove them, so that
        kwargs can be passed on to the optimizer afterwards.  Also rebuilds
        the minibatch generator and resets the sample update counter.
        """
        # Without the option the current value of these two is kept.
        self.n_samples_per_update = kwargs.pop(
                'n_samples_per_update', self.n_samples_per_update)
        self.n_sample_update_iters = kwargs.pop(
                'n_sample_update_iters', self.n_sample_update_iters)

        self.i_sample_update_iter = 0

        self.create_minibatch_generator(kwargs.pop('minibatch_size', 100))

        self.minibatch_load_target = kwargs.pop('minibatch_load_target', True)

    def f_post_training(self):
        """
        Return (best_obj, best_grad) when both are available, None otherwise.
        The net parameters are left as they are.
        """
        if hasattr(self, 'best_grad') and hasattr(self, 'best_obj'):
            return self.best_obj, self.best_grad

    def save_model(self):
        self.net.save_model_to_file(self.output_dir + '/gen_end.pdata')

    def save_checkpoint(self, label):
        self.net.save_model_to_file(self.output_dir + '/checkpoint_%s.pdata' % str(label))

class StochasticGenerativeNetLearnerAutoScale(learner.Learner):
    """
    Used for learning the StochasticGenerativeNet model with MMD loss.  The
    scale parameter will be automatically tuned.

    Training options understood by this learner (consumed and removed from
    the keyword arguments by _process_options):

    n_samples_per_update: number of hidden samples drawn per update.
    n_sample_update_iters: number of objective evaluations between two
        redraws of the hidden samples.
    minibatch_size: size of the target minibatches, 100 by default.
    minibatch_load_target: whether a new target minibatch is loaded whenever
        the hidden samples are redrawn, True by default.
    i_scale_update: number of objective evaluations between two updates of
        the loss scale, 0 (the default) disables the updates.
    n_scale_update_samples: number of samples used to select the scale, 2000
        by default.
    """
    def __init__(self, net):
        super(StochasticGenerativeNetLearnerAutoScale, self).__init__(net)
        self.n_samples_per_update = 100
        self.n_sample_update_iters = 1
        self.i_sample_update_iter = 0
        self.i_scale_update_iter = 0
        self.n_scale_update_iters = 0
        self.n_scale_update_samples = 2000
        # Candidate scales: 30 values log-spaced between 1 and 1e8.
        self._scale_selection_range = np.logspace(0, 8, 30)

        self.set_output_dir('.')

    def load_data(self, x_train):
        self.x_train = util.to_garray(x_train)

    def load_train_target(self):
        """Use the full training set as the target of the net."""
        self.net.load_target(self.x_train)

    def sample_hiddens(self):
        """Draw the hidden samples used by the next objective evaluations."""
        self.z = self.net.sample_hiddens(self.n_samples_per_update)

    def update_loss_scale(self):
        """
        Automatically set the scale of the loss.

        Every candidate scale is evaluated on a random subset of the training
        data against freshly generated samples; the scale with the largest
        loss is selected and the loss of the net is reset to that single
        scale.
        """
        n_data_samples = min(self.x_train.shape[0], self.n_scale_update_samples)
        data = self.x_train[np.random.permutation(self.x_train.shape[0])[:n_data_samples]]
        samples = self.net.generate_samples(n_samples=self.n_scale_update_samples)

        max_loss = 0
        max_sigma = 1
        for s in self._scale_selection_range:
            mmd = ls.get_loss_from_type_name(self.net.loss.get_name(), sigma=s, scale_weight=self.net.loss.scale_weight[0])
            mmd.load_target(data)
            loss = mmd.compute_not_weighted_loss_and_grad(samples, compute_grad=False)[0]
            if loss > max_loss:
                max_loss = loss
                max_sigma = s

        # Single-argument print calls give the same output with the Python 2
        # print statement and the Python 3 print function.
        print('>>> Reset loss...')
        # NOTE(review): only sigma and scale_weight are reset here; if the
        # loss keeps a separate count of scales it is not updated - confirm
        # this is fine for losses configured with several scales.
        self.net.loss.sigma = [float(max_sigma)]
        self.net.loss.scale_weight = [float(self.net.loss.scale_weight[0])]
        print('>>> %s' % (self.net.loss,))

    def f_and_fprime(self, w):
        """
        Objective and gradient at parameter vector w, both divided by the
        number of hidden samples.  Hidden samples and loss scale are updated
        on their own schedules before the evaluation.
        """
        self.net.set_param_from_vec(w)
        self.net.clear_gradient()

        # resample if necessary
        if self.i_sample_update_iter % self.n_sample_update_iters == 0:
            self.sample_hiddens()
        self.i_sample_update_iter = (self.i_sample_update_iter + 1) % self.n_sample_update_iters

        # update scale of the loss if necessary
        if self.n_scale_update_iters > 0:
            if self.i_scale_update_iter % self.n_scale_update_iters == 0:
                self.update_loss_scale()
            self.i_scale_update_iter = (self.i_scale_update_iter + 1) % self.n_scale_update_iters

        self.net.forward_prop(self.z, add_noise=True, compute_loss=True)
        loss = self.net.get_loss() / self.z.shape[0]
        self.net.backward_prop()
        grad = self.net.get_grad_vec() / self.z.shape[0]
        return loss, grad

    def create_minibatch_generator(self, minibatch_size):
        self.minibatch_generator = learner.MiniBatchGenerator(
                self.x_train, minibatch_size=minibatch_size, random_order=True)

    def f_and_fprime_minibatch(self, w):
        """
        Same as f_and_fprime, except that a new target minibatch is loaded
        together with each redraw of the hidden samples (when
        minibatch_load_target is set).
        """
        self.net.set_param_from_vec(w)
        self.net.clear_gradient()

        if self.i_sample_update_iter % self.n_sample_update_iters == 0:
            if self.minibatch_load_target:
                x = self.minibatch_generator.next()
                self.net.load_target(x)
            self.sample_hiddens()

        self.i_sample_update_iter = (self.i_sample_update_iter + 1) % self.n_sample_update_iters

        if self.n_scale_update_iters > 0:
            if self.i_scale_update_iter % self.n_scale_update_iters == 0:
                self.update_loss_scale()
            self.i_scale_update_iter = (self.i_scale_update_iter + 1) % self.n_scale_update_iters

        self.net.forward_prop(self.z, add_noise=True, compute_loss=True)
        loss = self.net.get_loss() / self.z.shape[0]
        self.net.backward_prop()
        grad = self.net.get_grad_vec() / self.z.shape[0]

        return loss, grad

    def train_stochastic_lbfgs(self, **kwargs):
        """
        Train with scipy's L-BFGS-B on the minibatch objective.

        The options listed in the class docstring are consumed here; all the
        remaining keyword arguments are passed on to fmin_l_bfgs_b.
        """
        self._prepare_for_training()
        # _process_options consumes 'minibatch_size' and builds the minibatch
        # generator itself.  Removing the option from kwargs beforehand made
        # it fall back to the default size and discard the requested one.
        self._process_options(kwargs)
        self.best_w, self.best_obj, d = spopt.fmin_l_bfgs_b(self.f_and_fprime_minibatch, self.init_w, **kwargs)
        self.best_grad = d['grad']
        return self.f_post_training()

    def f_info(self, w):
        """
        Progress message for parameter vector w.  No evaluation is implemented
        for the generative model, a fixed place holder string is returned.
        """
        return '<place holder>'

    def _process_options(self, kwargs):
        """
        Read the options of this learner from kwargs and remove them, so that
        kwargs can be passed on to the optimizer afterwards.  Also rebuilds
        the minibatch generator and resets the sample update counter.
        """
        # Without the option the current value of these two is kept.
        self.n_samples_per_update = kwargs.pop(
                'n_samples_per_update', self.n_samples_per_update)
        self.n_sample_update_iters = kwargs.pop(
                'n_sample_update_iters', self.n_sample_update_iters)

        self.i_sample_update_iter = 0

        self.create_minibatch_generator(kwargs.pop('minibatch_size', 100))

        self.minibatch_load_target = kwargs.pop('minibatch_load_target', True)

        # The scale options fall back to their defaults when not given.
        self.n_scale_update_iters = kwargs.pop('i_scale_update', 0)
        self.n_scale_update_samples = kwargs.pop('n_scale_update_samples', 2000)

    def f_post_training(self):
        """
        Return (best_obj, best_grad) when both are available, None otherwise.
        The net parameters are left as they are.
        """
        if hasattr(self, 'best_grad') and hasattr(self, 'best_obj'):
            return self.best_obj, self.best_grad

    def save_model(self):
        self.net.save_model_to_file(self.output_dir + '/gen_end.pdata')

    def save_checkpoint(self, label):
        self.net.save_model_to_file(self.output_dir + '/checkpoint_%s.pdata' % str(label))






================================================
FILE: core/kernels.py
================================================
"""
Implementation of different kernel functions.

Yujia Li, 11/2014
"""

import numpy as np
import gnumpy as gnp

def safe_diag(x):
    """
    Return the diagonal of the matrix x, which is either a numpy array or a
    garray.  Any other type raises an Exception.
    """
    if isinstance(x, np.ndarray):
        return x.diagonal()

    if not isinstance(x, gnp.garray):
        raise Exception()

    # Large garrays take the detour through numpy.
    # NOTE(review): presumably garray.diag is unreliable or too costly for
    # more than 4000 rows - confirm the reason for this threshold.
    if x.shape[0] > 4000:
        return gnp.garray(x.asarray().diagonal())
    return x.diag()

class Kernel(object):
    """
    Base class of the kernels.  Subclasses implement
    compute_kernel_transformation and get_name.
    """
    def __init__(self):
        """Nothing to set up for the base class."""

    def compute_kernel_matrix(self, x):
        """
        x: n_examples * n_dims input data matrix

        Return: n_examples * n_examples kernel matrix, computed as the kernel
        transformation of x with itself as the base.
        """
        kernel_matrix = self.compute_kernel_transformation(x, x)
        return kernel_matrix

    def compute_kernel_transformation(self, x_base, x_new):
        """
        x_base: n_examples_1 * n_dims data matrix
        x_new: n_examples_2 * n_dims data matrix

        For each example in x_new, compute its kernel distance with each of the
        examples in x_base, return a n_examples_2 * n_examples_1 matrix as the
        transformed representation of x_new.

        Must be overridden by subclasses.
        """
        raise NotImplementedError()

    def get_name(self):
        """Name of the kernel, must be overridden by subclasses."""
        raise NotImplementedError()

class GaussianKernel(Kernel):
    """
    Gaussian kernel k(x,y) = exp(-|x-y|^2 / (2 sigma^2)), computed with
    gnumpy.
    """
    def __init__(self, sigma):
        self.sigma = sigma

    def compute_kernel_matrix(self, x):
        """
        x: n_examples * n_dims data matrix (converted to garray if needed)

        Return: n_examples * n_examples kernel matrix
        """
        if not isinstance(x, gnp.garray):
            x = gnp.garray(x)

        xx = x.dot(x.T)
        # The diagonal of x x^T holds the squared norms of the examples.
        x_diag = safe_diag(xx)

        sq_dist = -2 * xx + x_diag + x_diag[:,gnp.newaxis]
        return gnp.exp(-1.0 / (2 * self.sigma**2) * sq_dist)

    def compute_kernel_transformation(self, x_base, x_new):
        """
        Return the n_examples_2 * n_examples_1 matrix of kernel values between
        the rows of x_new and the rows of x_base.
        """
        if not isinstance(x_base, gnp.garray):
            x_base = gnp.garray(x_base)
        if not isinstance(x_new, gnp.garray):
            x_new = gnp.garray(x_new)

        xx = x_new.dot(x_base.T)
        xx_base = (x_base**2).sum(axis=1)
        xx_new = (x_new**2).sum(axis=1)

        sq_dist = -2 * xx + xx_base + xx_new[:,gnp.newaxis]
        return gnp.exp(-1.0 / (2 * self.sigma**2) * sq_dist)

    def get_name(self):
        return 'gaussian_kernel'

class EuclideanKernel(Kernel):
    """
    Squared Euclidean distance |x-y|^2 between examples, computed with
    gnumpy.
    """
    def __init__(self):
        """Nothing to set up."""

    def compute_kernel_matrix(self, x):
        """
        x: n_examples * n_dims data matrix (converted to garray if needed)

        Return: n_examples * n_examples matrix of squared distances
        """
        if not isinstance(x, gnp.garray):
            x = gnp.garray(x)

        xx = x.dot(x.T)
        # The diagonal of x x^T holds the squared norms of the examples.
        x_diag = safe_diag(xx)

        sq_dist = -2 * xx + x_diag + x_diag[:,gnp.newaxis]
        return sq_dist

    def compute_kernel_transformation(self, x_base, x_new):
        """
        Return the n_examples_2 * n_examples_1 matrix of squared distances
        between the rows of x_new and the rows of x_base.
        """
        if not isinstance(x_base, gnp.garray):
            x_base = gnp.garray(x_base)
        if not isinstance(x_new, gnp.garray):
            x_new = gnp.garray(x_new)

        xx = x_new.dot(x_base.T)
        xx_base = (x_base**2).sum(axis=1)
        xx_new = (x_new**2).sum(axis=1)

        sq_dist = -2 * xx + xx_base + xx_new[:,gnp.newaxis]
        return sq_dist

class CPUGaussianKernel(Kernel):
    """
    Gaussian kernel k(x,y) = exp(-|x-y|^2 / (2 sigma^2)) computed with numpy
    on the CPU; same formula as GaussianKernel, numpy arrays are returned.
    """
    def __init__(self, sigma):
        self.sigma = sigma

    @staticmethod
    def _to_nparray(x):
        """Convert x (numpy array, garray or array-like) to a numpy array."""
        if isinstance(x, np.ndarray):
            return x
        # garrays provide asarray(); anything else goes through numpy.
        if hasattr(x, 'asarray'):
            return x.asarray()
        return np.asarray(x)

    def compute_kernel_matrix(self, x):
        """
        x: n_examples * n_dims input data matrix

        Return: n_examples * n_examples kernel matrix (numpy array)
        """
        x = self._to_nparray(x)
        xx = x.dot(x.T)
        # The diagonal of x x^T holds the squared norms of the examples.
        x_diag = xx.diagonal()

        sq_dist = -2 * xx + x_diag + x_diag[:,np.newaxis]
        return np.exp(-1.0 / (2 * self.sigma**2) * sq_dist)

    def compute_kernel_transformation(self, x_base, x_new):
        """
        x_base: n_examples_1 * n_dims data matrix
        x_new: n_examples_2 * n_dims data matrix

        Return: n_examples_2 * n_examples_1 matrix of kernel values between
        the rows of x_new and the rows of x_base (numpy array).
        """
        x_base = self._to_nparray(x_base)
        x_new = self._to_nparray(x_new)

        xx = x_new.dot(x_base.T)
        xx_base = (x_base**2).sum(axis=1)
        xx_new = (x_new**2).sum(axis=1)

        sq_dist = -2 * xx + xx_base + xx_new[:,np.newaxis]
        return np.exp(-1.0 / (2 * self.sigma**2) * sq_dist)

class LinearKernel(Kernel):
    """
    Linear kernel k(x,y) = x . y, computed with gnumpy.
    """
    def compute_kernel_matrix(self, x):
        """Return the n_examples * n_examples matrix x x^T."""
        if not isinstance(x, gnp.garray):
            x = gnp.garray(x)
        return x.dot(x.T)

    def compute_kernel_transformation(self, x_base, x_new):
        """
        Return the n_examples_2 * n_examples_1 matrix of inner products
        between the rows of x_new and the rows of x_base.
        """
        if not isinstance(x_base, gnp.garray):
            x_base = gnp.garray(x_base)
        if not isinstance(x_new, gnp.garray):
            x_new = gnp.garray(x_new)

        return x_new.dot(x_base.T)

    def get_name(self):
        return 'linear_kernel'

class CosineKernel(Kernel):
    """
    Cosine similarity kernel k(x,y) = x . y / (|x| |y|), computed with
    gnumpy.
    """
    def compute_kernel_matrix(self, x):
        """
        x: n_examples * n_dims input data matrix

        Return: n_examples * n_examples matrix of cosine similarities.
        """
        # Same computation as the transformation of x with itself, so that
        # the two methods always agree.
        return self.compute_kernel_transformation(x, x)

    def compute_kernel_transformation(self, x_base, x_new):
        """
        x_base: n_examples_1 * n_dims data matrix
        x_new: n_examples_2 * n_dims data matrix

        Return: n_examples_2 * n_examples_1 matrix of cosine similarities
        between the rows of x_new and the rows of x_base.
        """
        x_base = x_base if isinstance(x_base, gnp.garray) else gnp.garray(x_base)
        x_new = x_new if isinstance(x_new, gnp.garray) else gnp.garray(x_new)

        base_norm = gnp.sqrt((x_base**2).sum(axis=1))
        new_norm = gnp.sqrt((x_new**2).sum(axis=1))

        # Cosine similarity divides by the PRODUCT of the two L2 norms (not
        # their sum, and not the squared norms); the small constant avoids a
        # division by zero for all-zero rows.
        norm_prod = new_norm[:,gnp.newaxis] * base_norm[gnp.newaxis,:] + 1e-20

        return x_new.dot(x_base.T) / norm_prod



================================================
FILE: core/util.py
================================================
"""
Some helpful utility functions.

Yujia Li, 09/2014
"""

import gnumpy as gnp
import numpy as np

def to_garray(x):
    """Return x itself if it is already a garray, else a garray built from x."""
    if isinstance(x, gnp.garray):
        return x
    return gnp.garray(x)

def to_nparray(x):
    """
    Return x itself if it is already a numpy array, else the result of its
    asarray() method (as provided by garrays).
    """
    if isinstance(x, np.ndarray):
        return x
    return x.asarray()

def to_one_of_K(t, K=None):
    """
    Convert the label array t into a one-of-K matrix: the t[i]th element on
    the ith row is 1 and all others on that row are 0.

    K is the number of columns, t.max() + 1 when not given.  The result is
    returned as a garray.
    """
    n_cases = t.size
    if K is None:
        K = t.max() + 1

    # Flatten anything that has at least one dimension.
    labels = t.ravel() if len(t.shape) > 0 else t

    one_hot = np.zeros((n_cases, K))
    rows = np.arange(n_cases)
    one_hot[rows, labels] = 1
    return gnp.garray(one_hot)

def to_plus_minus_of_K(t, K=None):
    """
    Convert the label array t into a +1/-1 matrix: the t[i]th element on the
    ith row is 1 and all others on that row are -1.

    K is the number of columns, t.max() + 1 when not given.  The result is
    returned as a garray.
    """
    n_cases = t.size
    if K is None:
        K = t.max() + 1

    # Flatten anything that has at least one dimension.
    labels = t.ravel() if len(t.shape) > 0 else t

    signs = -np.ones((n_cases, K))
    rows = np.arange(n_cases)
    signs[rows, labels] = 1
    return gnp.garray(signs)



================================================
FILE: dataio/__init__.py
================================================


================================================
FILE: dataio/mnist.py
================================================
"""
Data I/O for mnist dataset.

Yujia Li, 01/2015
"""

import cPickle as pickle
import numpy as np

# Fill in the path to your mnist data here
#
# This data file is supposed to be a pickled dictionary containing numpy arrays
# under the keys train_data, test_data, train_label and test_label.  train_data
# and test_data are matrices of size NxD, where N is the number of data
# points and D=784 (28x28) is the size of the image.  Each row is a data point,
# assumed to be already normalized to [0,1].  train_label and test_label
# are matrices of size Nx1, each label is an integer from 0 to 9.
_DATA_FILE_PATH = 'path/to/your/mnist/data'

def load_raw_data(path=None):
    """
    Return the original train/test split as (train_data, test_data).

    path: pickled data file to read, _DATA_FILE_PATH when not given.
    """
    if path is None:
        path = _DATA_FILE_PATH

    # Pickles are binary data, the file has to be opened in binary mode.
    with open(path, 'rb') as f:
        d = pickle.load(f)

    return d['train_data'], d['test_data']

def load_data(n_val=5000, path=None):
    """
    Split part of training data to be used as validation data.

    n_val: number of training points moved to the validation set.
    path: pickled data file to read, _DATA_FILE_PATH when not given.

    Return (x_train, x_val, x_test).  The split is a fixed permutation
    (seed 0), so it is the same on every call.
    """
    if path is None:
        path = _DATA_FILE_PATH

    # Pickles are binary data, the file has to be opened in binary mode.
    with open(path, 'rb') as f:
        d = pickle.load(f)

    x_train = d['train_data']
    x_test  = d['test_data']

    # A private generator seeded with 0 yields the same permutation as
    # seeding the global one, without touching the global state at all.
    idx = np.random.RandomState(0).permutation(x_train.shape[0])

    x_val = x_train[idx[:n_val]]
    x_train = x_train[idx[n_val:]]

    return x_train, x_val, x_test

def load_labeled_data(n_val=5000, path=None):
    """
    Load both the data and the labels.

    n_val: number of training points moved to the validation set.
    path: pickled data file to read, _DATA_FILE_PATH when not given.

    Return (x_train, t_train, x_val, t_val, x_test, t_test).  The split is a
    fixed permutation (seed 0), so it is the same on every call.
    """
    if path is None:
        path = _DATA_FILE_PATH

    # Pickles are binary data, the file has to be opened in binary mode.
    with open(path, 'rb') as f:
        d = pickle.load(f)

    x_train = d['train_data']
    t_train = d['train_label']

    x_test = d['test_data']
    t_test = d['test_label']

    # A private generator seeded with 0 yields the same permutation as
    # seeding the global one, without touching the global state at all.
    idx = np.random.RandomState(0).permutation(x_train.shape[0])

    # Data and labels are split with the same indices to stay aligned.
    x_val = x_train[idx[:n_val]]
    t_val = t_train[idx[:n_val]]
    x_train = x_train[idx[n_val:]]
    t_train = t_train[idx[n_val:]]

    return x_train, t_train, x_val, t_val, x_test, t_test




================================================
FILE: dataio/tfd.py
================================================
"""
The Toronto Face Database, Charlie/Marc'Aurelio's version

Yujia Li, 01/2015
"""

import scipy.io as sio
import numpy as np

# Fill in your TFD path here.  This is a format string: both %d are filled
# with the image size when the data is loaded (e.g. 48 and 48).
_TFD_DATA_PATH_FORMAT = 'path/to/your/TFD_ranzato_%dx%d.mat'

def _load_raw_data(image_size=48):
    """
    Load the TFD .mat file for the given image size.

    Return (images, folds, labs_id, labs_ex), the two label arrays being
    squeezed.
    """
    mat_path = _TFD_DATA_PATH_FORMAT % (image_size, image_size)
    d = sio.loadmat(mat_path)

    images = d['images']
    folds = d['folds']
    labs_id = d['labs_id'].squeeze()
    labs_ex = d['labs_ex'].squeeze()
    return images, folds, labs_id, labs_ex

def get_fixed_rand_permutation(size, seed=1):
    """
    Return a random permutation of range(size) that only depends on size and
    seed, so that the same permutation is obtained on every call.

    The global numpy random state is left untouched.
    """
    # A private generator yields the same permutation as seeding the global
    # one with the same seed, but cannot leave the global state modified when
    # an exception is raised.
    return np.random.RandomState(seed).permutation(size)

class TFD(object):
    """
    Holds the arrays read from one TFD file and serves subsets of them.

    The folds array is indexed as folds[:, fold]; each entry is a code that
    says which subset the example belongs to in that fold (see _SET_MAP).
    """
    # subset name -> code stored in the folds array
    _SET_MAP = {'unlabeled': 0, 'train' : 1, 'val': 2, 'test': 3}

    def __init__(self, image_size=48):
        self.images, self.folds, self.labs_id, self.labs_ex = \
                _load_raw_data(image_size)

        # Size of the 'val' subset of each of the 5 folds, and the running
        # offsets used by get_proper_fold to cut one slice of that size per
        # fold out of a fixed permutation of the unlabeled examples.
        self._val_sizes = [(self.folds[:,fold] == 2).sum() for fold in range(5)]
        self._val_idx_start = np.array([0] + self._val_sizes).cumsum()

    def _select(self, data_mask, center, scale):
        """
        Return (images, labs_id, labs_ex) for the examples picked by the
        boolean data_mask.  Images are converted to float32 copies and then
        normalized:

        center and scale: (x - 127.5) / 127.5
        center only:       x - 127.5
        scale only:        x / 255.0
        """
        images = self.images[data_mask].astype(np.float32)
        labs_id = self.labs_id[data_mask]
        labs_ex = self.labs_ex[data_mask]

        if center and scale:
            images -= 127.5
            images /= 127.5
        elif center:
            images -= 127.5
        elif scale:
            images /= 255.0

        return images, labs_id, labs_ex

    def get_fold(self, fold, set_name, center=False, scale=False):
        """
        0 <= fold < 5
        set_name should be one of {train, val, test, unlabeled}

        Return images, labs_id, and labs_ex.

        There are two labels available: identity and expression.  For
        unsupervised learning tasks these labels are not useful though.  The
        quality of these labels is also not very high.

        Raises KeyError if set_name is not one of the names above.
        """
        set_id = self._SET_MAP[set_name]
        data_mask = (self.folds[:,fold] == set_id)
        return self._select(data_mask, center, scale)

    def get_proper_fold(self, fold, set_name, center=False, scale=False):
        """
        Same as get_fold, except that the validation sets across folds will be
        disjoint from test sets and training sets - so validation is proper.

        'val' and 'unlabeled' are both drawn from the examples marked as
        unlabeled in this fold: a fixed permutation of those examples is cut
        into one validation slice per fold (sized like that fold's original
        validation set), and everything after the last slice is returned as
        'unlabeled'.  'train' and 'test' behave exactly like get_fold.
        """
        set_id = self._SET_MAP[set_name]

        if set_id == 0 or set_id == 2:
            # row indices of the unlabeled examples in the full dataset
            unlabeled_idx = np.flatnonzero(self.folds[:,fold] == 0)
            idx = get_fixed_rand_permutation(unlabeled_idx.size)
            if set_id == 2:
                chosen = idx[self._val_idx_start[fold]:self._val_idx_start[fold+1]]
            else:
                chosen = idx[self._val_idx_start[-1]:]

            # idx holds positions within unlabeled_idx, so map them back to
            # dataset rows.  Using them directly as row indices would select
            # from the first rows of the dataset whether or not those rows
            # are unlabeled.
            data_mask = np.zeros(self.folds.shape[0], dtype=bool)
            data_mask[unlabeled_idx[chosen]] = True
        else:
            data_mask = (self.folds[:,fold] == set_id)

        return self._select(data_mask, center, scale)
        


# Cache of loaded TFD objects keyed by image size.  An entry stays None until
# load_fold or load_proper_fold first needs data of that size.
_tfd = {48: None, 96: None}

def load_fold(fold, set_name, center=False, scale=False, image_size=48):
    """
    Return TFD.get_fold(fold, set_name, center, scale) for the dataset with
    the given image size.

    The dataset is loaded the first time a given image_size is requested and
    kept in the module-level _tfd cache afterwards.

    Raises ValueError if image_size is neither 48 nor 96.
    """
    if image_size not in (48, 96):
        # ValueError is still an Exception, so existing handlers keep working
        raise ValueError('image_size has to be either 48 or 96!')

    dataset = _tfd[image_size]
    if dataset is None:
        # load data the first time we use it
        dataset = _tfd[image_size] = TFD(image_size)

    return dataset.get_fold(fold, set_name, center, scale)

def load_proper_fold(fold, set_name, center=False, scale=False, image_size=48):
    """
    Return TFD.get_proper_fold(fold, set_name, center, scale) for the dataset
    with the given image size.

    The dataset is loaded the first time a given image_size is requested and
    kept in the module-level _tfd cache afterwards.

    Raises ValueError if image_size is neither 48 nor 96.
    """
    if image_size not in (48, 96):
        # ValueError is still an Exception, so existing handlers keep working
        raise ValueError('image_size has to be either 48 or 96!')

    dataset = _tfd[image_size]
    if dataset is None:
        # load data the first time we use it
        dataset = _tfd[image_size] = TFD(image_size)

    return dataset.get_proper_fold(fold, set_name, center, scale)



================================================
FILE: eval_mmd_generative_model.py
================================================
"""
Module for evaluating MMD generative models.

Yujia Li, 11/2014
"""

import cPickle as pickle
import time
import numpy as np
import gnumpy as gnp
import core.generative as gen
import core.kernels as ker

def load_tfd_fold(fold=0):
    """
    Load the train, val and test images of one TFD fold.

    Images are loaded with scale=True and every example is flattened into a
    single row.  Return x_train, x_val, x_test.
    """
    import dataio.tfd as tfd

    def images_of(set_name):
        images, _, _ = tfd.load_fold(fold, set_name, scale=True)
        return images

    # note that the training set used here is the 'unlabeled' set in TFD
    x_train = images_of('unlabeled')
    x_val = images_of('val')
    x_test = images_of('test')

    # number of values per example, taken from the training images
    imsz = np.prod(x_train.shape[1:])

    return tuple(x.reshape(x.shape[0], imsz) for x in (x_train, x_val, x_test))

def linear_classifier_discrimination(model, data, C_range=[1], verbose=True, samples=None):
    """
    Compute the logistic regression classification accuracy.

    A logistic regression classifier is fit to tell data (label 0) apart from
    samples (label 1), once for every C in C_range.  The accuracy is measured
    on the same examples the classifier was fit on.

    model: only used when samples is None; data.shape[0] samples are then
        generated from it after seeding gnumpy with 8.
    data: array with one example per row.
    C_range: values of the LogisticRegression C parameter to try.
    verbose: print one line per value of C; the best so far is marked '*'.
    samples: optional pre-generated samples, one per row.

    Return best_acc, best_classifier.  best_classifier is None if no
    accuracy was greater than 0.
    """
    import sklearn.linear_model as lm
    n_examples = data.shape[0]
    if samples is None:
        gnp.seed_rand(8)
        samples = model.generate_samples(n_samples=n_examples).asarray()

    x = np.r_[data, samples]
    # np.int was only an alias of the builtin int and is gone from recent numpy
    t = np.r_[np.zeros(n_examples, dtype=int), np.ones(samples.shape[0], dtype=int)]

    best_acc = 0
    best_classifier = None

    for C in C_range:
        t_start = time.time()
        lr = lm.LogisticRegression(C=C, dual=False, random_state=8)
        lr.fit(x,t)
        acc = (lr.predict(x) == t).mean()

        is_best = acc > best_acc
        if is_best:
            best_acc = acc
            best_classifier = lr

        if verbose:
            # single-argument print: same output as the old chained print
            # statements, and also valid Python 3
            print('C=%g  acc=%.4f %s time=%.2f' % (
                C, acc, '*' if is_best else ' ', time.time() - t_start))

    return best_acc, best_classifier

def eval_filter_thresholds(model, data, thres_range=np.arange(0, 0.9, 0.1)):
    """
    Evaluate the discrimination performance at different threshold levels.

    A base classifier is trained to separate data from samples of model.  For
    every threshold a ClassifierSampleFilter built from that classifier is
    wrapped around model, data.shape[0] filtered samples are generated, and
    a new classifier is trained to separate them from data.

    Return the filtered network whose samples gave the lowest accuracy (the
    first one on ties), or None if thres_range is empty.
    """
    # base classifier
    _, c = linear_classifier_discrimination(model, data, verbose=False)

    # Start from +inf and None so a result is always defined, even when every
    # accuracy is 1.0 or thres_range is empty (this used to leave best_ftnet
    # unbound).
    acc_best = float('inf')
    best_ftnet = None
    for thres in thres_range:
        t_start = time.time()
        ftr = gen.ClassifierSampleFilter(c, thres)
        ftnet = gen.StochasticGenerativeNetWithFilter(model, ftr)
        s = ftnet.generate_samples(n_samples=data.shape[0]).asarray()
        acc_ftr, _ = linear_classifier_discrimination(None, data, verbose=False, samples=s)
        print('thres=%.2f, acc=%.4f, time=%.2f' % (thres, acc_ftr, time.time() - t_start))
        if acc_ftr < acc_best:
            acc_best = acc_ftr
            best_ftnet = ftnet

    return best_ftnet

def get_filtered_model(net, data):
    """
    Wrap net in a classifier-based sample filter with threshold 0.8.

    The classifier is the best one found by linear_classifier_discrimination
    over C in [1, 10, 100, 1000].  Return filtered_net, classifier.
    """
    _, classifier = linear_classifier_discrimination(
            net, data, C_range=[1, 10, 100, 1000], verbose=True)
    sample_filter = gen.ClassifierSampleFilter(classifier, threshold=0.8)
    return gen.StochasticGenerativeNetWithFilter(net, sample_filter), classifier

def test_single_filter_old(net, data, base_samples, base_classifier, threshold, base_filter=None):
    """
    Build one sample filter, report how much it removes, and train a new
    classifier against the filtered model.

    net: the base net
    data: training data
    base_samples: samples generated by the base model with the base filter
    base_classifier: classifier trained to discriminate data from
        base_samples; if None a BlankSampleFilter is used instead
    threshold: threshold used for the filter
    base_filter: passed to the new filter as prev

    Return the new filter and the classifier trained on data vs. samples of
    the filtered net.
    """
    if base_classifier is not None:
        ftr = gen.ClassifierSampleFilter(base_classifier, threshold, prev=base_filter)
    else:
        ftr = gen.BlankSampleFilter()

    kept = ftr.filter(base_samples)
    n_base = base_samples.shape[0]
    n_removed = n_base - kept.shape[0]

    print('Filtered out %d/%d ~ %%%.1f' % (n_removed, n_base, 100.0 * n_removed / n_base))

    filtered_net = gen.StochasticGenerativeNetWithFilter(net, ftr)
    filtered_samples = filtered_net.generate_samples(n_samples=data.shape[0]).asarray()

    new_classifier = linear_classifier_discrimination(net, data, samples=filtered_samples)[1]

    return ftr, new_classifier

def test_single_filter(net, data, threshold, base_samples=None, base_classifier=None, base_filter=None):
    """
    Placeholder: not implemented, does nothing and returns None.
    """

def log_exp_sum_1d(x):
    """
    Compute log(exp(x_1) + exp(x_2) + ... + exp(x_n)) over all entries of x.

    The maximum x* of x is subtracted before exponentiating and added back
    afterwards, i.e. the result is
    x* + log(exp(x_1-x*) + ... + exp(x_n-x*)), which avoids numerical
    problems.  gnumpy functions are used for garray input, numpy functions
    for everything else.
    """
    lib = gnp if isinstance(x, gnp.garray) else np
    peak = x.max()
    return peak + lib.log(lib.exp(x - peak).sum())

def log_exp_sum(x, axis=1):
    """
    Compute log(sum(exp(x))) along the given axis.

    The maximum along axis is subtracted before exponentiating and added
    back afterwards to avoid numerical problems.

    x: numpy array or gnumpy garray.
    axis: axis to reduce.  Any valid axis works; previously only axis=1 of a
        matrix did, because the maximum was always expanded as a column.

    Return the result with the reduced axis removed.  For garray input the
    result is converted to a numpy array with asarray().
    """
    x_max = x.max(axis=axis)

    # Put a length-1 dimension back where the reduced axis was, so that the
    # maximum broadcasts against x whichever axis was reduced.
    keep_shape = list(x.shape)
    keep_shape[axis] = 1
    shifted = x - x_max.reshape(tuple(keep_shape))

    # numpy input never needs the gnumpy code path
    if not isinstance(x, np.ndarray) and isinstance(x, gnp.garray):
        return (x_max + gnp.log(gnp.exp(shifted).sum(axis=axis))).asarray()
    return x_max + np.log(np.exp(shifted).sum(axis=axis))

class KDE(object):
    """
    Kernel density estimation.

    The log density of a point is computed in _log_likelihood from the
    kernel transformation between the stored data and the query points,
    the bandwidth sigma, and a constant normalization term (self.factor).
    """
    def __init__(self, data, sigma):
        """
        data: the points the estimator is built from, one per row.  Converted
            to a gnumpy garray if it is not one already.
        sigma: bandwidth.
        """
        self.x = gnp.garray(data) if not isinstance(data, gnp.garray) else data
        self.sigma = sigma
        self.N = self.x.shape[0]
        self.d = self.x.shape[1]
        self._ek =  ker.EuclideanKernel()

        # constant term: -log(N) - d/2 * log(2*pi*sigma^2)
        self.factor = float(-np.log(self.N) - self.d / 2.0 * np.log(2 * np.pi * self.sigma**2))

    def _log_likelihood(self, data):
        # NOTE(review): presumably compute_kernel_transformation returns
        # squared Euclidean distances with one row per query point - confirm
        # in core/kernels.py.
        return log_exp_sum(-self._ek.compute_kernel_transformation(self.x, data) / (2 * self.sigma**2), axis=1) + self.factor

    def log_likelihood(self, data, batch_size=1000):
        """
        Return the log likelihood of every row of data.

        If data has more than batch_size rows it is processed in consecutive
        chunks of at most batch_size rows.
        """
        n_cases = data.shape[0]
        if n_cases <= batch_size:
            return self._log_likelihood(data)

        log_like = np.zeros(n_cases, dtype=float)
        # stepping the range keeps all indices integers; the old batch count
        # used '/', which is a float under true division
        for i_start in range(0, n_cases, batch_size):
            i_end = min(i_start + batch_size, n_cases)
            log_like[i_start:i_end] = self._log_likelihood(data[i_start:i_end])

        return log_like

    def likelihood(self, data):
        """
        data is a n_example x n_dims matrix.
        """
        return np.exp(self.log_likelihood(data))

    def average_likelihood(self, data):
        """
        Mean of likelihood(data) over the examples.
        """
        return self.likelihood(data).mean()

    def average_log_likelihood(self, data, batch_size=1000):
        """
        Mean of log_likelihood(data) over the examples.
        """
        return self.log_likelihood(data, batch_size=batch_size).mean()

    def average_std_log_likelihood(self, data, batch_size=1000):
        """
        Return the mean and the standard deviation of log_likelihood(data).
        """
        # batch_size used to be accepted but silently ignored here
        l = self.log_likelihood(data, batch_size=batch_size)
        return l.mean(), l.std()

    def average_se_log_likelihood(self, data, batch_size=1000):
        """
        Return the mean of log_likelihood(data) and its standard error, i.e.
        the standard deviation divided by sqrt(number of examples).
        """
        # batch_size used to be accepted but silently ignored here
        l = self.log_likelihood(data, batch_size=batch_size)
        return l.mean(), l.std() / np.sqrt(data.shape[0])

class AlternativeKDE(object):
    """
    Kernel density estimation.

    Works on numpy arrays only (garray input is converted with asarray())
    and evaluates one query point at a time.
    """
    def __init__(self, data, sigma):
        """
        data: the points the estimator is built from, one per row.
        sigma: bandwidth.
        """
        self.x = data if not isinstance(data, gnp.garray) else data.asarray()
        self.sigma = sigma
        self.N = self.x.shape[0]
        self.d = self.x.shape[1]

    def _compute_log_prob(self, data, batch_size=1000):
        """
        Return the log density of every row of data.

        Rows are processed one at a time, so large matrices also work.
        batch_size is not used; it is kept so existing callers still work.
        """
        data = data if not isinstance(data, gnp.garray) else data.asarray()
        n_cases = data.shape[0]

        # terms that do not depend on the query point
        log_n = np.log(self.N)
        log_gauss_norm = self.d / 2.0 * (np.log(2 * np.pi) + 2 * np.log(self.sigma))

        log_prob = np.zeros(n_cases, dtype=float)
        for i in range(n_cases):
            # Only one row of kernel values is needed at a time, so the full
            # n_cases x N matrix is no longer allocated.  The row is kept in
            # double precision, as it was when stored in that matrix.
            k = np.asarray(-((self.x - data[i])**2).sum(axis=1) / (2 * self.sigma**2), dtype=float)
            log_prob[i] = log_exp_sum_1d(k) - log_n - log_gauss_norm

        return log_prob

    def likelihood(self, data):
        """
        data is a n_example x n_dims matrix.
        """
        return np.exp(self._compute_log_prob(data))

    def average_likelihood(self, data):
        """
        Mean of likelihood(data) over the examples.
        """
        return self.likelihood(data).mean()

    def log_likelihood(self, data):
        """
        Return the log density of every row of data.
        """
        return self._compute_log_prob(data)

    def average_log_likelihood(self, data):
        """
        Mean of log_likelihood(data) over the examples.
        """
        return self.log_likelihood(data).mean()


def kde_evaluation(test_data, samples, sigma_range=np.arange(0.1, 0.3, 0.01), verbose=True):
    """
    Fit a KDE to samples for every sigma in sigma_range and return the best
    average log likelihood of test_data.

    With verbose set, the value for every sigma and the best value are
    printed.  Returns -inf if no sigma gives a larger value.
    """
    best = float('-inf')
    for sigma in sigma_range:
        current = KDE(samples, sigma).average_log_likelihood(test_data)
        # max() keeps the earlier value unless the new one is strictly larger
        best = max(best, current)
        if verbose:
            print('sigma=%g, log_likelihood=%.2f' % (sigma, current))

    if not verbose:
        return best

    print('====================')
    print('Best log_likelihood=%.2f' % best)
    print('')
    return best

def kde_evaluation_tfd(test_data, samples, sigma_range=np.arange(0.05, 0.25, 0.01), verbose=True):
    """
    Same as kde_evaluation, but with a default sigma range of 0.05 to 0.25
    in steps of 0.01.
    """
    return kde_evaluation(test_data, samples, sigma_range=sigma_range, verbose=verbose)

def kde_evaluation_all_folds(test_data, samples, sigma_range=np.arange(0.05, 0.25, 0.01), verbose=True):
    """
    KDE evaluation over several folds at once.

    samples[i] and test_data[i] belong to fold i.  For every sigma a KDE is
    fit per fold and the per-fold average log likelihoods are averaged.
    Return the best such average over sigma_range.
    """
    fold_ids = range(len(samples))
    n_folds = len(samples)
    best = float('-inf')

    for sigma in sigma_range:
        per_fold = []
        for i in fold_ids:
            per_fold.append(KDE(samples[i], sigma).average_log_likelihood(test_data[i]))
        fold_mean = sum(per_fold) / float(n_folds)
        best = max(best, fold_mean)
        if verbose:
            fold_text = ', '.join(['%8.2f' % l for l in per_fold])
            print('sigma=%5g, log_likelihood=%8.2f   [%s]' % (sigma, fold_mean, fold_text))

    if verbose:
        print('====================')
        print('Best log_likelihood=%.2f' % best)
        print('')
    return best

def generate_fold_samples(net, fold_model_format, ae=None, fold_ae_format=None, n_samples=10000, n_folds=5):
    """
    Generate n_samples samples from the model of every fold.

    For fold i the model file fold_model_format % i is loaded into net.  If
    ae is given, fold_ae_format % i is loaded into it and it is attached to
    net as net.autoencoder before sampling.

    Return a list with the samples of each fold.
    """
    with_autoencoder = ae is not None

    def samples_for(fold):
        net.load_model_from_file(fold_model_format % fold)
        if with_autoencoder:
            ae.load_model_from_file(fold_ae_format % fold)
            net.autoencoder = ae
        return net.generate_samples(n_samples=n_samples)

    return [samples_for(fold) for fold in range(n_folds)]

def get_fold_data(set_name, n_folds=5):
    """
    Return a list with the set_name part of each of the first n_folds folds.

    set_name: one of 'train', 'val', 'test'.

    Raises ValueError for any other set_name.  Previously such a name loaded
    every fold and then silently returned an empty list.
    """
    # position of each set in the tuple returned by load_tfd_fold
    positions = {'train': 0, 'val': 1, 'test': 2}
    if set_name not in positions:
        raise ValueError("set_name has to be one of 'train', 'val', 'test', got %r" % (set_name,))

    which = positions[set_name]
    return [load_tfd_fold(i_fold)[which] for i_fold in range(n_folds)]

def kde_eval_mnist(net, test_data, n_samples=10000, sigma_range=np.arange(0.1, 0.3, 0.01), verbose=True):
    """
    KDE evaluation of net on test_data.

    n_samples samples are generated from net once.  For every sigma in
    sigma_range a KDE is fit to them and the average log likelihood of
    test_data and its standard error are computed.

    Return best_log_likelihood, best_se, best_sigma, i.e. the values at the
    sigma with the highest average log likelihood.  If no sigma improves on
    -inf the result is (-inf, 0, 0).
    """
    generated = net.generate_samples(n_samples=n_samples)
    best = (float('-inf'), 0, 0)    # (log likelihood, standard error, sigma)

    for sigma in sigma_range:
        mean_ll, se = KDE(generated, sigma).average_se_log_likelihood(test_data)
        if mean_ll > best[0]:
            best = (mean_ll, se, sigma)
        if verbose:
            print('sigma=%g, log_likelihood=%.2f (%.2f)' % (sigma, mean_ll, se))

    if verbose:
        print('====================')
        print('Best log_likelihood=%.2f (%.2f)' % (best[0], best[1]))
        print('')
    return best

def kde_eval_tfd(net, test_data_all_folds, n_samples=10000, sigma_range=np.arange(0.05, 0.25, 0.01), verbose=True):
    """
    KDE evaluation of net on the test data of several folds.

    n_samples samples are generated from net once.  For every sigma in
    sigma_range one KDE is fit to them and the average log likelihood of
    each fold's test data is computed.  The reported value is the mean over
    folds; the reported standard error is the standard deviation of the
    per-fold values divided by sqrt(number of folds).

    Return best_log_likelihood, best_se, best_sigma, i.e. the values at the
    sigma with the highest mean.  If no sigma improves on -inf (for example
    when sigma_range is empty) the result is (-inf, 0, 0), the same as
    kde_eval_mnist; previously this case raised UnboundLocalError.
    """
    s = net.generate_samples(n_samples=n_samples)
    best_log_likelihood = float('-inf')
    best_se = 0
    best_sigma = 0
    n_folds = len(test_data_all_folds)
    for sigma in sigma_range:
        kde = KDE(s, sigma)
        log_likelihood = [kde.average_log_likelihood(test_data_all_folds[i]) for i in range(n_folds)]
        avg_log_likelihood = sum(log_likelihood) / float(n_folds)
        avg_se = np.array(log_likelihood).std() / np.sqrt(n_folds)
        if avg_log_likelihood > best_log_likelihood:
            best_log_likelihood = avg_log_likelihood
            best_se = avg_se
            best_sigma = sigma
        if verbose:
            print('sigma=%5g, log_likelihood=%8.2f (%.2f)  [%s]' % (
                sigma, avg_log_likelihood, avg_se,
                ', '.join(['%8.2f' % l for l in log_likelihood])))

    if verbose:
        print('====================')
        print('Best log_likelihood=%.2f (%.2f)' % (best_log_likelihood, best_se))
        print('')
    return best_log_likelihood, best_se, best_sigma




================================================
FILE: generate_sample_figures.py
================================================
"""
Script used for generating sample figures used in the paper.

Yujia Li, 02/2015
"""

import core.generative as gen
import pynn.nn as nn
import matplotlib.pyplot as plt
import vistools as vt
import visualize as vis
import dataio.tfd as tfd
import dataio.mnist as mnistio
import gnumpy as gnp
import numpy as np
import os

# turn on matplotlib's interactive mode
plt.ion()

# Fill in the paths to the model files here.  Each constant is passed to
# load_model_from_file by the matching get_*_model function below.
BEST_MNIST_INPUT_SPACE_MODEL = ''
BEST_MNIST_AUTOENCODER = ''
BEST_MNIST_CODE_SPACE_MODEL = ''
BEST_TFD_INPUT_SPACE_MODEL = ''
BEST_TFD_AUTOENCODER = ''
BEST_TFD_CODE_SPACE_MODEL = ''

def get_mnist_input_space_model():
    """
    Create a StochasticGenerativeNet and load BEST_MNIST_INPUT_SPACE_MODEL
    into it.
    """
    model = gen.StochasticGenerativeNet()
    model.load_model_from_file(BEST_MNIST_INPUT_SPACE_MODEL)
    return model

def get_mnist_code_space_model():
    """
    Load the MNIST autoencoder and the MNIST code space model, and return
    the model with the autoencoder attached as its autoencoder attribute.
    """
    autoencoder = nn.AutoEncoder()
    autoencoder.load_model_from_file(BEST_MNIST_AUTOENCODER)

    model = gen.StochasticGenerativeNetWithAutoencoder()
    model.load_model_from_file(BEST_MNIST_CODE_SPACE_MODEL)
    model.autoencoder = autoencoder
    return model

def get_tfd_input_space_model():
    """
    Create a StochasticGenerativeNet and load BEST_TFD_INPUT_SPACE_MODEL
    into it.
    """
    model = gen.StochasticGenerativeNet()
    model.load_model_from_file(BEST_TFD_INPUT_SPACE_MODEL)
    return model

def get_tfd_code_space_model():
    """
    Load the TFD autoencoder and the TFD code space model, and return the
    model with the autoencoder attached as its autoencoder attribute.
    """
    autoencoder = nn.AutoEncoder()
    autoencoder.load_model_from_file(BEST_TFD_AUTOENCODER)

    model = gen.StochasticGenerativeNetWithAutoencoder()
    model.load_model_from_file(BEST_TFD_CODE_SPACE_MODEL)
    model.autoencoder = autoencoder
    return model

def get_model(dataset='mnist', mode='input_space'):
    """
    Return the loaded model for the given dataset and mode.

    dataset: 'mnist' or 'tfd'
    mode: 'input_space' or 'code_space'

    Raises ValueError for any other combination.  Previously None was
    returned in that case, which only failed later when the model was used.
    """
    if dataset == 'mnist':
        if mode == 'input_space':
            return get_mnist_input_space_model()
        if mode == 'code_space':
            return get_mnist_code_space_model()
    elif dataset == 'tfd':
        if mode == 'input_space':
            return get_tfd_input_space_model()
        if mode == 'code_space':
            return get_tfd_code_space_model()

    raise ValueError('no model for dataset=%r, mode=%r' % (dataset, mode))

def generate_samples(dataset='mnist', mode='input_space'):
    """
    Draw 30 samples from the model for dataset and mode, show them with
    vt.bwpatchview in a new figure and save the figure as
    figs/samples_<dataset>_<mode>.pdf.  The figs directory is created if it
    does not exist.
    """
    if dataset == 'mnist':
        imsz = [28,28]
    else:
        imsz = [48,48]

    model = get_model(dataset=dataset, mode=mode)
    plt.figure()
    drawn = model.generate_samples(n_samples=30).asarray()
    vt.bwpatchview(drawn, imsz, 5, gridintensity=1)

    figs_missing = not os.path.exists('figs')
    if figs_missing:
        os.makedirs('figs')
    plt.savefig('figs/samples_%s_%s.pdf' % (dataset, mode), bbox_inches='tight')

def generate_all_samples():
    """
    Generate the sample figures for both MNIST models.
    """
    for mode in ('input_space', 'code_space'):
        generate_samples(dataset='mnist', mode=mode)
    # the TFD figures are disabled
    #generate_samples(dataset='tfd', mode='input_space')
    #generate_samples(dataset='tfd', mode='code_space')

def load_train_data(dataset='mnist'):
    """
    Return the training data of the given dataset.

    'mnist': the first value returned by mnistio.load_data().
    'tfd': the 'unlabeled' set of proper fold 0, loaded with scale=True and
        with every example flattened into one row.

    Raises ValueError for any other dataset (this used to fail with an
    UnboundLocalError).
    """
    if dataset == 'mnist':
        train_data, _, _ = mnistio.load_data()
    elif dataset == 'tfd':
        train_data, _, _ = tfd.load_proper_fold(0, 'unlabeled', scale=True)
        train_data = train_data.reshape(train_data.shape[0], np.prod(train_data.shape[1:]))
    else:
        raise ValueError("dataset has to be 'mnist' or 'tfd', got %r" % (dataset,))

    return train_data

def get_nearest_neighbor(dataset='mnist', mode='input_space'):
    """
    Draw 12 samples from the model for dataset and mode and run
    vis.nn_search on them against the training data with top_k=1.  The
    output file is figs/nn_<dataset>_<mode>.pdf; the figs directory is
    created if it does not exist.
    """
    if dataset == 'mnist':
        imsz = [28,28]
    else:
        imsz = [48,48]

    model = get_model(dataset=dataset, mode=mode)
    train_data = load_train_data(dataset=dataset)

    figs_missing = not os.path.exists('figs')
    if figs_missing:
        os.makedirs('figs')

    queries = model.generate_samples(n_samples=12)
    target_file = 'figs/nn_%s_%s.pdf' % (dataset, mode)
    vis.nn_search(queries, train_data, top_k=1, imsz=imsz,
            orientation='horizontal', output_file=target_file, pad=0.1)

def get_all_nearest_neighbors():
    """
    Generate the nearest neighbor figures for both MNIST models.
    """
    for mode in ('input_space', 'code_space'):
        get_nearest_neighbor(dataset='mnist', mode=mode)
    # the TFD figures are disabled
    #get_nearest_neighbor(dataset='tfd', mode='input_space')
    #get_nearest_neighbor(dataset='tfd', mode='code_space')

def get_morphing_figure(dataset='mnist', mode='input_space'):
    """
    Plot vis.generation_on_a_line for the model of dataset and mode in a new
    figure and save it as figs/morphing_<dataset>_<mode>.pdf.

    gnumpy is seeded with 8 before 5 hidden samples are drawn from the model
    and passed as h_seeds.  The figs directory is created if it does not
    exist.
    """
    if dataset == 'mnist':
        imsz = [28,28]
    else:
        imsz = [48,48]

    model = get_model(dataset=dataset, mode=mode)
    plt.figure()
    gnp.seed_rand(8)
    seeds = model.sample_hiddens(5)
    vis.generation_on_a_line(model, n_points=24, imsz=imsz, nrows=10, h_seeds=seeds)

    figs_missing = not os.path.exists('figs')
    if figs_missing:
        os.makedirs('figs')
    plt.savefig('figs/morphing_%s_%s.pdf' % (dataset, mode), bbox_inches='tight')

def get_all_morphing_figures():
    """
    Generate the morphing figure for the MNIST code space model.
    """
    for dataset in ('mnist',):
        get_morphing_figure(dataset=dataset, mode='code_space')
    # the TFD figure is disabled
    #get_morphing_figure(dataset='tfd', mode='code_space')

# When run as a script: generate the sample, nearest neighbor and morphing
# figures, in that order.
if __name__ == '__main__':
    generate_all_samples()
    get_all_nearest_neighbors()
    get_all_morphing_figures()


================================================
FILE: test.py
================================================
"""
Debug tests for the datasetbias project.

Yujia Li, 09/2014
"""
import os
os.environ['GNUMPY_CPU_PRECISION'] = '64'

import pynn.nn as nn
import pynn.layer as ly
import pynn.loss as ls
import gnumpy as gnp
import numpy as np
import time
import math

import core.generative as gen

# default error threshold of test_vec_pair
_GRAD_CHECK_EPS = 1e-6
# step size used by finite_difference_gradient
_FDIFF_EPS = 1e-8

# file written and removed again by test_net_io
_TEMP_FILE_NAME = '_temp_.pdata'

# ANSI escape sequences: green background, red background, reset
_GOOD_COLOR_BEGINS = '\033[42m'
_BAD_COLOR_BEGINS = '\033[41m'
_COLOR_RESET = '\033[0m'

def good_colored_str(txt):
    """
    Wrap txt in the 'good' color escape sequence and the color reset.
    """
    return ''.join([_GOOD_COLOR_BEGINS, txt, _COLOR_RESET])

def bad_colored_str(txt):
    """
    Wrap txt in the 'bad' color escape sequence and the color reset.
    """
    return ''.join([_BAD_COLOR_BEGINS, txt, _COLOR_RESET])

def vec_str(v):
    """
    Format the vector v as '[ e_1 e_2 ... ]', each entry printed with
    '%11.8f' and followed by a space.
    """
    entries = ['%11.8f ' % v[i] for i in range(len(v))]
    return '[ ' + ''.join(entries) + ']'

def test_vec_pair(v1, msg1, v2, msg2, error_thres=_GRAD_CHECK_EPS):
    """
    Print two vectors and their difference, and check that they are close.

    v1, v2: the vectors to compare
    msg1, msg2: labels printed in front of v1 and v2
    error_thres: the check succeeds if the Euclidean norm of v1 - v2 is
        below this value

    Return whether the check succeeded.
    """
    print(msg1 + ' : ' + vec_str(v1))
    print(msg2 + ' : ' + vec_str(v2))

    delta = v1 - v2
    # pad 'diff' so that it is as wide as msg2
    diff_label = ' ' * (len(msg2) - len('diff')) + 'diff'
    print(diff_label + ' : ' + vec_str(delta))

    err = np.sqrt((delta**2).sum())
    print('err : %.8f' % err)

    success = err < error_thres
    if success:
        verdict = good_colored_str('** SUCCESS **')
    else:
        verdict = bad_colored_str('** FAIL **')
    print(verdict)

    return success

def finite_difference_gradient(f, x):
    """
    Estimate the gradient of f at x with central differences.

    Every entry of x is moved by +_FDIFF_EPS and -_FDIFF_EPS in turn (x is
    changed in place and restored afterwards) and f is called on x each time.

    Return the estimate, created as x * 0 and filled entry by entry.
    """
    grad = x * 0
    for i in range(len(x)):
        original = x[i]

        x[i] = original + _FDIFF_EPS
        upper = f(x)
        x[i] = original - _FDIFF_EPS
        lower = f(x)
        x[i] = original

        grad[i] = (upper - lower) / (2 * _FDIFF_EPS)

    return grad

def fdiff_grad_generator(net, x, t, add_noise=False, seed=None):
    """
    Return a function that maps a parameter vector to the loss of net on x.

    If t is not None it is loaded into net as the target first.  The
    returned function sets the parameters of net to its argument, runs
    forward_prop on x with compute_loss=True, reads the loss, and puts the
    previous parameters back.  If add_noise is set and seed is given, gnumpy
    is reseeded with seed at the start of every call.
    """
    if t is not None:
        net.load_target(t)

    reseed = add_noise and seed is not None

    def loss_at(w):
        if reseed:
            gnp.seed_rand(seed)
        saved_params = net.get_param_vec()
        net.set_param_from_vec(w)
        net.forward_prop(x, add_noise=add_noise, compute_loss=True)
        value = net.get_loss()
        net.set_param_from_vec(saved_params)
        return value

    return loss_at

def test_net_io(f_create, f_create_void):
    """
    Check that a net survives being saved to a file and loaded again.

    f_create: returns the net to save
    f_create_void: returns the net the file is loaded into

    The test passes if both nets have the same str() and their parameter
    vectors pass test_vec_pair.  The temporary file is removed after loading.
    """
    saved_net = f_create()
    print('Testing %s I/O' % saved_net.__class__.__name__)

    saved_net.save_model_to_file(_TEMP_FILE_NAME)

    loaded_net = f_create_void()
    loaded_net.load_model_from_file(_TEMP_FILE_NAME)

    os.remove(_TEMP_FILE_NAME)

    print('Net #1: \n' + str(saved_net))
    print('Net #2: \n' + str(loaded_net))

    # parameters are only compared when the descriptions already agree
    if str(saved_net) != str(loaded_net):
        return False
    return test_vec_pair(saved_net.get_param_vec(), 'Net #1',
            loaded_net.get_param_vec(), 'Net #2')

def test_databias_loss(loss_type, **kwargs):
    """
    Gradient check for the loss named loss_type.

    The gradient returned by compute_loss_and_grad on a random input is
    compared with a finite difference estimate.  kwargs are passed on to
    load_target.  Return whether the check succeeded.
    """
    kwarg_text = ', '.join([str(k) + '=' + str(v) for k, v in kwargs.items()])
    print('Testing Loss <' + loss_type + '> ' + kwarg_text)

    n_cases = 5
    n_datasets = 3

    x = gnp.randn(n_cases, 2)
    # dataset labels 0, 1, 2, 0, 1
    s = np.arange(n_cases) % n_datasets

    loss = ls.get_loss_from_type_name(loss_type)
    loss.load_target(s, K=n_datasets, **kwargs)

    loss_of_flat_input = lambda w: loss.compute_loss_and_grad(
            w.reshape(x.shape), compute_grad=True)[0]

    analytic = loss.compute_loss_and_grad(x, compute_grad=True)[1].asarray().ravel()
    numeric = finite_difference_gradient(loss_of_flat_input, x.asarray().ravel())

    ok = test_vec_pair(numeric, 'Finite Difference Gradient',
            analytic, '  Backpropagation Gradient')
    print('')
    return ok

def create_databias_net(dropout_rate):
    """
    Create a NeuralNet(3, 2) with a tanh layer that uses dropout_rate and a
    linear layer without dropout.
    """
    layer_specs = (
        (2, ly.NONLIN_NAME_TANH, dropout_rate),
        (0, ly.NONLIN_NAME_LINEAR, 0),
    )
    net = nn.NeuralNet(3, 2)
    for out_dim, nonlin, dropout in layer_specs:
        net.add_layer(out_dim, nonlin_type=nonlin, dropout=dropout)
    return net

def test_databias_loss_with_net(add_noise, loss_type, **kwargs):
    """
    Gradient check for the loss named loss_type on top of a small network.

    The backpropagation gradient with respect to the network parameters is
    compared with a finite difference estimate.  With add_noise the network
    uses a dropout rate of 0.5 and gnumpy is seeded with 8 before every
    forward pass.  kwargs are passed on to load_target.  gnumpy is reseeded
    from the current time before returning.

    Return whether the check succeeded.
    """
    noise_text = 'with noise' if add_noise else 'without noise'
    kwarg_text = ', '.join([str(k) + '=' + str(v) for k, v in kwargs.items()])
    print('Testing Loss <' + loss_type + '> with network, ' + noise_text + ', ' + kwarg_text)

    n_cases = 5
    n_datasets = 3
    seed = 8

    net = create_databias_net(0.5 if add_noise else 0)
    net.set_loss(loss_type)
    print(net)
    x = gnp.randn(n_cases, net.in_dim)
    s = np.arange(n_cases) % n_datasets

    net.load_target(s, K=n_datasets, **kwargs)

    if add_noise:
        gnp.seed_rand(seed)
    net.clear_gradient()
    net.forward_prop(x, add_noise=add_noise, compute_loss=True)
    net.backward_prop()
    analytic = net.get_grad_vec()

    loss_at = fdiff_grad_generator(net, x, None, add_noise=add_noise, seed=seed)
    numeric = finite_difference_gradient(loss_at, net.get_param_vec())

    ok = test_vec_pair(numeric, 'Finite Difference Gradient',
            analytic, '  Backpropagation Gradient')
    print('')

    gnp.seed_rand(int(time.time()))
    return ok

def test_generative_mmd_loss(sigma=1):
    """
    Gradient check for the generative MMD loss (ls.LOSS_NAME_MMDGEN).

    The gradient returned by compute_loss_and_grad for random predictions is
    compared with a finite difference estimate.  Return whether the check
    succeeded.
    """
    print('Testing generative MMD loss, sigma=%g' % sigma)
    n_dims = 3

    # 5 targets are drawn first, then 4 predictions
    target = gnp.randn(5, n_dims)
    pred = gnp.randn(4, n_dims)

    loss = ls.get_loss_from_type_name(ls.LOSS_NAME_MMDGEN, sigma=sigma)
    loss.load_target(target)

    loss_of_flat_pred = lambda w: loss.compute_loss_and_grad(
            w.reshape(pred.shape), compute_grad=False)[0]

    analytic = loss.compute_loss_and_grad(pred, compute_grad=True)[1].asarray().ravel()
    numeric = finite_difference_gradient(loss_of_flat_pred, pred.asarray().ravel())

    ok = test_vec_pair(numeric, 'Finite Difference Gradient',
            analytic, '  Backpropagation Gradient')
    print('')
    return ok

def test_generative_multi_scale_mmd_loss(sigma=[1, 10], scale_weight=None):
    """
    Gradient check for the multi-scale generative MMD loss
    (ls.LOSS_NAME_MMDGEN_MULTISCALE).

    The gradient returned by compute_loss_and_grad for random predictions is
    compared with a finite difference estimate.  Return whether the check
    succeeded.
    """
    print('Testing generative multi-scale MMD loss, sigma=%s' % str(sigma))
    n_dims = 3

    # 5 targets are drawn first, then 4 predictions
    target = gnp.randn(5, n_dims)
    pred = gnp.randn(4, n_dims)

    loss = ls.get_loss_from_type_name(
            ls.LOSS_NAME_MMDGEN_MULTISCALE, sigma=sigma, scale_weight=scale_weight)
    loss.load_target(target)

    loss_of_flat_pred = lambda w: loss.compute_loss_and_grad(
            w.reshape(pred.shape), compute_grad=False)[0]

    analytic = loss.compute_loss_and_grad(pred, compute_grad=True)[1].asarray().ravel()
    numeric = finite_difference_gradient(loss_of_flat_pred, pred.asarray().ravel())

    ok = test_vec_pair(numeric, 'Finite Difference Gradient',
            analytic, '  Backpropagation Gradient')
    print('')
    return ok

def test_linear_time_mmd_loss(sigma=1.0, use_modified_loss=False, use_absolute_value=False):
    """
    Gradient check for the linear time MMD loss
    (ls.LOSS_NAME_LINEAR_TIME_MMDGEN).

    The gradient returned by compute_loss_and_grad for random predictions is
    compared with a finite difference estimate.  Return whether the check
    succeeded.
    """
    print('Testing linear time MMD loss, sigma=%s' % str(sigma))
    n_dims = 3

    # 4 targets are drawn first, then 4 predictions
    target = gnp.randn(4, n_dims)
    pred = gnp.randn(4, n_dims)

    loss = ls.get_loss_from_type_name(
            ls.LOSS_NAME_LINEAR_TIME_MMDGEN, sigma=sigma,
            use_modified_loss=use_modified_loss, use_absolute_value=use_absolute_value)
    loss.load_target(target)

    loss_of_flat_pred = lambda w: loss.compute_loss_and_grad(
            w.reshape(pred.shape), compute_grad=False)[0]

    analytic = loss.compute_loss_and_grad(pred, compute_grad=True)[1].asarray().ravel()
    numeric = finite_difference_gradient(loss_of_flat_pred, pred.asarray().ravel())

    ok = test_vec_pair(numeric, 'Finite Difference Gradient',
            analytic, '  Backpropagation Gradient')
    print('')
    return ok

def test_linear_time_minibatch_mmd_loss(sigma=1.0, minibatch_size=100):
    """
    Gradient check for the linear time minibatch MMD loss
    (ls.LOSS_NAME_LINEAR_TIME_MINIBATCH_MMDGEN).

    The gradient returned by compute_loss_and_grad for random predictions is
    compared with a finite difference estimate.  Return whether the check
    succeeded.
    """
    print('Testing linear time minibatch MMD loss')
    n_dims = 3

    # 10 targets are drawn first, then 10 predictions
    target = gnp.randn(10, n_dims)
    pred = gnp.randn(10, n_dims)

    loss = ls.get_loss_from_type_name(
            ls.LOSS_NAME_LINEAR_TIME_MINIBATCH_MMDGEN,
            sigma=sigma, minibatch_size=minibatch_size)
    loss.load_target(target)
    print(loss)

    loss_of_flat_pred = lambda w: loss.compute_loss_and_grad(
            w.reshape(pred.shape), compute_grad=False)[0]

    analytic = loss.compute_loss_and_grad(pred, compute_grad=True)[1].asarray().ravel()
    numeric = finite_difference_gradient(loss_of_flat_pred, pred.asarray().ravel())

    ok = test_vec_pair(numeric, 'Finite Difference Gradient',
            analytic, '  Backpropagation Gradient')
    print('')
    return ok

def test_random_feature_mmd_loss(sigma=[1,10], scale_weight=[0.5, 1], n_features=3):
    """
    Gradient check for the random feature MMD loss
    (ls.LOSS_NAME_RANDOM_FEATURE_MMDGEN).

    The gradient returned by compute_loss_and_grad for random predictions is
    compared with a finite difference estimate.  Return whether the check
    succeeded.
    """
    print('Testing random feature MMD loss')
    n_dims = 2

    # 5 targets are drawn first, then 5 predictions
    target = gnp.randn(5, n_dims)
    pred = gnp.randn(5, n_dims)

    loss = ls.get_loss_from_type_name(
            ls.LOSS_NAME_RANDOM_FEATURE_MMDGEN,
            sigma=sigma, scale_weight=scale_weight, n_features=n_features)
    loss.load_target(target)
    print(loss)

    loss_of_flat_pred = lambda w: loss.compute_loss_and_grad(
            w.reshape(pred.shape), compute_grad=False)[0]

    analytic = loss.compute_loss_and_grad(pred, compute_grad=True)[1].asarray().ravel()
    numeric = finite_difference_gradient(loss_of_flat_pred, pred.asarray().ravel())

    ok = test_vec_pair(numeric, 'Finite Difference Gradient',
            analytic, '  Backpropagation Gradient')
    print('')
    return ok

def test_random_feature_mmd_loss_approximation(sigma=[1,10], scale_weight=[0.5,1],
        n_features=3):
    """
    Compare the random feature MMD loss with the multi-scale pair MMD loss.

    Both losses get the same targets and predictions (drawn with gnp.rand).
    The test passes if both the gradients and the loss values pass
    test_vec_pair with an error threshold of 1e-2.
    """
    print('Testing random feature MMD loss approximation error')

    n_dims = 2

    # 5 targets are drawn first, then 5 predictions
    target = gnp.rand(5, n_dims)
    pred = gnp.rand(5, n_dims)

    approx_loss = ls.get_loss_from_type_name(
            ls.LOSS_NAME_RANDOM_FEATURE_MMDGEN,
            sigma=sigma, scale_weight=scale_weight, n_features=n_features)
    approx_loss.load_target(target)
    print(approx_loss)

    exact_loss = ls.get_loss_from_type_name(
            ls.LOSS_NAME_MMDGEN_MULTISCALE_PAIR,
            sigma=sigma, scale_weight=scale_weight)
    exact_loss.load_target(target)

    rand_loss, rand_grad = approx_loss.compute_loss_and_grad(pred, compute_grad=True)
    true_loss, true_grad = exact_loss.compute_loss_and_grad(pred, compute_grad=True)

    # both comparisons are always run and printed
    grad_ok = test_vec_pair(rand_grad.asarray().ravel(), 'Approximate Gradient',
            true_grad.asarray().ravel(), '       True Gradient', error_thres=1e-2)
    loss_ok = test_vec_pair(np.array([rand_loss]), 'Approximate Loss',
            np.array([true_loss]), '       True Loss', error_thres=1e-2)
    print('')
    return loss_ok and grad_ok

def test_pair_mmd_loss_multiscale(sigma=[1, 10], scale_weight=None):
    """
    Gradient check for the multi-scale pair MMD loss
    (ls.LOSS_NAME_MMDGEN_MULTISCALE_PAIR).

    The gradient returned by compute_loss_and_grad for random predictions is
    compared with a finite difference estimate.  Return whether the check
    succeeded.
    """
    print('Testing generative pair multi-scale MMD loss')
    n_dims = 3

    # 5 targets are drawn first, then 4 predictions
    target = gnp.randn(5, n_dims)
    pred = gnp.randn(4, n_dims)

    loss = ls.get_loss_from_type_name(
            ls.LOSS_NAME_MMDGEN_MULTISCALE_PAIR, sigma=sigma, scale_weight=scale_weight)
    loss.load_target(target)
    print(loss)

    loss_of_flat_pred = lambda w: loss.compute_loss_and_grad(
            w.reshape(pred.shape), compute_grad=False)[0]

    analytic = loss.compute_loss_and_grad(pred, compute_grad=True)[1].asarray().ravel()
    numeric = finite_difference_gradient(loss_of_flat_pred, pred.asarray().ravel())

    ok = test_vec_pair(numeric, 'Finite Difference Gradient',
            analytic, '  Backpropagation Gradient')
    print('')
    return ok

def test_diff_kernel_mmd_loss(sigma=[1], scale_weight=[1], loss_name=None):
    """
    Gradient check for the differentiable kernel MMD loss called loss_name.

    loss_name is required.  The gradient returned by compute_loss_and_grad
    for random predictions is compared with a finite difference estimate.
    Return whether the check succeeded.
    """
    assert loss_name is not None

    print('Testing differentiable kernel MMD loss <%s>' % loss_name)

    n_dims = 3

    # 5 targets are drawn first, then 4 predictions
    target = gnp.randn(5, n_dims)
    pred = gnp.randn(4, n_dims)

    loss = ls.get_loss_from_type_name(loss_name, sigma=sigma, scale_weight=scale_weight)
    loss.load_target(target)
    print(loss)

    loss_of_flat_pred = lambda w: loss.compute_loss_and_grad(
            w.reshape(pred.shape), compute_grad=False)[0]

    analytic = loss.compute_loss_and_grad(pred, compute_grad=True)[1].asarray().ravel()
    numeric = finite_difference_gradient(loss_of_flat_pred, pred.asarray().ravel())

    ok = test_vec_pair(numeric, 'Finite Difference Gradient',
            analytic, '  Backpropagation Gradient')
    print('')
    return ok

def test_diff_kernel_per_example_mmd_loss(sigma=[1], scale_weight=[1], pred_per_example=1, target_per_example=[1], loss_name=None):
    """
    Gradient check for a differentiable kernel per example MMD loss.

    sigma, scale_weight: lists handed to the loss as kernel scales and weights.
    pred_per_example: number of prediction rows generated for every example.
    target_per_example: list with the number of target rows of every example;
        a single entry is repeated three times to make 3 examples.
    loss_name: type name of the loss to build, must not be None.

    Return the result of test_vec_pair comparing the finite difference
    gradient with the gradient computed by the loss itself.
    """
    assert loss_name is not None

    print('Testing differentiable kernel per example MMD loss <%s>' % loss_name)

    if len(target_per_example) == 1:
        target_per_example = target_per_example * 3

    dim = 3
    n_examples = len(target_per_example)

    # Predictions are sampled first, then one target matrix per example.
    pred = gnp.randn(n_examples * pred_per_example, dim)
    target = [gnp.randn(n_rows, dim) for n_rows in target_per_example]

    mmd = ls.get_loss_from_type_name(
            loss_name, sigma=sigma, scale_weight=scale_weight, pred_per_example=pred_per_example)
    mmd.load_target(target)
    print(mmd)

    def loss_at(flat_pred):
        # Loss value only; the flat vector is shaped back into the prediction matrix.
        loss_and_grad = mmd.compute_loss_and_grad(flat_pred.reshape(pred.shape), compute_grad=False)
        return loss_and_grad[0]

    analytic_grad = mmd.compute_loss_and_grad(pred, compute_grad=True)[1]
    backprop_grad = analytic_grad.asarray().ravel()
    fdiff_grad = finite_difference_gradient(loss_at, pred.asarray().ravel())

    passed = test_vec_pair(fdiff_grad, 'Finite Difference Gradient',
            backprop_grad, '  Backpropagation Gradient')
    print('')
    return passed

def test_all_diff_kernel_per_example_mmd_loss():
    """
    Run the gradient check of every per example MMD loss, first once per
    single scale and then once with all scales together.

    Return (n_success, n_tests).
    """
    banner = '==============================================================='
    print('')
    print(banner)
    print('Testing differentiable kernel per example MMD loss (new design)')
    print(banner)
    print('')

    sigmas = [1, 10]
    weights = [1.0, 3.0]
    targets_per_example = [[1], [2], [1,2,3]]
    preds_per_example = [1,2,3]

    outcomes = []
    for loss_name in [ls.LOSS_NAME_CPU_PER_EXAMPLE_MMDGEN_SQRT_GAUSSIAN]:
        # zip stops at the shortest list, so only the first len(sigmas)
        # settings of the two per-example lists are paired with a scale.
        for s, w, n_target, n_pred in zip(sigmas, weights, targets_per_example, preds_per_example):
            outcomes.append(test_diff_kernel_per_example_mmd_loss(
                [s], [w], n_pred, n_target, loss_name))

        # All scales at once, with the last per-example settings.
        outcomes.append(test_diff_kernel_per_example_mmd_loss(
            sigmas, weights, preds_per_example[-1], targets_per_example[-1], loss_name))

    n_tests = len(outcomes)
    n_success = len([ok for ok in outcomes if ok])

    print('==============')
    print('Test finished: %d/%d success, %d failed' % (n_success, n_tests, n_tests - n_success))
    print('')

    return n_success, n_tests



def test_all_diff_kernel_mmd_loss():
    """
    Run the gradient check of every differentiable kernel MMD loss, first once
    per single scale and then once with all scales together.

    Return (n_success, n_tests).
    """
    banner = '==================================================='
    print('')
    print(banner)
    print('Testing differentiable kernel MMD loss (new design)')
    print(banner)
    print('')

    sigmas = [1, 2.5, 10]
    weights = [1.0, 2, 3.0]
    loss_names = [ls.LOSS_NAME_MMDGEN_GAUSSIAN, ls.LOSS_NAME_MMDGEN_LAPLACIAN,
            ls.LOSS_NAME_MMDGEN_LAPLACIAN_L1, ls.LOSS_NAME_MMDGEN_SQRT_GAUSSIAN,
            ls.LOSS_NAME_CPU_MMDGEN_GAUSSIAN, ls.LOSS_NAME_CPU_MMDGEN_SQRT_GAUSSIAN]

    outcomes = []
    for loss_name in loss_names:
        # One run per (scale, weight) pair ...
        for s, w in zip(sigmas, weights):
            outcomes.append(test_diff_kernel_mmd_loss([s], [w], loss_name))
        # ... followed by one run with every scale at once.
        outcomes.append(test_diff_kernel_mmd_loss(sigmas, weights, loss_name))

    n_tests = len(outcomes)
    n_success = len([ok for ok in outcomes if ok])

    print('==============')
    print('Test finished: %d/%d success, %d failed' % (n_success, n_tests, n_tests - n_success))
    print('')

    return n_success, n_tests

def test_all_generative_mmd_loss():
    """
    Run the individual generative MMD loss tests with a fixed list of
    settings.

    Return (n_success, n_tests).
    """
    print('')
    print('========================')
    print('Testing data bias losses')
    print('========================')
    print('')

    # (test function, keyword arguments), executed in this order.
    cases = [
        (test_generative_mmd_loss, dict(sigma=1)),
        (test_generative_mmd_loss, dict(sigma=1e-1)),
        (test_generative_multi_scale_mmd_loss, dict(sigma=[1], scale_weight=[1.0])),
        (test_generative_multi_scale_mmd_loss, dict(sigma=[10], scale_weight=[2.0])),
        (test_generative_multi_scale_mmd_loss, dict(sigma=[100], scale_weight=[2.0])),
        (test_generative_multi_scale_mmd_loss, dict(sigma=[1, 10, 100], scale_weight=[1.0, 2.0, 3.0])),
        (test_linear_time_mmd_loss, dict(sigma=1)),
        (test_linear_time_mmd_loss, dict(sigma=0.1)),
        (test_linear_time_mmd_loss, dict(sigma=1, use_modified_loss=True)),
        (test_linear_time_mmd_loss, dict(sigma=0.1, use_modified_loss=True)),
        (test_linear_time_mmd_loss, dict(sigma=1, use_modified_loss=True, use_absolute_value=True)),
        (test_linear_time_mmd_loss, dict(sigma=0.1, use_modified_loss=True, use_absolute_value=True)),
        (test_linear_time_minibatch_mmd_loss, dict(sigma=1.0, minibatch_size=2)),
        (test_linear_time_minibatch_mmd_loss, dict(sigma=0.1, minibatch_size=3)),
        (test_pair_mmd_loss_multiscale, dict(sigma=[1], scale_weight=[1.0])),
        (test_pair_mmd_loss_multiscale, dict(sigma=[10], scale_weight=[2.0])),
        (test_pair_mmd_loss_multiscale, dict(sigma=[100], scale_weight=[2.0])),
        (test_pair_mmd_loss_multiscale, dict(sigma=[1, 10, 100], scale_weight=[1.0, 2.0, 3.0])),
        (test_random_feature_mmd_loss, dict(sigma=[1], scale_weight=[1.0], n_features=3)),
        (test_random_feature_mmd_loss, dict(sigma=[1], scale_weight=[1.0], n_features=10)),
        (test_random_feature_mmd_loss, dict(sigma=[1, 10, 100], scale_weight=[1.0, 2.0, 3.0], n_features=3)),
        (test_random_feature_mmd_loss, dict(sigma=[1, 10, 100], scale_weight=[1.0, 2.0, 3.0], n_features=10)),
        (test_random_feature_mmd_loss_approximation, dict(sigma=[5], scale_weight=[1.0], n_features=1024)),
        (test_random_feature_mmd_loss_approximation, dict(sigma=[5, 10, 80], scale_weight=[1.0, 2.0, 3.0], n_features=1024)),
    ]

    n_success = 0
    for run_test, settings in cases:
        if run_test(**settings):
            n_success += 1

    # 24 cases in the table above.
    n_tests = len(cases)

    print('==============')
    print('Test finished: %d/%d success, %d failed' % (n_success, n_tests, n_tests - n_success))
    print('')

    return n_success, n_tests

def run_all_tests():
    """
    Run every test batch of this file in order and print the overall tally.

    The gnumpy random generator is seeded with the current time first.
    """
    gnp.seed_rand(int(time.time()))

    batches = (test_all_generative_mmd_loss,
            test_all_diff_kernel_mmd_loss,
            test_all_diff_kernel_per_example_mmd_loss)

    n_success = n_tests = 0
    # Each batch returns (number of successes, number of tests).
    for passed, total in (run_batch() for run_batch in batches):
        n_success += passed
        n_tests += total

    print('')
    print('===================')
    print('All tests finished: %d/%d success, %d failed' % (n_success, n_tests, n_tests - n_success))
    print('')

# Run the complete test suite when this file is executed as a script.
if __name__ == '__main__':
    run_all_tests()



================================================
FILE: train.py
================================================
"""
Training script for MNIST/TFD.

Yujia Li, 01/2015
"""

import argparse

import cPickle as pickle
import pynn.nn as nn
import pynn.layer as ly
import pynn.loss as ls
import pynn.learner as learner
import core.generative as gen
import gnumpy as gnp
import numpy as np
import time
import dataio.mnist as mnistio
import dataio.tfd as tfd

import eval_mmd_generative_model as ev

# Base directory from which the output paths of all the training functions
# below are built.  You may want to change this.
OUTPUT_BASE_DIR = 'output'

def write_config(file_name, config):
    """
    Dump a dict as a text file with one 'key=value' line per entry, the lines
    being ordered by key.

    file_name: output config file name
    config: dict containing all the configs.
    """
    with open(file_name, 'w') as cfg_file:
        for key in sorted(config):
            cfg_file.write('='.join((str(key), str(config[key]))) + '\n')

def cat_list(lst):
    """
    Return the str() of every element of lst joined with underscores.
    """
    return '_'.join(map(str, lst))


def load_tfd_fold(fold=0):
    """
    Return train, val, test data for the particular fold.

    Every returned matrix has one flattened image per row; the flattened size
    is taken from the training images.
    """
    # note that the training set used here is the 'unlabeled' set in TFD
    splits = []
    for set_name in ('unlabeled', 'val', 'test'):
        images, _, _ = tfd.load_proper_fold(fold, set_name, scale=True)
        splits.append(images)

    imsz = np.prod(splits[0].shape[1:])

    return tuple(images.reshape(images.shape[0], imsz) for images in splits)

def load_tfd_all_folds(set_name='val', n_folds=5):
    """
    Load the set called set_name from folds 0 .. n_folds-1 of TFD.

    Return a list with one matrix per fold, one flattened image per row.
    """
    def flat_fold(i_fold):
        # tfd.load_proper_fold is used here (not tfd.load_fold).
        images, _, _ = tfd.load_proper_fold(i_fold, set_name, scale=True)
        return images.reshape(images.shape[0], np.prod(images.shape[1:]))

    return [flat_fold(i_fold) for i_fold in range(n_folds)]

def mnist_mmd_input_space(n_hids=[10,64,256,256,1024], sigma=[2,5,10,20,40,80], learn_rate=2, momentum=0.9):
    """
    Train a generative network on MNIST with an MMD loss applied to the
    network output, then evaluate it with ev.kde_eval_mnist.

    n_hids: number of hidden units on all layers (top-down) in the generative network.
    sigma: a list of scales used for the kernel
    learn_rate, momentum: parameters for the learning process

    The parameters and results are written to params_and_results.cfg in the
    output directory.  'sigma' in that file is the list of kernel scales; the
    sigma returned by the KDE evaluation is stored as 'kde_sigma'.

    return: KDE log_likelihood on validation set.
    """
    gnp.seed_rand(8)

    x_train, x_val, x_test = mnistio.load_data()

    print('')
    print('Training data: %d x %d' % x_train.shape)

    # n_hids[0] is the size of the network input, the remaining entries are
    # the hidden layers; the output layer has one unit per data dimension.
    in_dim = n_hids[0]
    out_dim = x_train.shape[1]

    net = gen.StochasticGenerativeNet(in_dim, out_dim)
    for n_hid in n_hids[1:]:
        net.add_layer(n_hid, nonlin_type=ly.NONLIN_NAME_RELU, dropout=0)
    net.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)

    # place holder loss, replaced by the real one below before training
    net.set_loss(ls.LOSS_NAME_MMDGEN, loss_after_nonlin=True, sigma=80, loss_weight=1000)

    print('')
    print('========')
    print('Training')
    print('========')
    print('')
    print(net)
    print('')

    mmd_learner = gen.StochasticGenerativeNetLearner(net)
    mmd_learner.load_data(x_train)

    output_base = OUTPUT_BASE_DIR + '/mnist/input_space'

    # Equal weight for every kernel scale, whatever the length of sigma.
    sigma_weights = [1] * len(sigma)

    minibatch_size = 1000
    n_sample_update_iters = 1
    max_iters = 40000
    i_checkpoint = 2000

    output_dir = output_base + '/nhids_%s_sigma_%s_lr_%s_m_%s' % (
            cat_list(n_hids), cat_list(sigma), str(learn_rate), str(momentum))

    print('')
    print('>>>> output_dir = %s' % output_dir)
    print('')

    mmd_learner.set_output_dir(output_dir)
    #net.set_loss(ls.LOSS_NAME_MMDGEN_MULTISCALE, loss_after_nonlin=True, sigma=sigma, scale_weight=sigma_weights, loss_weight=1000)
    net.set_loss(ls.LOSS_NAME_MMDGEN_SQRT_GAUSSIAN, loss_after_nonlin=True, sigma=sigma, scale_weight=sigma_weights, loss_weight=1000)

    print('**********************************')
    print(net.loss)
    print('**********************************')
    print('')

    def f_checkpoint(i_iter, w):
        mmd_learner.save_checkpoint('%d' % i_iter)

    # At iteration 10000 the learning rate is divided by 10 and the momentum
    # is moved 10 times closer to 1.
    mmd_learner.train_sgd(minibatch_size=minibatch_size, n_samples_per_update=minibatch_size,
            n_sample_update_iters=n_sample_update_iters, learn_rate=learn_rate, momentum=momentum,
            weight_decay=0, learn_rate_schedule={10000:learn_rate/10.0},
            momentum_schedule={10000:1-(1-momentum)/10.0},
            learn_rate_drop_iters=0, decrease_type='linear', adagrad_start_iter=0,
            max_iters=max_iters, iprint=100, i_exe=i_checkpoint, f_exe=f_checkpoint)

    mmd_learner.save_model()

    print('')
    print('====================')
    print('Evaluating the model')
    print('====================')
    print('')

    # The sigma returned for the validation set is kept in its own variable so
    # that it does not overwrite the kernel scales; it is then the only
    # candidate passed for the test set.
    log_prob, std, kde_sigma = ev.kde_eval_mnist(net, x_val, verbose=False)
    test_log_prob, test_std, _ = ev.kde_eval_mnist(net, x_test, sigma_range=[kde_sigma], verbose=False)

    print('Validation: %.2f (%.2f)' % (log_prob, std))
    print('Test      : %.2f (%.2f)' % (test_log_prob, test_std))
    print('')

    write_config(output_dir + '/params_and_results.cfg', { 'n_hids': n_hids,
        'sigma': sigma, 'sigma_weights': sigma_weights, 'learn_rate': learn_rate,
        'momentum': momentum, 'minibatch_size': minibatch_size,
        'n_sample_update_iters': n_sample_update_iters, 'max_iters': max_iters,
        'i_checkpoint': i_checkpoint, 'kde_sigma': kde_sigma,
        'val_log_prob': log_prob, 'val_std': std,
        'test_log_prob': test_log_prob, 'test_std': test_std })

    print('>>>> output_dir = %s' % output_dir)
    print('')

    return log_prob


def mnist_mmd_code_space(
        ae_n_hids=[1024, 32], 
        ae_dropout=[0.2, 0.5],
        ae_learn_rate=1e-1, 
        ae_momentum=0.9,
        mmd_n_hids=[10, 64, 256, 256, 1024], 
        mmd_sigma=1,
        mmd_learn_rate=2,
        mmd_momentum=0.9):
    """
    Train an auto-encoder on MNIST, then train a generative network combined
    with that auto-encoder using an MMD loss, and evaluate the result with
    ev.kde_eval_mnist.

    ae_n_hids: #hid for the encoder, bottom-up
    ae_dropout: the amount of dropout for each layer in the encoder, same order
    ae_learn_rate, ae_momentum: .
    mmd_n_hids: #hid for the generative net, top-down
    mmd_sigma: scale of the kernel
    mmd_learn_rate, mmd_momentum: .

    The parameters and results are written to params_and_results.cfg in the
    output directory.

    Return KDE log_likelihood on the validation set.
    """
    gnp.seed_rand(8)
    x_train, x_val, x_test = mnistio.load_data()

    # All the hyper-parameters are encoded in the name of the output directory.
    common_output_base = OUTPUT_BASE_DIR + '/mnist/code_space'
    output_base = common_output_base + '/aeh_%s_dr_%s_aelr_%s_aem_%s_nh_%s_s_%s_lr_%s_m_%s' % (
            cat_list(ae_n_hids), cat_list(ae_dropout), str(ae_learn_rate), str(ae_momentum),
            cat_list(mmd_n_hids), str(mmd_sigma), str(mmd_learn_rate), str(mmd_momentum))

    #######################
    # Auto-encoder training
    #######################

    # The last entry of ae_n_hids is the size of the code layer.
    n_dims = x_train.shape[1]
    h_dim = ae_n_hids[-1]

    encoder = nn.NeuralNet(n_dims, h_dim)
    for i in range(len(ae_n_hids) - 1):
        encoder.add_layer(ae_n_hids[i], nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=ae_dropout[i])
    encoder.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=ae_dropout[-1])

    # The decoder uses the encoder's hidden sizes in reverse order, without dropout.
    decoder = nn.NeuralNet(h_dim, n_dims)
    for i in range(len(ae_n_hids) - 1)[::-1]:
        decoder.add_layer(ae_n_hids[i], nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)
    decoder.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)
    decoder.set_loss(ls.LOSS_NAME_BINARY_CROSSENTROPY, loss_weight=1)

    autoenc = nn.AutoEncoder(encoder=encoder, decoder=decoder)

    print ''
    print autoenc
    print ''

    learn_rate = ae_learn_rate
    final_momentum = ae_momentum
    max_iters = 3000

    # NOTE(review): the pretraining call uses a fixed learn_rate/momentum
    # (1e-1 / 0.5), not ae_learn_rate / ae_momentum - confirm this is intended.
    nn_pretrainer = learner.AutoEncoderPretrainer(autoenc)
    nn_pretrainer.load_data(x_train)
    nn_pretrainer.pretrain_network(learn_rate=1e-1, momentum=0.5, weight_decay=0, minibatch_size=100,
            max_grad_norm=10, max_iters=max_iters, iprint=100)

    # Training of the whole auto-encoder; the input is also the target.
    nn_learner = learner.Learner(autoenc)
    nn_learner.set_output_dir(output_base + '/ae')
    nn_learner.load_data(x_train, x_train)

    def f_checkpoint(i_iter, w):
        nn_learner.save_checkpoint('%d' % i_iter)

    # Momentum starts at 0 and is set to 0.5 at iteration 50 and to
    # ae_momentum at iteration 200 by the schedule.
    nn_learner.train_sgd(learn_rate=learn_rate, momentum=0, weight_decay=0, minibatch_size=100,
            learn_rate_schedule=None, momentum_schedule={50:0.5, 200:final_momentum}, 
            max_grad_norm=10, learn_rate_drop_iters=0, decrease_type='linear', adagrad_start_iter=0,
            max_iters=max_iters, iprint=100, i_exe=2000, f_exe=f_checkpoint)
    nn_learner.save_checkpoint('best')

    ##################
    # Training MMD net
    ##################

    n_hids = mmd_n_hids

    # The generative net maps n_hids[0] inputs to an output of the size of the
    # encoder output (the code).
    in_dim = n_hids[0]
    out_dim = autoenc.encoder.out_dim

    net = gen.StochasticGenerativeNetWithAutoencoder(in_dim, out_dim, autoenc)
    for i in range(1, len(n_hids)):
        net.add_layer(n_hids[i], nonlin_type=ly.NONLIN_NAME_RELU, dropout=0)
    net.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)

    print ''
    print '========'
    print 'Training'
    print '========'
    print ''
    print net
    print ''

    mmd_learner = gen.StochasticGenerativeNetLearner(net)
    mmd_learner.load_data(x_train)

    # A single kernel scale with weight 1.
    sigma = [mmd_sigma]
    sigma_weights = [1]
    learn_rate = mmd_learn_rate
    momentum = mmd_momentum

    minibatch_size = 1000
    n_sample_update_iters = 1
    max_iters = 40000
    i_checkpoint = 2000

    mmd_learner.set_output_dir(output_base + '/mmd')
    #net.set_loss(ls.LOSS_NAME_MMDGEN_MULTISCALE, loss_after_nonlin=True, sigma=sigma, scale_weight=sigma_weights, loss_weight=1000)
    net.set_loss(ls.LOSS_NAME_MMDGEN_SQRT_GAUSSIAN, loss_after_nonlin=True, sigma=sigma, scale_weight=sigma_weights, loss_weight=1000)

    print '**********************************'
    print net.loss
    print '**********************************'
    print ''

    # Redefined on purpose: from here on checkpoints are saved by the MMD learner.
    def f_checkpoint(i_iter, w):
        mmd_learner.save_checkpoint('%d' % i_iter)

    # At iteration 10000 the learning rate is divided by 10 and the momentum
    # is moved 10 times closer to 1.
    mmd_learner.train_sgd(minibatch_size=minibatch_size, n_samples_per_update=minibatch_size, 
            n_sample_update_iters=n_sample_update_iters, learn_rate=learn_rate, momentum=momentum, 
            weight_decay=0, learn_rate_schedule={10000:learn_rate/10.0},
            momentum_schedule={10000:1-(1-momentum)/10.0},
            learn_rate_drop_iters=0, decrease_type='linear', adagrad_start_iter=0,
            max_iters=max_iters, iprint=100, i_exe=i_checkpoint, f_exe=f_checkpoint)
    mmd_learner.save_model()

    # Evaluation

    print ''
    print '===================='
    print 'Evaluating the model'
    print '===================='
    print ''

    # NOTE: sigma is rebound here to the third value returned by the KDE
    # evaluation, which is then the only candidate passed for the test set.
    # The kernel scale remains available as mmd_sigma.
    log_prob, std, sigma = ev.kde_eval_mnist(net, x_val, verbose=False)
    test_log_prob, test_std, _ = ev.kde_eval_mnist(net, x_test, sigma_range=[sigma], verbose=False)

    print 'Validation: %.2f (%.2f)' % (log_prob, std)
    print 'Test      : %.2f (%.2f)' % (test_log_prob, test_std)
    print ''

    write_config(output_base + '/params_and_results.cfg', { 
        'ae_n_hids' : ae_n_hids, 'ae_dropout' : ae_dropout, 'ae_learn_rate' : ae_learn_rate,
        'ae_momentum' : ae_momentum, 'mmd_n_hids': mmd_n_hids,
        'mmd_sigma': mmd_sigma, 'mmd_sigma_weights': sigma_weights, 'mmd_learn_rate': mmd_learn_rate,
        'mmd_momentum': mmd_momentum, 'mmd_minibatch_size': minibatch_size, 
        'mmd_n_sample_update_iters': n_sample_update_iters, 'mmd_max_iters': max_iters,
        'mmd_i_checkpoint': i_checkpoint, 'val_log_prob': log_prob, 'val_std': std, 
        'test_log_prob': test_log_prob, 'test_std': test_std })

    print '>>>> output_dir = %s' % output_base
    print ''

    return log_prob

def tfd_mmd_input_space(n_hids=[10,64,256,256,1024], sigma=[5,10,20,40,80,160], learn_rate=2, momentum=0.9):
    """
    Train a generative network on TFD with an MMD loss applied to the network
    output, then evaluate it with ev.kde_eval_tfd on all folds.

    n_hids: number of hidden units on all layers (top-down) in the generative network.
    sigma: a list of scales used for the kernel
    learn_rate, momentum: parameters for the learning process

    The parameters and results are written to params_and_results.cfg in the
    output directory.  'sigma' in that file is the list of kernel scales; the
    sigma returned by the KDE evaluation is stored as 'kde_sigma'.

    return validation log prob.
    """
    gnp.seed_rand(8)

    # train on only one fold - that's enough as the training set is the same across folds
    x_train, x_val, x_test = load_tfd_fold(0)

    print('')
    print('Training data: %d x %d' % x_train.shape)

    # n_hids[0] is the size of the network input, the remaining entries are
    # the hidden layers; the output layer has one unit per data dimension.
    in_dim = n_hids[0]
    out_dim = x_train.shape[1]

    net = gen.StochasticGenerativeNet(in_dim, out_dim)
    for n_hid in n_hids[1:]:
        net.add_layer(n_hid, nonlin_type=ly.NONLIN_NAME_RELU, dropout=0)
    net.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)

    # place holder loss, replaced by the real one below before training
    net.set_loss(ls.LOSS_NAME_MMDGEN, loss_after_nonlin=True, sigma=80, loss_weight=1000)

    print('')
    print('========')
    print('Training')
    print('========')
    print('')
    print(net)
    print('')

    mmd_learner = gen.StochasticGenerativeNetLearner(net)
    mmd_learner.load_data(x_train)

    output_base = OUTPUT_BASE_DIR + '/tfd/input_space'

    # Equal weight for every kernel scale, whatever the length of sigma.
    sigma_weights = [1] * len(sigma)

    minibatch_size = 1000
    n_sample_update_iters = 1
    max_iters = 48000
    i_checkpoint = 2000

    output_dir = output_base + '/nhids_%s_sigma_%s_lr_%s_m_%s' % (
            cat_list(n_hids), cat_list(sigma), str(learn_rate), str(momentum))

    print('')
    print('>>>> output_dir = %s' % output_dir)
    print('')

    mmd_learner.set_output_dir(output_dir)
    #net.set_loss(ls.LOSS_NAME_MMDGEN_MULTISCALE, loss_after_nonlin=True, sigma=sigma, scale_weight=sigma_weights, loss_weight=1000)
    net.set_loss(ls.LOSS_NAME_MMDGEN_SQRT_GAUSSIAN, loss_after_nonlin=True, sigma=sigma, scale_weight=sigma_weights, loss_weight=1000)

    print('**********************************')
    print(net.loss)
    print('**********************************')
    print('')

    def f_checkpoint(i_iter, w):
        mmd_learner.save_checkpoint('%d' % i_iter)

    # At iteration 10000 the learning rate is divided by 10 and the momentum
    # is moved 10 times closer to 1.
    mmd_learner.train_sgd(minibatch_size=minibatch_size, n_samples_per_update=minibatch_size,
            n_sample_update_iters=n_sample_update_iters, learn_rate=learn_rate, momentum=momentum,
            weight_decay=0, learn_rate_schedule={10000:learn_rate/10.0},
            momentum_schedule={10000:1-(1-momentum)/10.0},
            learn_rate_drop_iters=0, decrease_type='linear', adagrad_start_iter=0,
            max_iters=max_iters, iprint=100, i_exe=i_checkpoint, f_exe=f_checkpoint)

    mmd_learner.save_model()

    print('')
    print('====================')
    print('Evaluating the model')
    print('====================')
    print('')

    # The evaluation uses the val/test sets of all folds, not only fold 0.
    x_val = load_tfd_all_folds('val')
    x_test = load_tfd_all_folds('test')

    # The sigma returned for the validation sets is kept in its own variable
    # so that it does not overwrite the kernel scales; it is then the only
    # candidate passed for the test sets.
    log_prob, std, kde_sigma = ev.kde_eval_tfd(net, x_val, verbose=False)
    test_log_prob, test_std, _ = ev.kde_eval_tfd(net, x_test, sigma_range=[kde_sigma], verbose=False)

    print('Validation: %.2f (%.2f)' % (log_prob, std))
    print('Test      : %.2f (%.2f)' % (test_log_prob, test_std))
    print('')

    write_config(output_dir + '/params_and_results.cfg', { 'n_hids': n_hids,
        'sigma': sigma, 'sigma_weights': sigma_weights, 'learn_rate': learn_rate,
        'momentum': momentum, 'minibatch_size': minibatch_size,
        'n_sample_update_iters': n_sample_update_iters, 'max_iters': max_iters,
        'i_checkpoint': i_checkpoint, 'kde_sigma': kde_sigma,
        'val_log_prob': log_prob, 'val_std': std,
        'test_log_prob': test_log_prob, 'test_std': test_std })

    print('>>>> output_dir = %s' % output_dir)
    print('')

    return log_prob


def tfd_mmd_code_space(
        ae_n_hids=[512, 512, 128], 
        ae_dropout=[0.1, 0.1, 0.1],
        ae_learn_rate=1e-1, 
        ae_momentum=0,
        mmd_n_hids=[10, 64, 256, 256, 1024], 
        mmd_sigma=[1,2,5,10,20,40],
        mmd_learn_rate=1e-1,
        mmd_momentum=0.9):
    """
    Train an auto-encoder on TFD, then train a generative network combined
    with that auto-encoder using an MMD loss, and evaluate the result with
    ev.kde_eval_tfd on all folds.

    ae_n_hids: #hid for the encoder, bottom-up
    ae_dropout: the amount of dropout for each layer in the encoder, same order
    ae_learn_rate, ae_momentum: .
    mmd_n_hids: #hid for the generative net, top-down
    mmd_sigma: scale of the kernel
    mmd_learn_rate, mmd_momentum: .

    The parameters and results are written to params_and_results.cfg in the
    output directory.

    Return KDE log_likelihood on the validation set.
    """
    gnp.seed_rand(8)
    x_train, x_val, x_test = load_tfd_fold(0)

    # All the hyper-parameters are encoded in the name of the output directory.
    common_output_base = OUTPUT_BASE_DIR + '/tfd/code_space'
    output_base = common_output_base + '/aeh_%s_dr_%s_aelr_%s_aem_%s_nh_%s_s_%s_lr_%s_m_%s' % (
            cat_list(ae_n_hids), cat_list(ae_dropout), str(ae_learn_rate), str(ae_momentum),
            cat_list(mmd_n_hids), cat_list(mmd_sigma), str(mmd_learn_rate), str(mmd_momentum))

    #######################
    # Auto-encoder training
    #######################

    # The last entry of ae_n_hids is the size of the code layer.
    n_dims = x_train.shape[1]
    h_dim = ae_n_hids[-1]

    encoder = nn.NeuralNet(n_dims, h_dim)
    for i in range(len(ae_n_hids) - 1):
        encoder.add_layer(ae_n_hids[i], nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=ae_dropout[i])
    encoder.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=ae_dropout[-1])

    # The decoder uses the encoder's hidden sizes in reverse order, without dropout.
    decoder = nn.NeuralNet(h_dim, n_dims)
    for i in range(len(ae_n_hids) - 1)[::-1]:
        decoder.add_layer(ae_n_hids[i], nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)
    decoder.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)
    decoder.set_loss(ls.LOSS_NAME_BINARY_CROSSENTROPY, loss_weight=1)

    autoenc = nn.AutoEncoder(encoder=encoder, decoder=decoder)

    print ''
    print autoenc
    print ''

    learn_rate = ae_learn_rate
    final_momentum = ae_momentum
    max_iters = 15000
    #max_iters = 200

    # NOTE(review): the pretraining call uses a fixed learn_rate/momentum
    # (1e-1 / 0.5), not ae_learn_rate / ae_momentum - confirm this is intended.
    nn_pretrainer = learner.AutoEncoderPretrainer(autoenc)
    nn_pretrainer.load_data(x_train)
    nn_pretrainer.pretrain_network(learn_rate=1e-1, momentum=0.5, weight_decay=0, minibatch_size=100,
            max_grad_norm=10, max_iters=max_iters, iprint=100)

    # Training of the whole auto-encoder; the input is also the target.
    nn_learner = learner.Learner(autoenc)
    nn_learner.set_output_dir(output_base + '/ae')
    nn_learner.load_data(x_train, x_train)

    def f_checkpoint(i_iter, w):
        nn_learner.save_checkpoint('%d' % i_iter)

    # Momentum starts at 0 and is set to 0.5 at iteration 50 and to
    # ae_momentum at iteration 200 by the schedule.
    nn_learner.train_sgd(learn_rate=learn_rate, momentum=0, weight_decay=0, minibatch_size=100,
            learn_rate_schedule=None, momentum_schedule={50:0.5, 200:final_momentum}, 
            max_grad_norm=10, learn_rate_drop_iters=0, decrease_type='linear', adagrad_start_iter=0,
            max_iters=max_iters, iprint=100, i_exe=2000, f_exe=f_checkpoint)
    nn_learner.save_checkpoint('best')

    ##################
    # Training MMD net
    ##################

    n_hids = mmd_n_hids

    # The generative net maps n_hids[0] inputs to an output of the size of the
    # encoder output (the code).
    in_dim = n_hids[0]
    out_dim = autoenc.encoder.out_dim

    net = gen.StochasticGenerativeNetWithAutoencoder(in_dim, out_dim, autoenc)
    for i in range(1, len(n_hids)):
        net.add_layer(n_hids[i], nonlin_type=ly.NONLIN_NAME_RELU, dropout=0)
    net.add_layer(0, nonlin_type=ly.NONLIN_NAME_SIGMOID, dropout=0)

    print ''
    print '========'
    print 'Training'
    print '========'
    print ''
    print net
    print ''

    mmd_learner = gen.StochasticGenerativeNetLearner(net)
    mmd_learner.load_data(x_train)

    # Every kernel scale gets the same weight.
    sigma = mmd_sigma
    sigma_weights = [1] * len(sigma)
    learn_rate = mmd_learn_rate
    momentum = mmd_momentum

    minibatch_size = 1000
    n_sample_update_iters = 1
    max_iters = 48000
    #max_iters = 200
    i_checkpoint = 2000

    mmd_learner.set_output_dir(output_base + '/mmd')
    #net.set_loss(ls.LOSS_NAME_MMDGEN_MULTISCALE, loss_after_nonlin=True, sigma=sigma, scale_weight=sigma_weights, loss_weight=1000)
    net.set_loss(ls.LOSS_NAME_MMDGEN_SQRT_GAUSSIAN, loss_after_nonlin=True, sigma=sigma, scale_weight=sigma_weights, loss_weight=1000)

    print '**********************************'
    print net.loss
    print '**********************************'
    print ''

    # Redefined on purpose: from here on checkpoints are saved by the MMD learner.
    def f_checkpoint(i_iter, w):
        mmd_learner.save_checkpoint('%d' % i_iter)

    # At iteration 10000 the learning rate is divided by 10 and the momentum
    # is moved 10 times closer to 1.
    mmd_learner.train_sgd(minibatch_size=minibatch_size, n_samples_per_update=minibatch_size, 
            n_sample_update_iters=n_sample_update_iters, learn_rate=learn_rate, momentum=momentum, 
            weight_decay=0, learn_rate_schedule={10000:learn_rate/10.0},
            momentum_schedule={10000:1-(1-momentum)/10.0},
            learn_rate_drop_iters=0, decrease_type='linear', adagrad_start_iter=0,
            max_iters=max_iters, iprint=100, i_exe=i_checkpoint, f_exe=f_checkpoint)
    mmd_learner.save_model()

    # Evaluation

    print ''
    print '===================='
    print 'Evaluating the model'
    print '===================='
    print ''

    # The evaluation uses the val/test sets of all folds, not only fold 0.
    x_val = load_tfd_all_folds('val')
    x_test = load_tfd_all_folds('test')

    # NOTE: sigma is rebound here to the third value returned by the KDE
    # evaluation, which is then the only candidate passed for the test sets.
    # The kernel scales remain available as mmd_sigma.
    log_prob, std, sigma = ev.kde_eval_tfd(net, x_val, verbose=False)
    test_log_prob, test_std, _ = ev.kde_eval_tfd(net, x_test, sigma_range=[sigma], verbose=False)

    print 'Validation: %.2f (%.2f)' % (log_prob, std)
    print 'Test      : %.2f (%.2f)' % (test_log_prob, test_std)
    print ''

    write_config(output_base + '/params_and_results.cfg', { 
        'ae_n_hids' : ae_n_hids, 'ae_dropout' : ae_dropout, 'ae_learn_rate' : ae_learn_rate,
        'ae_momentum' : ae_momentum, 'mmd_n_hids': mmd_n_hids,
        'mmd_sigma': mmd_sigma, 'mmd_sigma_weights': sigma_weights, 'mmd_learn_rate': mmd_learn_rate,
        'mmd_momentum': mmd_momentum, 'mmd_minibatch_size': minibatch_size, 
        'mmd_n_sample_update_iters': n_sample_update_iters, 'mmd_max_iters': max_iters,
        'mmd_i_checkpoint': i_checkpoint, 'val_log_prob': log_prob, 'val_std': std, 
        'test_log_prob': test_log_prob, 'test_std': test_std })

    print '>>>> output_dir = %s' % output_base
    print ''

    return log_prob

if __name__ == '__main__':
    # Command line entry point: -m/--mode selects the experiment to run.
    arg_parser = argparse.ArgumentParser(description='Model parameter tuning')
    arg_parser.add_argument('-m', '--mode', choices=['mnistinput', 'mnistcode', 'tfdinput', 'tfdcode'])
    args = arg_parser.parse_args()

    stars = '************************'
    print('')
    print(stars)
    print('Testing %s' % args.mode)
    print(stars)
    print('')

    experiments = {
        'mnistinput': mnist_mmd_input_space,
        'mnistcode': mnist_mmd_code_space,
        'tfdinput': tfd_mmd_input_space,
        'tfdcode': tfd_mmd_code_space,
    }

    # Without -m, args.mode is None and no experiment is run.
    run_experiment = experiments.get(args.mode)
    if run_experiment is not None:
        run_experiment()




================================================
FILE: vistools.py
================================================
"""This module contains useful tools that make data visualization easier.

Yujia Li, 03/2013
"""

import numpy as np
import matplotlib.pyplot as plt

def bwpatchview(data, imsz, nrows, gridwidth=1, gridintensity=0, rowmajor=True, ax=None):
    """Display a list of images in grid view.

    data: N*D matrix, each row is an image
    imsz: 2-D tuple, size of the images
    nrows: number of rows to arrange the images in a plot
    gridwidth: number of pixels to use for the grid
    gridintensity: the intensity value for the grid
    rowmajor: are the images stored in a row-major order or column-major order
    ax: if provided, the image will be shown on the given axis.

    The images are organized in rows from left to right.  plt.show() is called
    at the end in every case, also when an axis is given.
    """

    N, _ = data.shape
    sx, sy = imsz

    # Number of columns needed to fit N images in nrows rows (rounded up).
    ncols = N // nrows
    if N % nrows:
        ncols += 1

    # Canvas filled with the grid intensity; the images are pasted into it,
    # leaving gridwidth pixels between them and around the border.
    img = np.ones((sx * nrows + gridwidth * (nrows + 1),
        sy * ncols + gridwidth * (ncols + 1))) * gridintensity

    for idx in range(N):
        # Image idx goes to grid row ix, grid column iy.
        ix, iy = divmod(idx, ncols)
        xstart = gridwidth + ix * (sx + gridwidth)
        ystart = gridwidth + iy * (sy + gridwidth)

        if rowmajor:
            patch = data[idx].reshape(imsz)
        else:
            patch = data[idx].reshape((imsz[1], imsz[0])).T
        img[xstart:xstart + sx, ystart:ystart + sy] = patch

    # Identity test instead of "!= None": the comparison must not depend on
    # how the axis object implements its comparison operators.
    target = plt if ax is None else ax
    target.imshow(img, cmap='gray', interpolation='nearest')
    target.axis('off')
    plt.show()

def cpatchview(data, imsz, nrows, gridwidth=1, gridintensity=0, rowmajor=True, ax=None, normalize=False):
    """Display a list of color images in grid view.

    data: N*(3*D) matrix, each row is a color image
    imsz: 2-D tuple, size of the images, should have prod(imsz)=D
    nrows: number of rows to arrange the images in a plot
    gridwidth: number of pixels to use for the grid
    gridintensity: the intensity value for the grid
    rowmajor: specify whether the images are stored in row-major order or 
        column-major order
    ax: if provided, the image will be shown on the given axis.
    normalize: if set and data is real valued, data is normalized to within [0,1]
    
    The images are organized in rows from left to right.  plt.show() is called
    at the end in every case, also when an axis is given.
    """
    N, _ = data.shape
    sx, sy = imsz

    # Number of columns needed to fit N images in nrows rows (rounded up).
    # Floor division keeps ncols an integer regardless of the meaning of "/".
    ncols = N // nrows
    if N % nrows:
        ncols += 1

    # Canvas filled with the grid intensity; the images are pasted into it,
    # leaving gridwidth pixels between them and around the border.
    img = np.ones((sx * nrows + gridwidth * (nrows + 1),
        sy * ncols + gridwidth * (ncols + 1), 3), dtype=data.dtype) * gridintensity

    for idx in range(N):
        # Image idx goes to grid row ix, grid column iy.
        ix, iy = divmod(idx, ncols)
        xstart = gridwidth + ix * (sx + gridwidth)
        ystart = gridwidth + iy * (sy + gridwidth)

        # Each row stores the 3 color planes one after another; move the
        # color axis last to get an (sx, sy, 3) patch.
        if rowmajor:
            patch = data[idx].reshape((3,sx,sy)).transpose((1,2,0))
        else:
            patch = data[idx].reshape((3,sy,sx)).transpose((2,1,0))
        img[xstart:xstart + sx, ystart:ystart + sy, :] = patch

    if normalize:
        # Rescale to [0,1]; the small constant avoids a division by zero for
        # a constant image.
        img = (img - img.min()) / (img.max() - img.min() + 1e-20)

    target = plt if ax is None else ax
    target.imshow(img, interpolation='nearest')
    target.axis('off')
    plt.show()

def listpatchview(data, nrows, gridwidth=1, gridintensity=0, ax=None):
    """Display a list of images in grid view.

    data: a list of images of the same size, can be either color or gray
        images, but should be consistent.  Gray images may have shape
        (sx, sy) or (sx, sy, 1); color images must have shape (sx, sy, 3).
    nrows: number of rows to arrange the images in a plot
    gridwidth: number of pixels to use for the grid
    gridintensity: the intensity value for the grid
    ax: if provided, the image will be shown on the given axis

    The images are organized in rows from left to right.
    """
    N = len(data)
    first = data[0]
    sx, sy = first.shape[:2]

    # Ceiling of N / nrows using integer arithmetic only, so the result stays
    # an int even when "/" means true division.
    ncols, leftover = divmod(N, nrows)
    if leftover:
        ncols += 1

    height = sx * nrows + gridwidth * (nrows + 1)
    width = sy * ncols + gridwidth * (ncols + 1)

    is_gray = len(first.shape) < 3 or first.shape[2] == 1
    if is_gray:
        canvas_shape = (height, width)
    else:
        assert(first.shape[2] == 3)
        canvas_shape = (height, width, 3)

    # Canvas pre-filled with the grid intensity.
    img = np.ones(canvas_shape, dtype=first.dtype) * gridintensity

    for idx in range(N):
        ix, iy = divmod(idx, ncols)
        xstart = gridwidth + ix * (sx + gridwidth)
        xend = xstart + sx
        ystart = gridwidth + iy * (sy + gridwidth)
        yend = ystart + sy

        patch = data[idx]
        if is_gray:
            # Drop a trailing singleton channel: an (sx, sy, 1) image cannot
            # be broadcast into the 2-D (sx, sy) canvas slice as is.
            patch = patch.reshape((sx, sy))
        img[xstart:xend, ystart:yend] = patch

    target = plt if ax is None else ax
    if is_gray:
        target.imshow(img, cmap='gray', interpolation='nearest')
    else:
        target.imshow(img, interpolation='nearest')
    target.axis('off')
    plt.show()

def plot2dgaussian(mu, sigma, npoints=100, linespec=None, linewidth=1, ax=None, *args, **kwargs):
    """Plot a 2D Gaussian distribution. Showing on the plot is an ellipse
    centered at the mean and corresponding to 1 standard deviation (not
    strictly speaking standard deviation, but similar).

    mu: mean of the Gaussian, 2 values
    sigma: 2x2 covariance matrix
    npoints: number of points used to trace the ellipse
    linespec: optional matplotlib format string for the outline
    linewidth: line width of the outline
    ax: if provided, the ellipse is drawn on the given axis
    args, kwargs: forwarded to the plot call
    """
    eig, Q = np.linalg.eig(sigma)
    scale = np.sqrt(eig).reshape(1,2)

    # Points on the unit circle, one row per angle.
    angles = 2 * np.pi * np.arange(npoints) / npoints
    circle = np.column_stack((np.cos(angles), np.sin(angles)))

    # Stretch the circle along the principal axes, rotate it into the
    # original coordinates and shift it to the mean.
    x = np.zeros((npoints + 1, 2))
    x[:npoints] = mu + (scale * circle).dot(Q.T)

    # Repeat the first point so that the outline is closed.
    x[npoints,:] = x[0,:]

    target = plt if ax is None else ax

    if linespec:
        target.plot(x[:,0], x[:,1], linespec, linewidth=linewidth, *args, **kwargs)
    else:
        target.plot(x[:,0], x[:,1], linewidth=linewidth, *args, **kwargs)
    plt.show()

def intarray_to_rgb(x, cmap):
    """
    Turn an array of color indices into an RGB image.

    x: MxN array of int indices into the cmap
    cmap: int->(r,g,b) mapping, either a dict or an array whose i-th row is
        the color of index i

    Return the converted uint8 image of shape MxNx3.  Pixels whose index is
    not covered by cmap stay black, and so does the whole image when cmap is
    neither a dict nor an array.
    """
    n_rows, n_cols = x.shape[0], x.shape[1]
    rgb = np.zeros((n_rows, n_cols, 3), dtype=np.uint8)

    # Reduce both supported cmap flavours to a stream of (index, color) pairs.
    if isinstance(cmap, dict):
        entries = ((key, cmap[key]) for key in cmap)
    elif isinstance(cmap, np.ndarray):
        entries = ((row, cmap[row]) for row in range(cmap.shape[0]))
    else:
        entries = ()

    for index, color in entries:
        rgb[x == index] = color

    return rgb

def pil_png_cmap_to_dict(pil_palette):
    """
    Convert a flat PIL palette into an index -> color lookup table.

    pil_palette is a flat list of palette values laid out as
    [r0, g0, b0, r1, g1, b1, ...], so its length is 3*C where C is the
    number of colors.

    Return a dict of (idx -> (r,g,b)), each color being a length-3 uint8
    array.
    """
    # Let numpy infer the number of colors; this avoids handing a float row
    # count to reshape when "/" means true division.
    colors = np.array(pil_palette, dtype=np.uint8).reshape(-1, 3)
    return dict(enumerate(colors))



================================================
FILE: visualize.py
================================================
import matplotlib.pyplot as plt
import numpy as np
import gnumpy as gnp
import vistools as vt
import core.generative as gen
import os
import time
import core.util as util
import scipy.misc as misc

from mpl_toolkits.axes_grid1 import AxesGrid

# Switch matplotlib to interactive mode at import time; the helpers below
# call plt.draw()/plt.show() repeatedly (e.g. once per checkpoint).
plt.ion()

def nn_search(samples, database, top_k=1, imsz=[28,28], orientation='horizontal', output_file=None, pad=0.1):
    """Show each sample together with its nearest neighbors in a database.

    samples: n_samples x n_dims matrix (numpy array or gnumpy garray), one
        query image per row
    database: matrix with n_dims columns (numpy array or gnumpy garray), the
        images to search in
    top_k: number of nearest neighbors to retrieve per sample
    imsz: size of the images, passed on to vt.bwpatchview
    orientation: 'horizontal' stores the neighbors rank by rank (all best
        matches first, then all second best, ...) and shows them in top_k
        rows; 'vertical' stores the top_k neighbors of each sample
        contiguously and shows them in n_samples rows
    output_file: if given, the figure is saved to this path
    pad: padding between the sample panel and the neighbor panel

    Neighbors are ranked by squared Euclidean distance.  An invalid
    orientation prints an error message and returns without plotting.
    """
    if orientation not in ['horizontal', 'vertical']:
        print('[Error] orientation must be either horizontal or vertical')
        return

    # Copies used for the distance computation.
    g_samples = util.to_garray(samples)
    g_database = util.to_garray(database)

    # Plain numpy copies used for indexing and display.
    if isinstance(database, gnp.garray):
        database = database.asarray()
    if isinstance(samples, gnp.garray):
        samples = samples.asarray()

    n_samples, n_dims = samples.shape
    # The builtin float is what the removed np.float alias used to refer to.
    nn = np.empty((n_samples * top_k, n_dims), dtype=float)

    for i in range(n_samples):
        v = g_samples[i]
        d = ((g_database - v)**2).sum(axis=1)
        idx = d.asarray().argsort()
        top_candidates = database[idx[:top_k]]
        if orientation == 'horizontal':
            # Rows i, i + n_samples, i + 2 * n_samples, ...: neighbor rank r
            # of sample i lands in row r * n_samples + i.
            nn[i::n_samples] = top_candidates
        else:
            nn[i*top_k:(i+1)*top_k] = top_candidates

    f = plt.figure()
    grid = AxesGrid(f, 111, nrows_ncols=(2,1), axes_pad=pad)

    vt.bwpatchview(samples, imsz, 1, gridintensity=1, ax=grid[0])
    nn_rows = top_k if orientation == 'horizontal' else n_samples
    vt.bwpatchview(nn, imsz, nn_rows, gridintensity=1, ax=grid[1])

    if output_file is not None:
        plt.savefig(output_file, bbox_inches='tight')

def view_checkpoints(model_dir, sigma, imsz=[28,28], figid=101):
    """
    Show the last-layer weights of every saved checkpoint, in increasing
    iteration order, as an animation in a single figure.

    checkpoint files should have a name matching the following:
    <model_dir>/checkpoint_<sigma>_<iter>.pdata

    model_dir: directory containing the checkpoint files
    sigma: value used for the <sigma> part of the file names
    imsz: size of the image each weight vector is displayed as
    figid: id of the matplotlib figure to draw in
    """
    prefix = '%s/checkpoint_%s' % (model_dir, str(sigma))

    # Match the complete "checkpoint_<sigma>_" stem and the ".pdata" suffix:
    # splitting on the first '.' would cut the name short when sigma itself
    # contains a dot (e.g. 1.5), and a bare prefix test would let sigma=1
    # pick up the files of sigma=10 as well.
    stem = 'checkpoint_%s_' % str(sigma)
    suffix = '.pdata'
    checkpoint_numbers = []
    for fname in os.listdir(model_dir):
        if not (fname.startswith(stem) and fname.endswith(suffix)):
            continue
        iter_label = fname[len(stem):-len(suffix)]
        # Files whose <iter> part is not a plain number are not checkpoints
        # in the documented format and are skipped.
        if iter_label.isdigit():
            checkpoint_numbers.append(int(iter_label))
    checkpoint_numbers.sort()

    net = gen.StochasticGenerativeNet()

    plt.figure(figid, figsize=(10,8))
    ax = plt.subplot(111)

    for i in checkpoint_numbers:
        net.load_model_from_file(prefix + '_%d.pdata' % i)
        w = net.layers[-1].params.W.asarray()
        ax.cla()
        # Show at most the first 400 weight vectors, on a roughly square grid.
        vt.bwpatchview(w[:400], imsz, int(np.sqrt(w[:400].shape[0])), rowmajor=True, gridintensity=1, ax=ax)
        plt.draw()
        plt.show()
        print('Checkpoint %d' % i)
        time.sleep(0.04)

def generation_on_a_line(net, n_points=100, imsz=[28,28], nrows=10, h_seeds=None):
    """Generate and display samples along straight lines in the hidden space.

    net: model providing sample_hiddens() and generate_samples()
    n_points: number of evenly spaced points per line, both ends included
    imsz: size of the generated images
    nrows: number of rows of the displayed grid
    h_seeds: optional matrix of hidden vectors, one per row.  When given,
        consecutive seeds are connected and the last seed is connected back
        to the first one; otherwise a single line between two hidden vectors
        sampled from net is used.
    """
    if h_seeds is None:
        anchors = net.sample_hiddens(2)
        # one open segment from the first sampled vector to the second
        segments = [(0, 1)]
    else:
        anchors = h_seeds
        n_anchors = h_seeds.shape[0]
        # closed loop: every seed is linked to the next one, wrapping around
        segments = [(k, (k + 1) % n_anchors) for k in range(n_anchors)]

    z = gnp.zeros((n_points * len(segments), anchors.shape[1]))

    for seg_id, (src, dst) in enumerate(segments):
        start = anchors[src]
        step = (anchors[dst] - start) / (n_points - 1)
        offset = seg_id * n_points
        for j in range(n_points):
            z[offset + j] = start + step * j

    x = net.generate_samples(z=z)
    vt.bwpatchview(x.asarray(), imsz, nrows, rowmajor=True, gridintensity=1)

def generate_morphing_video(net, h_seeds, n_points=100, imsz=[28,28], output_dir='video'):
    """Write the frames of a morphing sequence between hidden vectors.

    net: model providing generate_samples()
    h_seeds: matrix of hidden vectors, one per row; consecutive seeds are
        connected by a straight line and the last seed is connected back to
        the first one
    n_points: number of frames per line, both ends included
    imsz: size each generated sample is reshaped to before saving
    output_dir: directory receiving the frames as 0.png, 1.png, ...; it is
        created when missing
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    n_seeds = h_seeds.shape[0]
    z = gnp.zeros((n_points * n_seeds, h_seeds.shape[1]))

    # Fill z one line after the other; row counts the frames written so far.
    row = 0
    for k in range(n_seeds):
        start = h_seeds[k]
        stop = h_seeds[(k+1) % n_seeds]
        step = (stop - start) / (n_points - 1)
        for j in range(n_points):
            z[row] = start + step * j
            row += 1

    frames = net.generate_samples(z=z).asarray()
    for frame_id, frame in enumerate(frames):
        misc.imsave(output_dir + '/%d.png' % frame_id, frame.reshape(imsz))

###################################
# For old experiments
###################################

def plot_dataset(x, t, ax=None):
    """Scatter-plot a 2-D two-class dataset.

    x: data matrix, the first two columns are used as coordinates
    t: label vector, points labeled 0 and points labeled 1 are drawn as two
        separate series
    ax: axis to draw on; a new figure is created when omitted

    The axis limits are set to the data range extended by 10% on each side.
    Return the axis used.
    """
    if ax is None:
        plt.figure()
        ax = plt.subplot(111)

    for label in (0, 1):
        members = (t == label)
        ax.plot(x[members, 0], x[members, 1], 'o')

    # Data range of each coordinate, padded by a tenth of its extent.
    limits = []
    for dim in (0, 1):
        lowest = x[:, dim].min()
        highest = x[:, dim].max()
        margin = (highest - lowest) / 10.0
        limits.append([lowest - margin, highest + margin])

    ax.set_xlim(limits[0])
    ax.set_ylim(limits[1])

    plt.show()

    return ax

def plot_decision_boundary(f, x_range, y_range, density, ax=None, **kwargs):
    """Draw the zero level set of a function over a rectangular region.

    f: function taking a matrix with one 2-D point per row and returning one
        value per point
    x_range, y_range: (start, stop) of the region along each coordinate
    density: spacing between neighboring grid points
    ax: axis to draw on; a new figure is created when omitted
    kwargs: forwarded to the contour call

    Return the axis used.
    """
    if ax is None:
        plt.figure()
        ax = plt.subplot(111)

    x_ticks = np.arange(x_range[0], x_range[1], density)
    y_ticks = np.arange(y_range[0], y_range[1], density)
    grid_x, grid_y = np.meshgrid(x_ticks, y_ticks)

    # One grid point per row, then fold the values back onto the grid.
    points = np.column_stack((grid_x.ravel(), grid_y.ravel()))
    values = f(points).reshape(grid_x.shape)

    ax.contour(grid_x, grid_y, values, levels=[0], **kwargs)

    plt.show()

    return ax
Download .txt
gitextract_4jb9m0vo/

├── .gitignore
├── README.md
├── core/
│   ├── __init__.py
│   ├── generative.py
│   ├── kernels.py
│   └── util.py
├── dataio/
│   ├── __init__.py
│   ├── mnist.py
│   └── tfd.py
├── eval_mmd_generative_model.py
├── generate_sample_figures.py
├── test.py
├── train.py
├── vistools.py
└── visualize.py
Download .txt
SYMBOL INDEX (286 symbols across 11 files)

FILE: core/generative.py
  class UnsupervisedMmdLoss (line 17) | class UnsupervisedMmdLoss(ls.Loss):
    method __init__ (line 24) | def __init__(self, **kwargs):
    method load_target (line 28) | def load_target(self, target, **kwargs):
    method _make_s_mat (line 39) | def _make_s_mat(self, n_pred, n_target):
    method compute_not_weighted_loss_and_grad (line 49) | def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
    method get_name (line 72) | def get_name(self):
    method get_id (line 75) | def get_id(self):
    method __repr__ (line 78) | def __repr__(self):
  class UnsupervisedMmdLossMultiScale (line 84) | class UnsupervisedMmdLossMultiScale(ls.Loss):
    method __init__ (line 91) | def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
    method load_target (line 102) | def load_target(self, target, **kwargs):
    method _make_s_mat (line 113) | def _make_s_mat(self, n_pred, n_target):
    method compute_not_weighted_loss_and_grad (line 123) | def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
    method get_name (line 154) | def get_name(self):
    method get_id (line 157) | def get_id(self):
    method __repr__ (line 160) | def __repr__(self):
  class LinearTimeUnsupervisedMmdLoss (line 166) | class LinearTimeUnsupervisedMmdLoss(ls.Loss):
    method __init__ (line 175) | def __init__(self, **kwargs):
    method load_target (line 181) | def load_target(self, target, **kwargs):
    method compute_not_weighted_loss_and_grad (line 192) | def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
    method get_name (line 242) | def get_name(self):
    method get_id (line 245) | def get_id(self):
    method __repr__ (line 248) | def __repr__(self):
  class LinearTimeMinibatchUnsupervisedMmdLoss (line 254) | class LinearTimeMinibatchUnsupervisedMmdLoss(ls.Loss):
    method __init__ (line 265) | def __init__(self, **kwargs):
    method load_target (line 270) | def load_target(self, target, **kwargs):
    method _make_s_mat (line 281) | def _make_s_mat(self, n_pred, n_target):
    method compute_not_weighted_loss_and_grad (line 291) | def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
    method get_name (line 329) | def get_name(self):
    method get_id (line 332) | def get_id(self):
    method __repr__ (line 335) | def __repr__(self):
  class RandomFeatureMmdLoss (line 341) | class RandomFeatureMmdLoss(ls.Loss):
    method __init__ (line 351) | def __init__(self, sigma=[1.0], scale_weight=None, n_features=1024, **...
    method _generate_random_matrix (line 365) | def _generate_random_matrix(self, n_features, n_dims, sigma):
    method _generate_random_features (line 374) | def _generate_random_features(self, x, w):
    method load_target (line 378) | def load_target(self, target, **kwargs):
    method compute_not_weighted_loss_and_grad (line 396) | def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
    method get_name (line 421) | def get_name(self):
    method get_id (line 424) | def get_id(self):
    method __repr__ (line 427) | def __repr__(self):
  class PairMmdLossMultiScale (line 434) | class PairMmdLossMultiScale(ls.Loss):
    method __init__ (line 444) | def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
    method load_target (line 455) | def load_target(self, target, **kwargs):
    method _make_s_mat (line 466) | def _make_s_mat(self, n_pred, n_target):
    method compute_not_weighted_loss_and_grad (line 475) | def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
    method get_name (line 506) | def get_name(self):
    method get_id (line 509) | def get_id(self):
    method __repr__ (line 512) | def __repr__(self):
  class DifferentiableKernelMmdLoss (line 522) | class DifferentiableKernelMmdLoss(ls.Loss):
    method __init__ (line 526) | def __init__(self, **kwargs):
    method load_target (line 529) | def load_target(self, target, **kwargs):
    method _make_s_mat (line 536) | def _make_s_mat(self, n_pred, n_target):
    method compute_not_weighted_loss_and_grad (line 555) | def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
  class MultiScaleDifferentiableKernelMmdLoss (line 561) | class MultiScaleDifferentiableKernelMmdLoss(DifferentiableKernelMmdLoss):
    method __init__ (line 565) | def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
  class GaussianKernelMmdLoss (line 580) | class GaussianKernelMmdLoss(MultiScaleDifferentiableKernelMmdLoss):
    method __init__ (line 587) | def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
    method compute_not_weighted_loss_and_grad (line 591) | def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
    method get_name (line 621) | def get_name(self):
    method get_id (line 624) | def get_id(self):
    method __repr__ (line 627) | def __repr__(self):
  class LaplacianKernelMmdLoss (line 633) | class LaplacianKernelMmdLoss(MultiScaleDifferentiableKernelMmdLoss):
    method __init__ (line 637) | def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
    method compute_not_weighted_loss_and_grad (line 641) | def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
    method get_name (line 685) | def get_name(self):
    method get_id (line 688) | def get_id(self):
    method __repr__ (line 691) | def __repr__(self):
  class LaplacianL1KernelMmdLoss (line 697) | class LaplacianL1KernelMmdLoss(MultiScaleDifferentiableKernelMmdLoss):
    method __init__ (line 701) | def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
    method compute_not_weighted_loss_and_grad (line 705) | def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
    method get_name (line 736) | def get_name(self):
    method get_id (line 739) | def get_id(self):
    method __repr__ (line 742) | def __repr__(self):
  class SqrtGaussianKernelMmdLoss (line 748) | class SqrtGaussianKernelMmdLoss(GaussianKernelMmdLoss):
    method __init__ (line 755) | def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
    method compute_not_weighted_loss_and_grad (line 759) | def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
    method get_name (line 764) | def get_name(self):
    method get_id (line 767) | def get_id(self):
    method __repr__ (line 770) | def __repr__(self):
  class CpuDifferentiableKernelMmdLoss (line 776) | class CpuDifferentiableKernelMmdLoss(ls.Loss):
    method __init__ (line 780) | def __init__(self, **kwargs):
    method load_target (line 783) | def load_target(self, target, **kwargs):
    method _make_s_mat (line 790) | def _make_s_mat(self, n_pred, n_target):
    method compute_not_weighted_loss_and_grad (line 803) | def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
  class CpuMultiScaleDifferentiableKernelMmdLoss (line 809) | class CpuMultiScaleDifferentiableKernelMmdLoss(CpuDifferentiableKernelMm...
    method __init__ (line 813) | def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
  class CpuGaussianKernelMmdLoss (line 828) | class CpuGaussianKernelMmdLoss(CpuMultiScaleDifferentiableKernelMmdLoss):
    method __init__ (line 835) | def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
    method compute_not_weighted_loss_and_grad (line 839) | def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
    method get_name (line 870) | def get_name(self):
    method get_id (line 873) | def get_id(self):
    method __repr__ (line 876) | def __repr__(self):
  class CpuSqrtGaussianKernelMmdLoss (line 882) | class CpuSqrtGaussianKernelMmdLoss(CpuGaussianKernelMmdLoss):
    method __init__ (line 889) | def __init__(self, sigma=[1.0], scale_weight=None, **kwargs):
    method compute_not_weighted_loss_and_grad (line 893) | def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
    method get_name (line 898) | def get_name(self):
    method get_id (line 901) | def get_id(self):
    method __repr__ (line 904) | def __repr__(self):
  class CpuPerExampleSqrtGaussianKernelMmdLoss (line 910) | class CpuPerExampleSqrtGaussianKernelMmdLoss(ls.Loss):
    method __init__ (line 914) | def __init__(self, sigma=[1.0], scale_weight=None, pred_per_example=1,...
    method load_target (line 919) | def load_target(self, target, **kwargs):
    method compute_not_weighted_loss_and_grad (line 928) | def compute_not_weighted_loss_and_grad(self, pred, compute_grad=False):
    method get_name (line 951) | def get_name(self):
    method get_id (line 954) | def get_id(self):
    method __repr__ (line 957) | def __repr__(self):
  class StochasticGenerativeNet (line 967) | class StochasticGenerativeNet(nn.NeuralNet):
    method __init__ (line 976) | def __init__(self, in_dim=0, out_dim=0):
    method sample_hiddens (line 979) | def sample_hiddens(self, n_samples):
    method generate_samples (line 986) | def generate_samples(self, z=None, n_samples=100, sample_batch_size=10...
  class StochasticGenerativeNetWithAutoencoder (line 1009) | class StochasticGenerativeNetWithAutoencoder(StochasticGenerativeNet):
    method __init__ (line 1014) | def __init__(self, in_dim=0, out_dim=0, autoencoder=None):
    method _generate_code_samples (line 1018) | def _generate_code_samples(self, z=None, n_samples=100, sample_batch_s...
    method generate_samples (line 1022) | def generate_samples(self, z=None, n_samples=100, sample_batch_size=10...
    method load_target (line 1026) | def load_target(self, target, *args, **kwargs):
  class StochasticGenerativeNetWithAutoencoderContainer (line 1033) | class StochasticGenerativeNetWithAutoencoderContainer(object):
    method __init__ (line 1038) | def __init__(self, net, autoencoder):
    method generate_samples (line 1042) | def generate_samples(self, z=None, n_samples=100, sample_batch_size=10...
  class SampleFilter (line 1046) | class SampleFilter(object):
    method __init__ (line 1050) | def __init__(self):
    method filter (line 1053) | def filter(self, x):
  class BlankSampleFilter (line 1062) | class BlankSampleFilter(SampleFilter):
    method filter (line 1066) | def filter(self, x):
  class ClassifierSampleFilter (line 1069) | class ClassifierSampleFilter(SampleFilter):
    method __init__ (line 1073) | def __init__(self, classifier, threshold, prev=None):
    method filter (line 1086) | def filter(self, x):
  class ClassifierSampleStochasticFilter (line 1099) | class ClassifierSampleStochasticFilter(SampleFilter):
    method __init__ (line 1104) | def __init__(self, classifier, prev=None):
    method filter (line 1111) | def filter(self, x):
  class StochasticGenerativeNetWithFilter (line 1128) | class StochasticGenerativeNetWithFilter(object):
    method __init__ (line 1135) | def __init__(self, net, sample_filter):
    method generate_samples (line 1143) | def generate_samples(self, z=None, n_samples=100):
  class StochasticGenerativeNetLearner (line 1163) | class StochasticGenerativeNetLearner(learner.Learner):
    method __init__ (line 1167) | def __init__(self, net):
    method load_data (line 1175) | def load_data(self, x_train):
    method load_train_target (line 1178) | def load_train_target(self):
    method sample_hiddens (line 1181) | def sample_hiddens(self):
    method f_and_fprime (line 1184) | def f_and_fprime(self, w):
    method create_minibatch_generator (line 1196) | def create_minibatch_generator(self, minibatch_size):
    method f_and_fprime_minibatch (line 1200) | def f_and_fprime_minibatch(self, w):
    method train_stochastic_lbfgs (line 1219) | def train_stochastic_lbfgs(self, **kwargs):
    method f_info (line 1234) | def f_info(self, w):
    method _process_options (line 1270) | def _process_options(self, kwargs):
    method f_post_training (line 1293) | def f_post_training(self):
    method save_model (line 1298) | def save_model(self):
    method save_checkpoint (line 1302) | def save_checkpoint(self, label):
  class StochasticGenerativeNetLearnerAutoScale (line 1305) | class StochasticGenerativeNetLearnerAutoScale(learner.Learner):
    method __init__ (line 1310) | def __init__(self, net):
    method load_data (line 1322) | def load_data(self, x_train):
    method load_train_target (line 1325) | def load_train_target(self):
    method sample_hiddens (line 1328) | def sample_hiddens(self):
    method update_loss_scale (line 1331) | def update_loss_scale(self):
    method f_and_fprime (line 1355) | def f_and_fprime(self, w):
    method create_minibatch_generator (line 1376) | def create_minibatch_generator(self, minibatch_size):
    method f_and_fprime_minibatch (line 1380) | def f_and_fprime_minibatch(self, w):
    method train_stochastic_lbfgs (line 1404) | def train_stochastic_lbfgs(self, **kwargs):
    method f_info (line 1419) | def f_info(self, w):
    method _process_options (line 1455) | def _process_options(self, kwargs):
    method f_post_training (line 1490) | def f_post_training(self):
    method save_model (line 1495) | def save_model(self):
    method save_checkpoint (line 1499) | def save_checkpoint(self, label):

FILE: core/kernels.py
  function safe_diag (line 10) | def safe_diag(x):
  class Kernel (line 21) | class Kernel(object):
    method __init__ (line 22) | def __init__(self):
    method compute_kernel_matrix (line 25) | def compute_kernel_matrix(self, x):
    method compute_kernel_transformation (line 33) | def compute_kernel_transformation(self, x_base, x_new):
    method get_name (line 44) | def get_name(self):
  class GaussianKernel (line 47) | class GaussianKernel(Kernel):
    method __init__ (line 48) | def __init__(self, sigma):
    method compute_kernel_matrix (line 51) | def compute_kernel_matrix(self, x):
    method compute_kernel_transformation (line 58) | def compute_kernel_transformation(self, x_base, x_new):
    method get_name (line 67) | def get_name(self):
  class EuclideanKernel (line 70) | class EuclideanKernel(Kernel):
    method __init__ (line 71) | def __init__(self):
    method compute_kernel_matrix (line 74) | def compute_kernel_matrix(self, x):
    method compute_kernel_transformation (line 81) | def compute_kernel_transformation(self, x_base, x_new):
  class CPUGaussianKernel (line 91) | class CPUGaussianKernel(Kernel):
    method __init__ (line 92) | def __init__(self, sigma):
    method compute_kernel_matrix (line 95) | def compute_kernel_matrix(self, x):
  class LinearKernel (line 98) | class LinearKernel(Kernel):
    method compute_kernel_matrix (line 99) | def compute_kernel_matrix(self, x):
    method compute_kernel_transformation (line 103) | def compute_kernel_transformation(self, x_base, x_new):
    method get_name (line 109) | def get_name(self):
  class CosineKernel (line 112) | class CosineKernel(Kernel):
    method compute_kernel_matrix (line 113) | def compute_kernel_matrix(self, x):
    method compute_kernel_transformation (line 120) | def compute_kernel_transformation(self, x_base, x_new):

FILE: core/util.py
  function to_garray (line 10) | def to_garray(x):
  function to_nparray (line 13) | def to_nparray(x):
  function to_one_of_K (line 16) | def to_one_of_K(t, K=None):
  function to_plus_minus_of_K (line 27) | def to_plus_minus_of_K(t, K=None):

FILE: dataio/mnist.py
  function load_raw_data (line 20) | def load_raw_data():
  function load_data (line 29) | def load_data():
  function load_labeled_data (line 54) | def load_labeled_data(n_val=5000):

FILE: dataio/tfd.py
  function _load_raw_data (line 13) | def _load_raw_data(image_size=48):
  function get_fixed_rand_permutation (line 17) | def get_fixed_rand_permutation(size, seed=1):
  class TFD (line 25) | class TFD(object):
    method __init__ (line 26) | def __init__(self, image_size=48):
    method get_fold (line 33) | def get_fold(self, fold, set_name, center=False, scale=False):
    method get_proper_fold (line 62) | def get_proper_fold(self, fold, set_name, center=False, scale=False):
  function load_fold (line 100) | def load_fold(fold, set_name, center=False, scale=False, image_size=48):
  function load_proper_fold (line 109) | def load_proper_fold(fold, set_name, center=False, scale=False, image_si...

FILE: eval_mmd_generative_model.py
  function load_tfd_fold (line 14) | def load_tfd_fold(fold=0):
  function linear_classifier_discrimination (line 30) | def linear_classifier_discrimination(model, data, C_range=[1], verbose=T...
  function eval_filter_thresholds (line 68) | def eval_filter_thresholds(model, data, thres_range=np.arange(0, 0.9, 0....
  function get_filtered_model (line 93) | def get_filtered_model(net, data):
  function test_single_filter_old (line 99) | def test_single_filter_old(net, data, base_samples, base_classifier, thr...
  function test_single_filter (line 125) | def test_single_filter(net, data, threshold, base_samples=None, base_cla...
  function log_exp_sum_1d (line 128) | def log_exp_sum_1d(x):
  function log_exp_sum (line 140) | def log_exp_sum(x, axis=1):
  class KDE (line 147) | class KDE(object):
    method __init__ (line 151) | def __init__(self, data, sigma):
    method _log_likelihood (line 160) | def _log_likelihood(self, data):
    method log_likelihood (line 163) | def log_likelihood(self, data, batch_size=1000):
    method likelihood (line 178) | def likelihood(self, data):
    method average_likelihood (line 184) | def average_likelihood(self, data):
    method average_log_likelihood (line 187) | def average_log_likelihood(self, data, batch_size=1000):
    method average_std_log_likelihood (line 190) | def average_std_log_likelihood(self, data, batch_size=1000):
    method average_se_log_likelihood (line 194) | def average_se_log_likelihood(self, data, batch_size=1000):
  class AlternativeKDE (line 198) | class AlternativeKDE(object):
    method __init__ (line 202) | def __init__(self, data, sigma):
    method _compute_log_prob (line 208) | def _compute_log_prob(self, data, batch_size=1000):
    method likelihood (line 222) | def likelihood(self, data):
    method average_likelihood (line 228) | def average_likelihood(self, data):
    method log_likelihood (line 231) | def log_likelihood(self, data):
    method average_log_likelihood (line 235) | def average_log_likelihood(self, data):
  function kde_evaluation (line 239) | def kde_evaluation(test_data, samples, sigma_range=np.arange(0.1, 0.3, 0...
  function kde_evaluation_tfd (line 254) | def kde_evaluation_tfd(test_data, samples, sigma_range=np.arange(0.05, 0...
  function kde_evaluation_all_folds (line 257) | def kde_evaluation_all_folds(test_data, samples, sigma_range=np.arange(0...
  function generate_fold_samples (line 274) | def generate_fold_samples(net, fold_model_format, ae=None, fold_ae_forma...
  function get_fold_data (line 285) | def get_fold_data(set_name, n_folds=5):
  function kde_eval_mnist (line 297) | def kde_eval_mnist(net, test_data, n_samples=10000, sigma_range=np.arang...
  function kde_eval_tfd (line 317) | def kde_eval_tfd(net, test_data_all_folds, n_samples=10000, sigma_range=...

FILE: generate_sample_figures.py
  function get_mnist_input_space_model (line 28) | def get_mnist_input_space_model():
  function get_mnist_code_space_model (line 33) | def get_mnist_code_space_model():
  function get_tfd_input_space_model (line 41) | def get_tfd_input_space_model():
  function get_tfd_code_space_model (line 46) | def get_tfd_code_space_model():
  function get_model (line 54) | def get_model(dataset='mnist', mode='input_space'):
  function generate_samples (line 66) | def generate_samples(dataset='mnist', mode='input_space'):
  function generate_all_samples (line 75) | def generate_all_samples():
  function load_train_data (line 81) | def load_train_data(dataset='mnist'):
  function get_nearest_neighbor (line 90) | def get_nearest_neighbor(dataset='mnist', mode='input_space'):
  function get_all_nearest_neighbors (line 100) | def get_all_nearest_neighbors():
  function get_morphing_figure (line 106) | def get_morphing_figure(dataset='mnist', mode='input_space'):
  function get_all_morphing_figures (line 117) | def get_all_morphing_figures():

FILE: test.py
  function good_colored_str (line 28) | def good_colored_str(txt):
  function bad_colored_str (line 31) | def bad_colored_str(txt):
  function vec_str (line 34) | def vec_str(v):
  function test_vec_pair (line 41) | def test_vec_pair(v1, msg1, v2, msg2, error_thres=_GRAD_CHECK_EPS):
  function finite_difference_gradient (line 55) | def finite_difference_gradient(f, x):
  function fdiff_grad_generator (line 68) | def fdiff_grad_generator(net, x, t, add_noise=False, seed=None):
  function test_net_io (line 85) | def test_net_io(f_create, f_create_void):
  function test_databias_loss (line 104) | def test_databias_loss(loss_type, **kwargs):
  function create_databias_net (line 129) | def create_databias_net(dropout_rate):
  function test_databias_loss_with_net (line 135) | def test_databias_loss_with_net(add_noise, loss_type, **kwargs):
  function test_generative_mmd_loss (line 170) | def test_generative_mmd_loss(sigma=1):
  function test_generative_multi_scale_mmd_loss (line 193) | def test_generative_multi_scale_mmd_loss(sigma=[1, 10], scale_weight=None):
  function test_linear_time_mmd_loss (line 216) | def test_linear_time_mmd_loss(sigma=1.0, use_modified_loss=False, use_ab...
  function test_linear_time_minibatch_mmd_loss (line 240) | def test_linear_time_minibatch_mmd_loss(sigma=1.0, minibatch_size=100):
  function test_random_feature_mmd_loss (line 265) | def test_random_feature_mmd_loss(sigma=[1,10], scale_weight=[0.5, 1], n_...
  function test_random_feature_mmd_loss_approximation (line 290) | def test_random_feature_mmd_loss_approximation(sigma=[1,10], scale_weigh...
  function test_pair_mmd_loss_multiscale (line 321) | def test_pair_mmd_loss_multiscale(sigma=[1, 10], scale_weight=None):
  function test_diff_kernel_mmd_loss (line 345) | def test_diff_kernel_mmd_loss(sigma=[1], scale_weight=[1], loss_name=None):
  function test_diff_kernel_per_example_mmd_loss (line 372) | def test_diff_kernel_per_example_mmd_loss(sigma=[1], scale_weight=[1], p...
  function test_all_diff_kernel_per_example_mmd_loss (line 404) | def test_all_diff_kernel_per_example_mmd_loss():
  function test_all_diff_kernel_mmd_loss (line 439) | def test_all_diff_kernel_mmd_loss():
  function test_all_generative_mmd_loss (line 470) | def test_all_generative_mmd_loss():
  function run_all_tests (line 535) | def run_all_tests():

FILE: train.py
  function write_config (line 26) | def write_config(file_name, config):
  function cat_list (line 35) | def cat_list(lst):
  function load_tfd_fold (line 39) | def load_tfd_fold(fold=0):
  function load_tfd_all_folds (line 54) | def load_tfd_all_folds(set_name='val', n_folds=5):
  function mnist_mmd_input_space (line 62) | def mnist_mmd_input_space(n_hids=[10,64,256,256,1024], sigma=[2,5,10,20,...
  function mnist_mmd_code_space (line 165) | def mnist_mmd_code_space(
  function tfd_mmd_input_space (line 322) | def tfd_mmd_input_space(n_hids=[10,64,256,256,1024], sigma=[5,10,20,40,8...
  function tfd_mmd_code_space (line 425) | def tfd_mmd_code_space(

FILE: vistools.py
  function bwpatchview (line 9) | def bwpatchview(data, imsz, nrows, gridwidth=1, gridintensity=0, rowmajo...
  function cpatchview (line 56) | def cpatchview(data, imsz, nrows, gridwidth=1, gridintensity=0, rowmajor...
  function listpatchview (line 111) | def listpatchview(data, nrows, gridwidth=1, gridintensity=0, ax=None):
  function plot2dgaussian (line 165) | def plot2dgaussian(mu, sigma, npoints=100, linespec=None, linewidth=1, a...
  function intarray_to_rgb (line 190) | def intarray_to_rgb(x, cmap):
  function pil_png_cmap_to_dict (line 208) | def pil_png_cmap_to_dict(pil_palette):

FILE: visualize.py
  function nn_search (line 15) | def nn_search(samples, database, top_k=1, imsz=[28,28], orientation='hor...
  function view_checkpoints (line 53) | def view_checkpoints(model_dir, sigma, imsz=[28,28], figid=101):
  function generation_on_a_line (line 76) | def generation_on_a_line(net, n_points=100, imsz=[28,28], nrows=10, h_se...
  function generate_morphing_video (line 98) | def generate_morphing_video(net, h_seeds, n_points=100, imsz=[28,28], ou...
  function plot_dataset (line 121) | def plot_dataset(x, t, ax=None):
  function plot_decision_boundary (line 139) | def plot_decision_boundary(f, x_range, y_range, density, ax=None, **kwar...
Condensed preview — 15 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (140K chars).
[
  {
    "path": ".gitignore",
    "chars": 12,
    "preview": "*.pyc\n*.swp\n"
  },
  {
    "path": "README.md",
    "chars": 2022,
    "preview": "#Generative Moment Matching Networks (GMMNs)\nThis is the code we used for the following paper:\n* Yujia Li, Kevin Swersky"
  },
  {
    "path": "core/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "core/generative.py",
    "chars": 52642,
    "preview": "\"\"\"\nGenerative model using MMD objective.\n\nYujia Li, 09/2014\n\"\"\"\n\nimport pynn.nn as nn\nimport pynn.loss as ls\nimport pyn"
  },
  {
    "path": "core/kernels.py",
    "chars": 3953,
    "preview": "\"\"\"\nImplementation of different kernel functions.\n\nYujia Li, 11/2014\n\"\"\"\n\nimport numpy as np\nimport gnumpy as gnp\n\ndef s"
  },
  {
    "path": "core/util.py",
    "chars": 895,
    "preview": "\"\"\"\nSome helpful utility functions.\n\nYujia Li, 09/2014\n\"\"\"\n\nimport gnumpy as gnp\nimport numpy as np\n\ndef to_garray(x):\n "
  },
  {
    "path": "dataio/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "dataio/mnist.py",
    "chars": 2066,
    "preview": "\"\"\"\nData I/O for mnist dataset.\n\nYujia Li, 01/2015\n\"\"\"\n\nimport cPickle as pickle\nimport numpy as np\n\n# Fill in the path "
  },
  {
    "path": "dataio/tfd.py",
    "chars": 3958,
    "preview": "\"\"\"\nThe Toronto Face Database, Charlie/Marc'Aurelio's version\n\nYujia Li, 01/2015\n\"\"\"\n\nimport scipy.io as sio\nimport nump"
  },
  {
    "path": "eval_mmd_generative_model.py",
    "chars": 12446,
    "preview": "\"\"\"\nModule for evaluating MMD generative models.\n\nYujia Li, 11/2014\n\"\"\"\n\nimport cPickle as pickle\nimport time\nimport num"
  },
  {
    "path": "generate_sample_figures.py",
    "chars": 4277,
    "preview": "\"\"\"\nScript used for generating sample figures used in the paper.\n\nYujia Li, 02/2015\n\"\"\"\n\nimport core.generative as gen\ni"
  },
  {
    "path": "test.py",
    "chars": 18916,
    "preview": "\"\"\"\nDebug tests for the datasetbias project.\n\nYujia Li, 09/2014\n\"\"\"\nimport os\nos.environ['GNUMPY_CPU_PRECISION'] = '64'\n"
  },
  {
    "path": "train.py",
    "chars": 21538,
    "preview": "\"\"\"\nTraining script for MNIST/TFD.\n\nYujia Li, 01/2015\n\"\"\"\n\nimport argparse\n\nimport cPickle as pickle\nimport pynn.nn as n"
  },
  {
    "path": "vistools.py",
    "chars": 7348,
    "preview": "\"\"\"This module contains useful tools that makes data visualization easier.\n\nYujia Li, 03/2013\n\"\"\"\n\nimport numpy as np\nim"
  },
  {
    "path": "visualize.py",
    "chars": 5016,
    "preview": "import matplotlib.pyplot as plt\nimport numpy as np\nimport gnumpy as gnp\nimport vistools as vt\nimport core.generative as "
  }
]

About this extraction

This page contains the full source code of the yujiali/gmmn GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 15 files (131.9 KB), approximately 37.0k tokens, and a symbol index with 286 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!