Repository: stevliu/self-conditioned-gan
Branch: master
Commit: a12fc3a99876
Files: 119
Total size: 375.7 KB

Directory structure:
gitextract_vn0la05j/

├── .gitignore
├── 2d_mix/
│   ├── .gitignore
│   ├── __init__.py
│   ├── config.py
│   ├── evaluation.py
│   ├── inputs.py
│   ├── models/
│   │   ├── __init__.py
│   │   └── cluster.py
│   ├── train.py
│   └── visualizations.py
├── LICENSE
├── README.md
├── cluster_metrics.py
├── clusterers/
│   ├── __init__.py
│   ├── base_clusterer.py
│   ├── kmeans.py
│   ├── online.py
│   ├── random_labels.py
│   └── selfcondgan.py
├── configs/
│   ├── cifar/
│   │   ├── conditional.yaml
│   │   ├── default.yaml
│   │   ├── selfcondgan.yaml
│   │   └── unconditional.yaml
│   ├── default.yaml
│   ├── imagenet/
│   │   ├── conditional.yaml
│   │   ├── default.yaml
│   │   ├── selfcondgan.yaml
│   │   └── unconditional.yaml
│   ├── places/
│   │   ├── conditional.yaml
│   │   ├── default.yaml
│   │   ├── selfcondgan.yaml
│   │   └── unconditional.yaml
│   ├── pretrained/
│   │   ├── imagenet/
│   │   │   ├── conditional.yaml
│   │   │   ├── selfcondgan.yaml
│   │   │   └── unconditional.yaml
│   │   └── places/
│   │       ├── conditional.yaml
│   │       ├── selfcondgan.yaml
│   │       └── unconditional.yaml
│   └── stacked_mnist/
│       ├── conditional.yaml
│       ├── default.yaml
│       ├── selfcondgan.yaml
│       └── unconditional.yaml
├── gan_training/
│   ├── __init__.py
│   ├── checkpoints.py
│   ├── config.py
│   ├── distributions.py
│   ├── eval.py
│   ├── inputs.py
│   ├── logger.py
│   ├── metrics/
│   │   ├── __init__.py
│   │   ├── clustering_metrics.py
│   │   ├── fid.py
│   │   ├── inception_score.py
│   │   └── tf_is/
│   │       ├── LICENSE
│   │       ├── README.md
│   │       └── inception_score.py
│   ├── models/
│   │   ├── __init__.py
│   │   ├── blocks.py
│   │   ├── dcgan_deep.py
│   │   ├── dcgan_shallow.py
│   │   ├── resnet2.py
│   │   ├── resnet2s.py
│   │   └── resnet3.py
│   ├── train.py
│   └── utils.py
├── metrics.py
├── requirements.txt
├── seeded_sampler.py
├── seeing/
│   ├── frechet_distance.py
│   ├── fsd.py
│   ├── lightbox.html
│   ├── parallelfolder.py
│   ├── pbar.py
│   ├── pidfile.py
│   ├── sampler.py
│   ├── segmenter.py
│   ├── upsegmodel/
│   │   ├── __init__.py
│   │   ├── models.py
│   │   ├── prroi_pool/
│   │   │   ├── .gitignore
│   │   │   ├── README.md
│   │   │   ├── __init__.py
│   │   │   ├── build.py
│   │   │   ├── functional.py
│   │   │   ├── prroi_pool.py
│   │   │   ├── src/
│   │   │   │   ├── prroi_pooling_gpu.c
│   │   │   │   ├── prroi_pooling_gpu.h
│   │   │   │   ├── prroi_pooling_gpu_impl.cu
│   │   │   │   └── prroi_pooling_gpu_impl.cuh
│   │   │   └── test_prroi_pooling2d.py
│   │   ├── resnet.py
│   │   └── resnext.py
│   ├── yz_dataset.py
│   └── zdataset.py
├── train.py
├── utils/
│   ├── classifiers/
│   │   ├── __init__.py
│   │   ├── cifar.py
│   │   ├── imagenet.py
│   │   ├── imagenet_class_index.json
│   │   ├── places.py
│   │   ├── pytorch_playground/
│   │   │   ├── .gitignore
│   │   │   ├── LICENSE
│   │   │   ├── README.md
│   │   │   ├── cifar/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── dataset.py
│   │   │   │   ├── model.py
│   │   │   │   └── train.py
│   │   │   ├── quantize.py
│   │   │   ├── requirements.txt
│   │   │   ├── roadmap_zh.md
│   │   │   ├── setup.py
│   │   │   └── utee/
│   │   │       ├── __init__.py
│   │   │       ├── misc.py
│   │   │       ├── quant.py
│   │   │       └── selector.py
│   │   └── stacked_mnist.py
│   ├── get_empirical_distribution.py
│   ├── get_gt_imgs.py
│   └── np_to_pt_img.py
└── visualize_clusters.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
*/**/*pyc*
*/**/.DS_Store
.vscode


================================================
FILE: 2d_mix/.gitignore
================================================
**.png
**.pyc
**.pt
output/


================================================
FILE: 2d_mix/__init__.py
================================================


================================================
FILE: 2d_mix/config.py
================================================
import torch

from models import generator_dict, discriminator_dict
from torch import optim
import torch.utils.data as utils


def get_models(model_type, conditioning, k_value, d_act_dim, device):
    """Instantiate the generator/discriminator pair registered under ``model_type``.

    Args:
        model_type: key into ``generator_dict`` / ``discriminator_dict``.
        conditioning: conditioning mode, forwarded as the first positional
            argument of both constructors.
        k_value: number of classes/clusters, forwarded as ``k_value``.
        d_act_dim: forwarded to the discriminator as ``act_dim``.
        device: device both networks are moved to.

    Returns:
        ``(generator, discriminator)``, both already on ``device``.
    """
    generator_cls = generator_dict[model_type]
    discriminator_cls = discriminator_dict[model_type]

    generator = generator_cls(conditioning, k_value=k_value)
    discriminator = discriminator_cls(conditioning,
                                      k_value=k_value,
                                      act_dim=d_act_dim)

    for network in (generator, discriminator):
        network.to(device)

    return generator, discriminator


def get_optimizers(generator, discriminator, lr=1e-4, beta1=0.8, beta2=0.999):
    """Create one Adam optimizer per network, both with the same hyper-parameters.

    Args:
        generator: module whose ``parameters()`` the first optimizer updates.
        discriminator: module whose ``parameters()`` the second optimizer updates.
        lr: learning rate shared by both optimizers.
        beta1: first Adam beta coefficient.
        beta2: second Adam beta coefficient.

    Returns:
        ``(g_optimizer, d_optimizer)``.
    """
    adam_settings = {'lr': lr, 'betas': (beta1, beta2)}
    g_optimizer = optim.Adam(generator.parameters(), **adam_settings)
    d_optimizer = optim.Adam(discriminator.parameters(), **adam_settings)
    return g_optimizer, d_optimizer


def get_test(get_data, batch_size, variance, k_value, device):
    """Draw one batch from ``get_data`` and return it as tensors on ``device``.

    Args:
        get_data: callable invoked as ``get_data(batch_size, var=variance)``
            that returns a ``(samples, labels)`` pair of numpy arrays.
        batch_size: number of points requested.
        variance: forwarded to ``get_data`` as ``var``.
        k_value: unused here; kept for a uniform call signature.
        device: device the tensors are moved to.

    Returns:
        ``(x_test, y_test)``: samples cast to float, labels cast to long.
    """
    samples, labels = get_data(batch_size, var=variance)
    x_test = torch.from_numpy(samples).float().to(device)
    y_test = torch.from_numpy(labels).long().to(device)
    return x_test, y_test


def get_dataset(get_data, batch_size, npts, variance, k_value):
    """Sample a fixed dataset of ``npts`` points and wrap it in a shuffling DataLoader.

    Args:
        get_data: callable invoked as ``get_data(npts, var=variance)`` that
            returns ``(samples, labels)``.
        batch_size: mini-batch size of the returned loader.
        npts: number of points to sample for the dataset.
        variance: forwarded to ``get_data`` as ``var``.
        k_value: unused here; kept for a uniform call signature.

    Returns:
        A ``DataLoader`` over a ``TensorDataset`` of (sample, label) pairs.
        The loader shuffles and drops the final incomplete batch.
    """
    samples, labels = get_data(npts, var=variance)

    # Convert element by element so each row/label becomes its own tensor
    # before stacking (samples via torch.Tensor, labels via torch.tensor).
    sample_rows = [torch.Tensor(row) for row in samples]
    label_items = [torch.tensor(label) for label in labels]
    dataset = utils.TensorDataset(torch.stack(sample_rows),
                                  torch.stack(label_items))

    loader_options = dict(batch_size=batch_size,
                          shuffle=True,
                          num_workers=0,
                          pin_memory=True,
                          sampler=None,
                          drop_last=True)
    return utils.DataLoader(dataset, **loader_options)


================================================
FILE: 2d_mix/evaluation.py
================================================
import warnings


def warn(*args, **kwargs):
    """No-op stand-in for ``warnings.warn``; accepts and ignores any arguments."""
    return None


# Replace ``warnings.warn`` with the no-op above so that every warning issued
# through it after this module is imported is silently discarded.
warnings.warn = warn

import numpy as np


def percent_good_grid(x_fake, var=0.0025, nrows=5, ncols=5):
    """Score generated points against the 2D-grid mixture.

    The mode centres are ``(r * 2 - 4, c * 2 - 4)`` for ``r`` in
    ``range(nrows)`` and ``c`` in ``range(ncols)``; a point counts as good
    when it is within three standard deviations of a centre.

    Args:
        x_fake: generated samples, passed on to ``percent_good_pts``.
        var: variance of each mixture component.
        nrows: number of distinct first-coordinate centres.
        ncols: number of distinct second-coordinate centres.

    Returns:
        Whatever ``percent_good_pts`` returns for these centres.
    """
    threshold = 3 * np.sqrt(var)
    means = [
        np.array([row * 2 - 4, col * 2 - 4])
        for row in range(nrows)
        for col in range(ncols)
    ]
    return percent_good_pts(x_fake, means, threshold)


def percent_good_ring(x_fake, var=0.0001, n_clusters=8, radius=2.0):
    """Score generated points against the 2D-ring mixture.

    The ``n_clusters`` mode centres are spaced evenly on a circle of the given
    ``radius``; a point counts as good when it is within three standard
    deviations of a centre.

    Args:
        x_fake: generated samples, passed on to ``percent_good_pts``.
        var: variance of each mixture component.
        n_clusters: number of modes on the ring.
        radius: radius of the ring.

    Returns:
        Whatever ``percent_good_pts`` returns for these centres.
    """
    three_sigma = np.sqrt(var) * 3
    threshold = np.array([three_sigma, three_sigma])

    # linspace includes both 0 and 2*pi; drop the duplicate end point.
    angles = np.linspace(0, 2 * np.pi, n_clusters + 1)[:n_clusters]
    xs = radius * np.sin(angles)
    ys = radius * np.cos(angles)
    means = [np.array([cx, cy]) for cx, cy in zip(xs, ys)]

    return percent_good_pts(x_fake, means, threshold)


def percent_good_pts(x_fake, means, threshold):
    """Calculate %good, #modes, kl

    Keyword arguments:
    x_fake -- detached generated samples
    means -- true means
    threshold -- a point is good if every coordinate of |point - mean| is
                 below threshold for some mean

    Returns a tuple (fraction of good points, number of distinct means that
    have at least one good point, KL-style sum over the histogram of
    nearest-mean assignments measured against a uniform distribution).
    """
    n_good = 0
    nearest_hist = np.zeros(len(means))
    covered = set()

    for point in x_fake:
        offsets = [np.abs(point - mean) for mean in means]

        # "Good" test: the first mean whose box of half-width `threshold`
        # contains the point claims it.
        hit = next((k for k, offset in enumerate(offsets)
                    if np.all(offset < threshold)), None)
        if hit is not None:
            covered.add(tuple(means[hit]))
            n_good += 1

        # Nearest mean by Euclidean distance; ties keep the earliest mean.
        nearest = 0
        best_offset = [1e10, 1e10]
        for k, offset in enumerate(offsets):
            if np.linalg.norm(offset) < np.linalg.norm(best_offset):
                nearest = k
                best_offset = offset
        nearest_hist[nearest] += 1

    nearest_hist = nearest_hist / len(x_fake)
    kl = 0
    for fraction in nearest_hist:
        # Empty bins contribute nothing (0 * log 0 is taken as 0).
        if fraction != 0:
            kl += fraction * np.log(len(means) * fraction)

    return n_good / len(x_fake), len(covered), kl


================================================
FILE: 2d_mix/inputs.py
================================================
import numpy as np
import random

# Lookup table applied to raw labels; currently the identity on 0..24.
mapping = list(range(25))


def map_labels(labels):
    """Translate each label through ``mapping`` and return the result as a numpy array."""
    return np.array(list(map(lambda label: mapping[label], labels)))


def get_data_ring(batch_size, radius=2.0, var=0.0001, n_clusters=8):
    """Sample ``batch_size`` points from a ring of isotropic 2D Gaussians.

    Args:
        batch_size: total number of points to draw.
        radius: radius of the circle the component means lie on.
        var: variance of each component along both axes.
        n_clusters: number of components, spaced evenly on the circle.

    Returns:
        ``(samples, labels)``: the sampled points and, for each point, the
        index of the component it was drawn from.
    """
    # linspace includes both 0 and 2*pi; drop the duplicate end point.
    angles = np.linspace(0, 2 * np.pi, n_clusters + 1)[:n_clusters]
    centers_x = radius * np.sin(angles)
    centers_y = radius * np.cos(angles)

    # Decide how many points each component gets, then shuffle the order.
    per_cluster = np.random.multinomial(batch_size,
                                        [1.0 / n_clusters] * n_clusters)
    labels = []
    for cluster in range(n_clusters):
        labels.extend(cluster for _ in range(per_cluster[cluster]))
    random.shuffle(labels)
    labels = np.array(labels)

    covariance = [[var, 0], [0, var]]
    samples = np.array([
        np.random.multivariate_normal([centers_x[k], centers_y[k]], covariance)
        for k in labels
    ])
    return samples, labels


def get_data_grid(batch_size, radius=2.0, var=0.0025, nrows=5, ncols=5):
    """Sample ``batch_size`` points from a grid of isotropic 2D Gaussians.

    Each point picks a cell ``(i, j)`` uniformly with ``0 <= i < ncols`` and
    ``0 <= j < nrows`` and is drawn around the centre
    ``(i * 2 - 4, j * 2 - 4)``.

    Args:
        batch_size: number of points to draw.
        radius: unused; kept so the signature matches ``get_data_ring``.
        var: variance of each component along both axes.
        nrows: number of distinct ``j`` indices.
        ncols: number of distinct ``i`` indices.

    Returns:
        ``(samples, labels)``: the sampled points and the cell label of each
        point after it has been passed through ``map_labels``.
    """
    covariance = [[var, 0], [0, var]]
    samples = []
    labels = []
    for _ in range(batch_size):
        i, j = random.randint(0, ncols - 1), random.randint(0, nrows - 1)
        samples.append(
            np.random.multivariate_normal([i * 2 - 4, j * 2 - 4], covariance))
        # j ranges over nrows values, so the stride must be nrows for the
        # label to be unique per cell. The stride used to be a hard-coded 5,
        # which is identical for the default grid but made distinct cells
        # share a label whenever nrows > 5.
        labels.append(nrows * i + j)
    return np.array(samples), map_labels(labels)


================================================
FILE: 2d_mix/models/__init__.py
================================================
from models import (cluster)

# Registries looked up by model-type name; the only architecture registered
# here is 'standard', implemented by the G and D classes of the cluster module.
generator_dict = {'standard': cluster.G}
discriminator_dict = {'standard': cluster.D}


================================================
FILE: 2d_mix/models/cluster.py
================================================
import sys

import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable

sys.path.append('../gan_training/models')

from blocks import LatentEmbeddingConcat, Identity, LinearUnconditionalLogits, LinearConditionalMaskLogits


class G(nn.Module):
    """Fully connected generator for the 2D mixture experiments.

    The latent code (optionally combined with a label by ``self.embedding``)
    goes through four Linear -> BatchNorm1d -> ReLU blocks of width
    ``act_dim`` and a final linear layer producing ``x_dim`` outputs.

    Args:
        conditioning: ``'unconditional'`` (embedding is ``Identity`` and no
            extra input width is added) or ``'conditional'`` (embedding is
            ``LatentEmbeddingConcat(k_value, embed_size)``); anything else
            raises ``NotImplementedError``.
        k_value: number of classes given to the conditional embedding.
        z_dim: latent dimensionality.
        embed_size: extra input width added in the conditional case.
        act_dim: hidden layer width.
        x_dim: output dimensionality.
    """
    def __init__(self,
                 conditioning,
                 k_value,
                 z_dim=2,
                 embed_size=32,
                 act_dim=400,
                 x_dim=2):

        super().__init__()
        if conditioning == 'conditional':
            self.embedding = LatentEmbeddingConcat(k_value, embed_size)
        elif conditioning == 'unconditional':
            embed_size = 0
            self.embedding = Identity()
        else:
            raise NotImplementedError()

        def hidden_block(in_features):
            # One Linear -> BatchNorm -> in-place ReLU stage of width act_dim.
            return nn.Sequential(nn.Linear(in_features, act_dim),
                                 nn.BatchNorm1d(act_dim), nn.ReLU(True))

        self.fc1 = hidden_block(z_dim + embed_size)
        self.fc2 = hidden_block(act_dim)
        self.fc3 = hidden_block(act_dim)
        self.fc4 = hidden_block(act_dim)
        self.fc_out = nn.Linear(act_dim, x_dim)

    def forward(self, z, y=None):
        """Map latent codes ``z`` (and optional labels ``y``) to generated points."""
        hidden = self.embedding(z, y)
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            hidden = layer(hidden)
        return self.fc_out(hidden)


class D(nn.Module):
    """Maxout discriminator for the 2D mixture experiments.

    Three ``Maxout`` layers of width ``act_dim`` are followed by a logits
    head: ``LinearUnconditionalLogits(act_dim)`` for ``'unconditional'``,
    ``LinearConditionalMaskLogits(act_dim, k_value)`` for ``'conditional'``.
    Any other ``conditioning`` raises ``NotImplementedError``.
    """
    class Maxout(nn.Module):
        """Linear layer whose outputs are max-pooled in groups of ``pool_size``."""

        # Taken from https://github.com/pytorch/pytorch/issues/805
        def __init__(self, d_in, d_out, pool_size=5):
            super().__init__()
            self.d_in = d_in
            self.d_out = d_out
            self.pool_size = pool_size
            self.lin = nn.Linear(d_in, d_out * pool_size)

        def forward(self, inputs):
            # Target shape: the input shape with its last dimension replaced
            # by (d_out, pool_size); the max is taken over the pool axis.
            grouped_shape = list(inputs.size())
            grouped_shape[-1] = self.d_out
            grouped_shape.append(self.pool_size)
            projected = self.lin(inputs)
            pooled = projected.view(*grouped_shape).max(len(grouped_shape) - 1)
            return pooled[0]

    def max(self, out, dim=5):
        """Max-pool ``out`` over consecutive groups of ``dim`` features per row."""
        grouped = out.view(out.size(0), -1, dim)
        return grouped.max(2)[0]

    def __init__(self, conditioning, k_value, act_dim=200, x_dim=2):
        super().__init__()
        self.fc1 = self.Maxout(x_dim, act_dim)
        self.fc2 = self.Maxout(act_dim, act_dim)
        self.fc3 = self.Maxout(act_dim, act_dim)

        if conditioning == 'conditional':
            self.fc4 = LinearConditionalMaskLogits(act_dim, k_value)
        elif conditioning == 'unconditional':
            self.fc4 = LinearUnconditionalLogits(act_dim)
        else:
            raise NotImplementedError()

    def forward(self, x, y=None, get_features=False):
        """Return the logits for ``x`` (given ``y``), or the fc3 features if ``get_features``."""
        features = x
        for layer in (self.fc1, self.fc2, self.fc3):
            features = layer(features)
        if get_features:
            return features
        return self.fc4(features, y, get_features=get_features)


================================================
FILE: 2d_mix/train.py
================================================
import argparse
import os
import sys

import torch
from torch import optim
from torch import distributions
from torch import nn
import torch.nn.functional as F
import numpy as np

import evaluation
import inputs

from config import get_models, get_optimizers, get_test, get_dataset
from visualizations import (visualize_generated, visualize_clusters)

sys.path.append('../')
from clusterers import clusterer_dict
from gan_training.train import Trainer

sys.path.append('../seeing/')
import pidfile

# Command-line interface of the 2D experiments. Arguments are parsed at import
# time, so the rest of this module reads them from the module-level `args`.
parser = argparse.ArgumentParser(description='2d dataset experiments')
# Clustering schedule.
parser.add_argument('--clusterer', help='type of clusterer to use. cluster specifies selfcondgan')
parser.add_argument('--data_type', help='either grid or ring')
parser.add_argument('--recluster_every', type=int, default=5000, help='how frequently to recluster')
parser.add_argument('--nruns', type=int, default=1, help='number of trials to do')
parser.add_argument('--burnin_time', type=int, default=0, help='wait this amount of iterations before clustering')

# Data, model and training sizes.
parser.add_argument('--variance', type=float, default=None, help='variance of the gaussians')
parser.add_argument('--model_type', type=str, default='standard', help='model architecture')
parser.add_argument('--num_clusters', type=int, default=50, help='number of clusters to use for selfcondgan')
parser.add_argument('--z_dim', type=int, default=2, help='G latent dim')
parser.add_argument('--d_act_dim', type=int, default=200, help='hidden layer width')
parser.add_argument('--npts', type=int, default=100000, help='number of points to use in dataset')
parser.add_argument('--train_batch_size', type=int, default=100, help='training time batch size')
parser.add_argument('--test_batch_size', type=int, default=50000, help='number of examples to get metrics with')
parser.add_argument('--nepochs', type=int, default=100, help='number of epochs to run')
parser.add_argument('--outdir', default='output')
args = parser.parse_args()

# ---- Settings derived from the command line --------------------------------
data_type = args.data_type
# The ring has 8 modes; every other data type is treated as the 25-mode grid.
k_value = 8 if data_type == 'ring' else 25
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# The supervised clusterer uses the true number of modes as its cluster count.
num_clusters = k_value if args.clusterer == 'supervised' else args.num_clusters

# Experiment directory name; optional prefixes are added model type first,
# then variance, so the variance ends up outermost.
exp_name = f'{args.data_type}_{args.clusterer}_{args.recluster_every}_{num_clusters}/'
if args.model_type != 'standard':
    exp_name = f'{args.model_type}_{exp_name}'

if args.variance is None:
    # No explicit variance: fall back to the per-dataset default.
    variance = 0.0025 if data_type == 'grid' else 0.0001
else:
    variance = args.variance
    exp_name = f'{variance}_{exp_name}'

nepochs, z_dim = args.nepochs, args.z_dim
test_batch_size, train_batch_size = args.test_batch_size, args.train_batch_size
npts = args.npts


def main(outdir):
    """Train one conditional GAN with the selected clusterer on a 2D mixture.

    Creates the ``all``, ``snapshots`` and ``clusters`` sub-directories of
    ``outdir``, builds the data, networks, optimizers, trainer and clusterer
    from the module-level settings, and runs the training loop. Every 1000
    iterations it saves plots and a model snapshot and appends the metrics to
    ``<outdir>/log.txt``.

    Args:
        outdir: directory that receives every output of this run.

    Raises:
        NotImplementedError: if ``data_type`` is neither 'grid' nor 'ring'.
    """
    for subdir in ['all', 'snapshots', 'clusters']:
        # exist_ok=True already tolerates an existing directory.
        os.makedirs(os.path.join(outdir, subdir), exist_ok=True)

    if data_type == 'grid':
        get_data = inputs.get_data_grid
        percent_good = evaluation.percent_good_grid
    elif data_type == 'ring':
        get_data = inputs.get_data_ring
        percent_good = evaluation.percent_good_ring
    else:
        raise NotImplementedError()

    zdist = distributions.Normal(torch.zeros(z_dim, device=device),
                                 torch.ones(z_dim, device=device))
    # Fixed latent codes reused at every evaluation step.
    z_test = zdist.sample((test_batch_size, ))

    x_test, y_test = get_test(get_data=get_data,
                              batch_size=test_batch_size,
                              variance=variance,
                              k_value=k_value,
                              device=device)

    # Separate batch of real points handed to the clusterer.
    x_cluster, _ = get_test(get_data=get_data,
                            batch_size=10000,
                            variance=variance,
                            k_value=k_value,
                            device=device)

    train_loader = get_dataset(get_data=get_data,
                               batch_size=train_batch_size,
                               npts=npts,
                               variance=variance,
                               k_value=k_value)

    def train(trainer, g, d, clusterer, exp_dir):
        """Run ``nepochs`` epochs, logging and snapshotting every 1000 iterations."""
        it = 0
        # Start a fresh log for this run.
        if os.path.exists(os.path.join(exp_dir, 'log.txt')):
            os.remove(os.path.join(exp_dir, 'log.txt'))

        for epoch in range(nepochs):
            for x_real, y in train_loader:
                z = zdist.sample((train_batch_size, ))
                x_real, y = x_real.to(device), y.to(device)
                # Replace the dataset labels by whatever the clusterer assigns.
                y = clusterer.get_labels(x_real, y)

                dloss, _ = trainer.discriminator_trainstep(x_real, y, z)
                gloss = trainer.generator_trainstep(y, z)

                if it % args.recluster_every == 0 and args.clusterer != 'supervised':
                    if args.clusterer != 'burnin' or it >= args.burnin_time:
                        # Use the discriminator this function was given rather
                        # than reaching for the enclosing scope's variable.
                        clusterer.recluster(d, x_batch=x_real)

                if it % 1000 == 0:
                    # Evaluation only: no_grad keeps autograd from recording
                    # the forward pass over the large test batch.
                    with torch.no_grad():
                        x_fake = g(z_test, clusterer.get_labels(x_test, y_test)).detach().cpu().numpy()

                    visualize_generated(x_fake,
                                        x_test.detach().cpu().numpy(), y, it,
                                        exp_dir)

                    with torch.no_grad():
                        test_labels = clusterer.get_labels(x_test, y_test)
                    visualize_clusters(x_test.detach().cpu().numpy(),
                                       test_labels,
                                       it, exp_dir)

                    # g_optimizer / d_optimizer come from the enclosing scope;
                    # they are assigned before train() is called.
                    torch.save(
                        {
                            'generator': g.state_dict(),
                            'discriminator': d.state_dict(),
                            'g_optimizer': g_optimizer.state_dict(),
                            'd_optimizer': d_optimizer.state_dict()
                        },
                        os.path.join(exp_dir, 'snapshots', 'model_%d.pt' % it))

                    # NOTE(review): the networks are left in eval mode after
                    # this point; presumably the Trainer steps switch them
                    # back to train mode. Confirm against Trainer.
                    g.eval()
                    d.eval()

                    with torch.no_grad():
                        x_fake = g(z_test, clusterer.get_labels(
                            x_test, y_test)).detach().cpu().numpy()
                    percent, modes, kl = percent_good(x_fake, var=variance)
                    log_message = f'[epoch {epoch} it {it}] dloss = {dloss}, gloss = {gloss}, prop_real = {percent}, modes = {modes}, kl = {kl}'
                    with open(os.path.join(exp_dir, 'log.txt'), 'a+') as f:
                        f.write(log_message + '\n')
                    print(log_message)

                it += 1

    # train a G/D from scratch
    generator, discriminator = get_models(args.model_type, 'conditional', num_clusters, args.d_act_dim, device)
    g_optimizer, d_optimizer = get_optimizers(generator, discriminator)
    trainer = Trainer(generator, discriminator, g_optimizer, d_optimizer, gan_type='standard', reg_type='none', reg_param=0)
    clusterer = clusterer_dict[args.clusterer](discriminator=discriminator,
                                               k_value=num_clusters,
                                               x_cluster=x_cluster)
    # Initial clustering before the first training step.
    clusterer.recluster(discriminator=discriminator)
    train(trainer, generator, discriminator, clusterer, os.path.join(outdir))


if __name__ == '__main__':
    outdir = os.path.join(args.outdir, exp_name)
    # Skip the whole job if a previous invocation already marked it done.
    pidfile.exit_if_job_done(outdir)

    # A single run writes straight into `outdir`; multiple runs each get a
    # `_run_<n>` suffix.
    multiple_runs = args.nruns > 1
    for run_number in range(args.nruns):
        if multiple_runs:
            run_dir = f'{outdir}_run_{run_number}'
        else:
            run_dir = outdir
        main(run_dir)

    pidfile.mark_job_done(outdir)


================================================
FILE: 2d_mix/visualizations.py
================================================
import matplotlib
from matplotlib import pyplot
import os

# Palette of named matplotlib colours used to tell clusters apart; cluster i
# is drawn with COLORS[i % len(COLORS)], so the palette repeats after 30
# clusters.
COLORS = [
    'purple',
    'wheat',
    'maroon',
    'red',
    'powderblue',
    'dodgerblue',
    'magenta',
    'tan',
    'aqua',
    'yellow',
    'slategray',
    'blue',
    'rosybrown',
    'violet',
    'lightseagreen',
    'pink',
    'darkorange',
    'teal',
    'royalblue',
    'lawngreen',
    'gold',
    'navy',
    'darkgreen',
    'deeppink',
    'palegreen',
    'silver',
    'saddlebrown',
    'plum',
    'peru',
    'black',
]

# Guard against accidentally listing the same colour twice.
assert (len(COLORS) == len(set(COLORS)))

def visualize_generated(fake, real, y, it, outdir):
    """Save two scatter plots of the generated points for iteration ``it``.

    ``<outdir>/all/<it>.png`` overlays the real points (red) with the fake
    points (blue); ``<outdir>/all/<it>square.png`` shows only the fake points,
    semi-transparent, on a fixed square [-6, 6] x [-6, 6] canvas.

    Args:
        fake: generated points, indexed as ``fake[:, 0]`` / ``fake[:, 1]``.
        real: real points, indexed the same way.
        y: unused.
        it: iteration number used in the file names.
        outdir: run directory containing the ``all`` sub-directory.
    """
    plot_dir = os.path.join(outdir, 'all')

    # Plot 1: real vs. fake overlay with automatic axis limits.
    pyplot.plot(real[:, 0], real[:, 1], 'r.')
    pyplot.plot(fake[:, 0], fake[:, 1], 'b.')
    pyplot.savefig(os.path.join(plot_dir, f'{it!s}.png'))
    pyplot.clf()

    # Plot 2: fake points only, on a fixed square canvas.
    bound = 6
    canvas = pyplot.gca()
    canvas.set_aspect('equal', adjustable='box')
    canvas.set_xlim([-bound, bound])
    canvas.set_ylim([-bound, bound])

    pyplot.locator_params(nbins=4)
    pyplot.tight_layout()

    pyplot.plot(fake[:, 0], fake[:, 1], 'b.', alpha=0.1)
    square_path = os.path.join(plot_dir, f'{it!s}square.png')
    pyplot.savefig(square_path, dpi=100, bbox_inches='tight')
    pyplot.clf()


def visualize_clusters(x, y, it, outdir):
    """Scatter-plot ``x`` coloured by cluster and save it as ``<outdir>/clusters/<it>.png``.

    Args:
        x: points, indexed as ``x[mask, 0]`` / ``x[mask, 1]``.
        y: tensor of integer cluster labels, one per point; it is detached,
            moved to the CPU and converted to numpy here.
        it: iteration number used as the file name.
        outdir: run directory containing the ``clusters`` sub-directory.
    """
    y = y.detach().cpu().numpy()
    # range() excludes its stop value, so go up to y.max() + 1; otherwise the
    # highest-numbered cluster would never be drawn.
    for i in range(y.max() + 1):
        members = y == i
        pyplot.plot(x[members, 0],
                    x[members, 1],
                    '.',
                    color=COLORS[i % len(COLORS)])
    pyplot.savefig(os.path.join(outdir, 'clusters', str(it) + '.png'))
    pyplot.clf()


================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2020 Steven Liu

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

================================================
FILE: README.md
================================================
# Diverse Image Generation via Self-Conditioned GANs

#### [Project](http://selfcondgan.csail.mit.edu/) |   [Paper](http://selfcondgan.csail.mit.edu/preprint.pdf)

**Diverse Image Generation via Self-Conditioned GANs** <br>
[Steven Liu](http://people.csail.mit.edu/stevenliu/),
[Tongzhou Wang](https://ssnl.github.io/),
[David Bau](http://people.csail.mit.edu/davidbau/home/),
[Jun-Yan Zhu](http://people.csail.mit.edu/junyanz/),
[Antonio Torralba](http://web.mit.edu/torralba/www/) <br>
MIT, Adobe Research<br>
in CVPR 2020.

![Teaser](images/teaser.png)

Our proposed self-conditioned GAN model learns to perform clustering and image synthesis simultaneously. The model training
requires no manual annotation of object classes. Here, we visualize several discovered clusters for both Places365 (top) and ImageNet
(bottom). For each cluster, we show both real images and the generated samples conditioned on the cluster index.

## Getting Started

### Installation
- Clone this repo:
```bash
git clone https://github.com/stevliu/self-conditioned-gan.git
cd self-conditioned-gan
```

- Install the dependencies
```bash
conda create --name selfcondgan python=3.6
conda activate selfcondgan
conda install --file requirements.txt
conda install -c conda-forge tensorboardx
```
### Training and Evaluation
- Train a model on CIFAR:
```bash
python train.py configs/cifar/selfcondgan.yaml
```

- Visualize samples and inferred clusters:
```bash
python visualize_clusters.py configs/cifar/selfcondgan.yaml --show_clusters
```
The samples and clusters will be saved to `output/cifar/selfcondgan/clusters`. If this directory lies on an Apache server, you can open the URL to `output/cifar/selfcondgan/clusters/+lightbox.html` in the browser and visualize all samples and clusters in one webpage.

- Evaluate the model's FID:
You will need to first gather a set of ground truth train set images to compute metrics against.
```bash
python utils/get_gt_imgs.py --cifar
python metrics.py configs/cifar/selfcondgan.yaml --fid --every -1
```
You can also evaluate with other metrics by appending additional flags, such as Inception Score (`--inception`), the number of covered modes + reverse-KL divergence (`--modes`), and cluster metrics (`--cluster_metrics`).

## Pretrained Models

You can load and evaluate pretrained models on ImageNet and Places. If you have access to ImageNet or Places directories, first fill in paths to your ImageNet and/or Places dataset directories in `configs/imagenet/default.yaml` and `configs/places/default.yaml` respectively. You can use the following config files with the evaluation scripts, and the code will automatically download the appropriate models.

```bash
configs/pretrained/imagenet/selfcondgan.yaml
configs/pretrained/places/selfcondgan.yaml

configs/pretrained/imagenet/conditional.yaml
configs/pretrained/places/conditional.yaml

configs/pretrained/imagenet/unconditional.yaml
configs/pretrained/places/unconditional.yaml
```

## Evaluation
### Visualizations

To visualize generated samples and inferred clusters, run
```bash
python visualize_clusters.py config-file
```
You can set the flag `--show_clusters` to also visualize the real inferred clusters, but this requires that you have a path to training set images.

### Metrics
To obtain generation metrics, fill in paths to your ImageNet or Places dataset directories in `utils/get_gt_imgs.py` and then run
```bash
python utils/get_gt_imgs.py --imagenet --places
```
to precompute batches of GT images for FID/FSD evaluation.

Then, you can use
```bash
python metrics.py config-file
```
with the appropriate flags to compute the FID (`--fid`), FSD (`--fsd`), IS (`--inception`), number of modes covered / reverse-KL divergence (`--modes`), and clustering metrics (`--cluster_metrics`) for each of the checkpoints.

## Training models
To train a model, set up a configuration file (examples in `/configs`), and run
```bash
python train.py config-file
```

An example config for a self-conditioned GAN on ImageNet is `configs/imagenet/selfcondgan.yaml`, and on Places it is `configs/places/selfcondgan.yaml`.

Some models may be too large to fit on one GPU, so you may want to add `--devices DEVICE_NUMBERS` as an additional flag to do multi GPU training.

## 2D-experiments
For synthetic dataset experiments, first go into the `2d_mix` directory.

To train a self-conditioned GAN on the 2D-ring and 2D-grid dataset, run
```bash
python train.py --clusterer selfcondgan --data_type ring
python train.py --clusterer selfcondgan --data_type grid
```
You can test several other configurations via the command line arguments.


## Acknowledgments
This code is heavily based on the [GAN-stability](https://github.com/LMescheder/GAN_stability) code base.
Our FSD code is taken from the [GANseeing](https://github.com/davidbau/ganseeing) work.
To compute inception score, we use the code provided from [Shichang Tang](https://github.com/tsc2017/Inception-Score.git).
To compute FID, we use the code provided from [TTUR](https://github.com/bioinf-jku/TTUR).
We also use pretrained classifiers given by the [pytorch-playground](https://github.com/aaron-xichen/pytorch-playground).

We thank all the authors for their useful code.

## Citation
If you use this code for your research, please cite the following work.
```
@inproceedings{liu2020selfconditioned,
 title={Diverse Image Generation via Self-Conditioned GANs},
 author={Liu, Steven and Wang, Tongzhou and Bau, David and Zhu, Jun-Yan and Torralba, Antonio},
 booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
 year={2020}
}
```


================================================
FILE: cluster_metrics.py
================================================
import argparse
import os
from tqdm import tqdm

import torch
import numpy as np
from torch import nn

from gan_training import utils
from gan_training.inputs import get_dataset
from gan_training.checkpoints import CheckpointIO
from gan_training.config import load_config
from gan_training.metrics.clustering_metrics import (nmi, purity_score)

# Enable cuDNN benchmark mode for this process.
torch.backends.cudnn.benchmark = True

# Arguments
# Parsed at import time; main() reads `args`, `config` and `out_dir` from
# module scope.
parser = argparse.ArgumentParser(description='Evaluate the clustering inferred by our method')
parser.add_argument('config', type=str, help='Path to config file.')
parser.add_argument('--model_it', type=str)
parser.add_argument('--random', action='store_true', help='Figure out if the clusters were randomly assigned')

args = parser.parse_args()
# The experiment config is loaded together with configs/default.yaml.
config = load_config(args.config, 'configs/default.yaml')
out_dir = config['training']['out_dir']


def main():
    """Print NMI and purity of the inferred clusters against the dataset labels.

    Cluster assignments are cached in ``<out_dir>/cluster_preds.npz``. When
    that file exists it is loaded; otherwise the clusterer is restored from
    the checkpoint directory, every training batch is labelled, and the
    result is saved to the cache. With ``--random`` the predictions are
    replaced by uniformly random labels in [0, 100) before scoring.
    """
    checkpoint_dir = os.path.join(out_dir, 'chkpts')
    batch_size = config['training']['batch_size']
    preds_path = os.path.join(out_dir, 'cluster_preds.npz')

    if 'cifar' in config['data']['train_dir'].lower():
        name = 'cifar10'
    elif 'stacked_mnist' == config['data']['type']:
        name = 'stacked_mnist'
    else:
        name = 'image'

    if os.path.exists(preds_path):
        # if we've already computed assignments, load them and move on
        with np.load(preds_path) as f:
            y_reals = f['y_reals']
            y_preds = f['y_preds']
    else:
        train_dataset, _ = get_dataset(
            name=name,
            data_dir=config['data']['train_dir'],
            size=config['data']['img_size'])

        # NOTE(review): drop_last=True means a final incomplete batch is not
        # scored; confirm this is intended for the reported metrics.
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=batch_size,
            num_workers=config['training']['nworkers'],
            shuffle=True,
            pin_memory=True,
            sampler=None,
            drop_last=True)

        checkpoint_io = CheckpointIO(checkpoint_dir=checkpoint_dir)

        print('Loading clusterer:')
        most_recent = utils.get_most_recent(checkpoint_dir, 'model') if args.model_it is None else args.model_it
        clusterer = checkpoint_io.load_clusterer(most_recent, load_samples=False, pretrained=config['pretrained'])

        # Unwrap DataParallel so the plain module is used for inference.
        if isinstance(clusterer.discriminator, nn.DataParallel):
            clusterer.discriminator = clusterer.discriminator.module

        y_preds = []
        y_reals = []

        # Pure inference: no_grad keeps autograd from recording a graph for
        # every batch of the training set.
        with torch.no_grad():
            for x_real, y_real in tqdm(train_loader, total=len(train_loader)):
                y_pred = clusterer.get_labels(x_real.cuda(), None)
                y_preds.append(y_pred.detach().cpu())
                y_reals.append(y_real)

        y_reals = torch.cat(y_reals).numpy()
        y_preds = torch.cat(y_preds).numpy()

        np.savez(preds_path, y_reals=y_reals, y_preds=y_preds)

    if args.random:
        # Random baseline: overwrite the predictions with uniform labels.
        y_preds = np.random.randint(0, 100, size=y_reals.shape)

    nmi_score = nmi(y_preds, y_reals)
    purity = purity_score(y_preds, y_reals)
    print('nmi', nmi_score, 'purity', purity)


# Run the evaluation only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: clusterers/__init__.py
================================================
from clusterers import (base_clusterer, selfcondgan, random_labels, online)

# Registry mapping a clusterer name to the class that implements it.
# 'supervised' maps straight to BaseClusterer; the other entries are the
# Clusterer classes of the modules imported above.
clusterer_dict = {
    'supervised': base_clusterer.BaseClusterer,
    'selfcondgan': selfcondgan.Clusterer,
    'online': online.Clusterer,
    'random_labels': random_labels.Clusterer
}


================================================
FILE: clusterers/base_clusterer.py
================================================
import copy

import torch
import numpy as np

class BaseClusterer():
    '''Pass-through clusterer that keeps its own eval-mode copy of the discriminator.

    get_labels hands back the labels it is given and recluster does nothing;
    subclasses override them to produce real cluster assignments.
    '''
    def __init__(self,
                 discriminator,
                 k_value=-1,
                 x_cluster=None,
                 batch_size=100,
                 **kwargs):
        ''' x_cluster must stay off the gpu, otherwise it hogs too much gpu memory '''
        self.cluster_counts = [0] * k_value
        # work on a private copy so the caller's module is left untouched
        d_copy = copy.deepcopy(discriminator)
        d_copy.eval()
        self.discriminator = d_copy
        self.k = k_value
        self.kmeans = None
        self.x = x_cluster
        self.x_labels = None
        self.batch_size = batch_size

    def get_labels(self, x, y):
        ''' returns the provided labels y unchanged; x is ignored '''
        return y

    def recluster(self, discriminator, **kwargs):
        ''' no-op in the base class '''
        return

    def get_features(self, x):
        ''' features used for clustering; defaults to the discriminator features '''
        return self.get_discriminator_output(x)

    def get_cluster_batch_features(self):
        ''' pushes self.x through get_features in chunks of self.batch_size and returns the stacked result as a numpy array '''
        with torch.no_grad():
            samples = self.x
            n_samples = samples.size(0)
            n_full, leftover = divmod(n_samples, self.batch_size)

            # (start, stop) bounds of every full chunk, plus the trailing partial one if any
            bounds = [(i * self.batch_size, (i + 1) * self.batch_size)
                      for i in range(n_full)]
            if leftover != 0:
                bounds.append((n_full * self.batch_size, n_samples))

            chunks = []
            for start, stop in bounds:
                on_gpu = samples[start:stop].cuda()
                chunks.append(self.get_features(on_gpu).detach().cpu())
            return torch.cat(chunks, dim=0).numpy()

    def get_discriminator_output(self, x):
        ''' calls the discriminator with get_features=True, in eval mode and without gradients '''
        self.discriminator.eval()
        with torch.no_grad():
            features = self.discriminator(x, get_features=True)
        return features

    def get_label_distribution(self, x=None):
        ''' returns per-label counts, over self.x_labels when x is None and over get_labels(x, None) otherwise '''
        labels = self.get_labels(x, None) if x is not None else self.x_labels
        histogram = [0] * self.k
        for label in labels:
            histogram[label] += 1
        return histogram

    def sample_y(self, batch_size):
        ''' draws a multinomial sample of batch_size trials from the empirical label distribution (not sure if used anymore) '''
        counts = self.get_label_distribution()
        total = sum(counts)
        probs = [count / total for count in counts]
        sampler = torch.distributions.Multinomial(batch_size,
                                                  torch.tensor(probs))
        return sampler.sample()

    def print_label_distribution(self, x=None):
        ''' prints the result of get_label_distribution(x) '''
        print(self.get_label_distribution(x))


================================================
FILE: clusterers/kmeans.py
================================================
import torch
import numpy as np
from sklearn.cluster import KMeans

from clusterers import base_clusterer

class Clusterer(base_clusterer.BaseClusterer):
    '''Clusterer that labels samples by running k-means on the get_features output.'''
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # identity mapping from raw k-means index to reported label
        self.mapping = [index for index in range(self.k)]

    def kmeans_fit_predict(self, features, init='k-means++', n_init=10):
        '''fits a fresh KMeans on features, stores it in self.kmeans, and returns its predictions for those same features'''
        print('Fitting k-means w data shape', features.shape)
        estimator = KMeans(init=init, n_clusters=self.k, n_init=n_init)
        self.kmeans = estimator.fit(features)
        return self.kmeans.predict(features)

    def get_labels(self, x, y):
        '''predicts a cluster for every sample in x and returns the mapped labels as a long tensor on the gpu; y is ignored'''
        features = self.get_features(x).detach().cpu().numpy()
        raw_clusters = self.kmeans.predict(features)
        mapped = np.array([self.mapping[cluster] for cluster in raw_clusters])
        return torch.from_numpy(mapped).long().cuda()


================================================
FILE: clusterers/online.py
================================================
import copy, random

import torch
import numpy as np

from clusterers import kmeans


class Clusterer(kmeans.Clusterer):
    '''k-means clusterer whose centers are refined online, one batch at a time.

    recluster goes through three phases: an initial k-means fit, a single
    "burn-in" refit seeded from the previous assignments, and after that
    incremental per-sample updates of the cluster centers.
    '''
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.burned_in = False

    def get_initialization(self, features, labels):
        '''given points (from new discriminator) and their old assignments as np arrays, compute the induced means as a np array

        Clusters with no assigned point are seeded with a randomly chosen
        feature row instead of a mean.
        '''
        means = []
        for i in range(self.k):
            mask = (labels == i)
            if mask.any():
                # accumulate in float64, like the np.zeros-based running sum did
                means.append(features[mask].mean(axis=0, dtype=np.float64))
            else:
                # starved cluster: fall back to a random data point.
                # features is a numpy array, so its length is shape[0]
                # (ndarray.size is an int attribute, not a method)
                rand_point = random.randint(0, features.shape[0] - 1)
                means.append(features[rand_point])
        return np.array(means)

    def recluster(self, discriminator, x_batch=None, **kwargs):
        '''Update the clustering for a new discriminator.

        Args:
            discriminator: module used from now on to compute features
            x_batch: batch of samples; required once burn-in has happened
        '''
        if self.kmeans is None:
            print('kmeans clustering as initialization')
            self.discriminator = copy.deepcopy(discriminator)
            features = self.get_cluster_batch_features()
            self.x_labels = self.kmeans_fit_predict(features)
            return

        # from here on the discriminator is referenced directly, not copied
        self.discriminator = discriminator
        if not self.burned_in:
            print('Burned in: computing initialization for kmeans')
            features = self.get_cluster_batch_features()
            initialization = self.get_initialization(features, self.x_labels)
            # the refit's predictions are discarded; self.x_labels keeps
            # the assignments from the initial fit
            self.kmeans_fit_predict(features, init=initialization)
            self.burned_in = True
            return

        assert x_batch is not None
        features = self.get_features(x_batch).detach().cpu().numpy()
        # assignments are computed once, against the centers as they were
        # before this batch's updates
        y_pred = self.kmeans.predict(features)

        for xi, yi in zip(features, y_pred):
            # running-mean update: step size shrinks as 1 / (points seen by the cluster)
            self.cluster_counts[yi] += 1
            difference = xi - self.kmeans.cluster_centers_[yi]
            step_size = 1.0 / self.cluster_counts[yi]
            self.kmeans.cluster_centers_[yi] = (
                self.kmeans.cluster_centers_[yi] + step_size * difference)


================================================
FILE: clusterers/random_labels.py
================================================
import torch
from clusterers import base_clusterer


class Clusterer(base_clusterer.BaseClusterer):
    '''Clusterer that ignores the data and hands out random labels.'''
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def get_labels(self, x, y):
        '''draws one random label in [0, self.k) per entry of y; only the shape of y is used and x is ignored'''
        random_labels = torch.randint(low=0, high=self.k, size=y.shape)
        return random_labels.long().cuda()

================================================
FILE: clusterers/selfcondgan.py
================================================
import copy, random

import torch
import numpy as np
from sklearn.utils.linear_assignment_ import linear_assignment

from clusterers import kmeans


class Clusterer(kmeans.Clusterer):
    '''k-means clusterer that is refit on every recluster call.

    Args:
        initialization (bool): seed each refit with the means induced by the
            previous assignments instead of k-means++
        matching (bool): after a refit, permute the new cluster indices so
            they line up with the previous labels
        **kwargs: forwarded to kmeans.Clusterer
    '''
    def __init__(self, initialization=True, matching=True, **kwargs):
        self.initialization = initialization
        self.matching = matching

        super().__init__(**kwargs)

    def get_initialization(self, features, labels):
        '''given points (from new discriminator) and their old assignments as np arrays, compute the induced means as a np array

        Clusters with no assigned point are seeded with a randomly chosen
        feature row instead of a mean.
        '''
        means = []
        for i in range(self.k):
            mask = (labels == i)
            if mask.any():
                # accumulate in float64, like the np.zeros-based running sum did
                means.append(features[mask].mean(axis=0, dtype=np.float64))
            else:
                # starved cluster: fall back to a random data point.
                # features is a numpy array, so its length is shape[0]
                # (ndarray.size is an int attribute, not a method)
                rand_point = random.randint(0, features.shape[0] - 1)
                means.append(features[rand_point])
        return np.array(means)

    def fit_means(self):
        '''Refit k-means on the features of self.x and refresh self.x_labels.'''
        features = self.get_cluster_batch_features()

        # if clustered already, use old assignments for the cluster mean
        if self.x_labels is not None and self.initialization:
            print('Initializing k-means with previous cluster assignments')
            initialization = self.get_initialization(features, self.x_labels)
        else:
            initialization = 'k-means++'

        new_classes = self.kmeans_fit_predict(features, init=initialization)

        # we've clustered already, so compute the permutation
        if self.x_labels is not None and self.matching:
            print('Doing cluster matching')
            matching = self.hungarian_match(new_classes, self.x_labels, self.k,
                                            self.k)
            # sorted by new cluster index, so mapping[new] == matched old label
            self.mapping = [int(j) for i, j in sorted(matching)]

        # recompute the fixed labels
        self.x_labels = np.array([self.mapping[x] for x in new_classes])

    def recluster(self, discriminator, **kwargs):
        '''Take a fresh copy of discriminator and refit the clustering with it.'''
        self.discriminator = copy.deepcopy(discriminator)
        self.fit_means()

    def hungarian_match(self, flat_preds, flat_targets, preds_k, targets_k):
        '''takes in np arrays flat_preds, flat_targets of integers

        Returns a list of (pred_cluster, target_cluster) pairs produced by
        linear_assignment on the cost matrix num_samples - overlap.
        '''
        num_samples = flat_targets.shape[0]

        assert (preds_k == targets_k)  # one to one
        num_k = preds_k
        num_correct = np.zeros((num_k, num_k))

        # num_correct[c1, c2] = number of samples predicted c1 whose target is c2
        for c1 in range(num_k):
            for c2 in range(num_k):
                votes = int(((flat_preds == c1) * (flat_targets == c2)).sum())
                num_correct[c1, c2] = votes

        # num_correct is small; minimising (num_samples - overlap) maximises overlap
        match = linear_assignment(num_samples - num_correct)

        # return as list of tuples, out_c to gt_c
        return [(out_c, gt_c) for out_c, gt_c in match]


================================================
FILE: configs/cifar/conditional.yaml
================================================
# CIFAR run with 10-label conditioning; overrides on top of configs/cifar/default.yaml.
generator:
  nlabels: 10
  conditioning: embedding
discriminator:
  nlabels: 10
  conditioning: mask
# base config that load_config reads first; the keys in this file are then merged over it
inherit_from: configs/cifar/default.yaml
training:
  out_dir: output/cifar/conditional

================================================
FILE: configs/cifar/default.yaml
================================================
# Shared CIFAR settings; the other configs/cifar/*.yaml files inherit from this one.
data:
  type: cifar10
  train_dir: data/CIFAR
  img_size: 32
  nlabels: 10
generator:
  name: dcgan_deep
  nlabels: 1
  conditioning: unconditional
  # kwargs are expanded into the model constructor by build_models.
  # NOTE: a bare `None` is read by YAML as the string 'None', not as null.
  kwargs:
    placeholder: None
discriminator:
  name: dcgan_deep
  nlabels: 1
  conditioning: unconditional
  kwargs:
    placeholder: None
z_dist:
  type: gauss
  dim: 128
clusterer:
  # 'supervised' selects BaseClusterer in clusterer_dict
  name: supervised
  nimgs: 25000
  kwargs: 
    placeholder: None
training:
  gan_type: standard
  reg_type: none
  reg_param: 0.
  take_model_average: false
  sample_nlabels: 20
  log_every: 1000
  inception_every: 10000
  batch_size: 64

================================================
FILE: configs/cifar/selfcondgan.yaml
================================================
# CIFAR run using the selfcondgan clusterer; overrides on top of configs/cifar/default.yaml.
generator:
  nlabels: 100
  conditioning: embedding
discriminator:
  nlabels: 100
  conditioning: mask
clusterer:
  # selects clusterers.selfcondgan.Clusterer in clusterer_dict
  name: selfcondgan
  kwargs: 
    # same value as generator/discriminator nlabels above
    k_value: 100
inherit_from: configs/cifar/default.yaml
training:
  out_dir: output/cifar/selfcondgan
  recluster_every: 25000

================================================
FILE: configs/cifar/unconditional.yaml
================================================
# Uses configs/cifar/default.yaml as-is; only the output directory is overridden.
inherit_from: configs/cifar/default.yaml
training:
  out_dir: output/cifar/unconditional

================================================
FILE: configs/default.yaml
================================================
# Global default settings.
# NOTE(review): presumably the file passed as `default_path` to load_config,
# i.e. the base for configs without `inherit_from` - confirm in the caller.
data:
  type: lsun
  train_dir: data/LSUN
  deterministic: False
  img_size: 128
  nlabels: 1
generator:
  name: resnet
  nlabels: 1
  conditioning: unconditional
  # kwargs are expanded into the model constructor by build_models.
  # NOTE: a bare `None` is read by YAML as the string 'None', not as null.
  kwargs: 
    placeholder: None
discriminator:
  name: resnet
  nlabels: 1
  conditioning: unconditional
  kwargs: 
    pack_size: 1
    placeholder: None
clusterer:
  # 'supervised' selects BaseClusterer in clusterer_dict
  name: supervised
  nimgs: 100
  kwargs: 
    num_components: -1
z_dist:
  type: gauss
  dim: 256
training:
  out_dir: output/default
  gan_type: standard
  reg_type: real
  reg_param: 10.
  log_every: 1
  batch_size: 128
  ntest: 128
  nworkers: 72
  burnin_time: 0
  take_model_average: true
  model_average_beta: 0.999
  monitoring: tensorboard
  sample_every: 5000
  sample_nlabels: 20
  inception_every: 10000
  inception_nsamples: 50000
  backup_every: 10000
  recluster_every: 10000
  # build_optimizers handles 'rmsprop', 'adam' and 'sgd'; beta1/beta2 are read only for 'adam'
  optimizer: adam
  lr_g: 0.0001
  lr_d: 0.0001
  beta1: 0.0
  beta2: 0.99
# empty mapping: no 'model' / 'clusterer' URL, so CheckpointIO loads local files
pretrained: {}

================================================
FILE: configs/imagenet/conditional.yaml
================================================
# ImageNet run with 1000-label conditioning; overrides on top of configs/imagenet/default.yaml.
generator:
  nlabels: 1000
  conditioning: embedding
discriminator:
  nlabels: 1000
  conditioning: mask
# base config that load_config reads first; the keys in this file are then merged over it
inherit_from: configs/imagenet/default.yaml
training:
  out_dir: output/imagenet/conditional
  

================================================
FILE: configs/imagenet/default.yaml
================================================
# Shared ImageNet settings; inherited by the other ImageNet configs.
data:
  type: image
  train_dir: data/ImageNet/train
  test_dir: data/ImageNet/val
  img_size: 128
  nlabels: 1000
generator:
  name: resnet2
  nlabels: 1
  conditioning: unconditional
discriminator:
  name: resnet2
  nlabels: 1
  conditioning: unconditional
z_dist:
  type: gauss
  dim: 256
clusterer:
  # 'supervised' selects BaseClusterer in clusterer_dict
  name: supervised
training:
  gan_type: standard
  reg_type: real
  reg_param: 10.
  take_model_average: true
  model_average_beta: 0.999
  sample_nlabels: 20
  log_every: 10
  inception_every: 10000
  backup_every: 5000
  batch_size: 128

================================================
FILE: configs/imagenet/selfcondgan.yaml
================================================
# ImageNet run using the selfcondgan clusterer; overrides on top of configs/imagenet/default.yaml.
generator:
  nlabels: 100
  conditioning: embedding
discriminator:
  nlabels: 100
  conditioning: mask
clusterer:
  # selects clusterers.selfcondgan.Clusterer in clusterer_dict
  name: selfcondgan
  nimgs: 50000
  kwargs: 
    # same value as generator/discriminator nlabels above
    k_value: 100
inherit_from: configs/imagenet/default.yaml
training:
  out_dir: output/imagenet/selfcondgan
  recluster_every: 75000
  # overrides the reg_param of 10. set in the inherited default
  reg_param: 0.1

================================================
FILE: configs/imagenet/unconditional.yaml
================================================
# ImageNet run without conditioning; the generator/discriminator values repeat
# those of configs/imagenet/default.yaml, so only out_dir actually changes.
generator:
  nlabels: 1
  conditioning: unconditional
discriminator:
  nlabels: 1
  conditioning: unconditional
inherit_from: configs/imagenet/default.yaml
training:
  out_dir: output/imagenet/unconditional

================================================
FILE: configs/places/conditional.yaml
================================================
# Places365 run with 365-label conditioning; overrides on top of configs/places/default.yaml.
generator:
  nlabels: 365
  conditioning: embedding
discriminator:
  nlabels: 365
  conditioning: mask
training:
  out_dir: output/places/conditional
# base config that load_config reads first; the keys in this file are then merged over it
inherit_from: configs/places/default.yaml


================================================
FILE: configs/places/default.yaml
================================================
# Shared Places365 settings; inherited by the other Places configs.
data:
  type: image
  train_dir: data/places365/train
  test_dir: data/places365/val
  img_size: 128
  nlabels: 365
generator:
  name: resnet2
  nlabels: 1
  conditioning: unconditional
discriminator:
  name: resnet2
  nlabels: 1
  conditioning: unconditional
z_dist:
  type: gauss
  dim: 256
clusterer:
  # 'supervised' selects BaseClusterer in clusterer_dict
  name: supervised
training:
  gan_type: standard
  reg_type: real
  reg_param: 10.
  take_model_average: true
  model_average_beta: 0.999
  sample_nlabels: 20
  log_every: 10
  inception_every: 10000
  backup_every: 5000
  batch_size: 128
  

================================================
FILE: configs/places/selfcondgan.yaml
================================================
# Places365 run using the selfcondgan clusterer; overrides on top of configs/places/default.yaml.
generator:
  nlabels: 100
  conditioning: embedding
discriminator:
  nlabels: 100
  conditioning: mask
clusterer:
  # selects clusterers.selfcondgan.Clusterer in clusterer_dict
  name: selfcondgan
  nimgs: 50000
  kwargs: 
    # same value as generator/discriminator nlabels above
    k_value: 100
inherit_from: configs/places/default.yaml
training:
  out_dir: output/places/selfcondgan
  recluster_every: 75000
  # overrides the reg_param of 10. set in the inherited default
  reg_param: 0.1

================================================
FILE: configs/places/unconditional.yaml
================================================
# Places365 run with a single label.
# NOTE(review): unlike configs/imagenet/unconditional.yaml this keeps
# embedding/mask conditioning with nlabels: 1 - confirm this is intentional
# before changing it.
generator:
  nlabels: 1
  conditioning: embedding
discriminator:
  nlabels: 1
  conditioning: mask
inherit_from: configs/places/default.yaml
training:
  out_dir: output/places/unconditional


================================================
FILE: configs/pretrained/imagenet/conditional.yaml
================================================
# 1000-label ImageNet model loaded from published weights.
generator:
  nlabels: 1000
  conditioning: embedding
discriminator:
  nlabels: 1000
  conditioning: mask
inherit_from: configs/imagenet/default.yaml
training:
  out_dir: output/pretrained/imagenet/class_conditional
pretrained:
  # when present, CheckpointIO.load fetches this URL instead of the local checkpoint file
  model: http://selfcondgan.csail.mit.edu/weights/classcondgan_i_model.pt


================================================
FILE: configs/pretrained/imagenet/selfcondgan.yaml
================================================
# ImageNet selfcondgan model and clusterer loaded from published weights.
generator:
  nlabels: 100
  conditioning: embedding
discriminator:
  nlabels: 100
  conditioning: mask
clusterer:
  name: selfcondgan
  nimgs: 50000
  kwargs: 
    k_value: 100
inherit_from: configs/imagenet/default.yaml
training:
  out_dir: output/pretrained/imagenet/selfcondgan
  recluster_every: 75000
  reg_param: 0.1
pretrained:
  # when present, CheckpointIO.load fetches this URL instead of the local checkpoint file
  model: http://selfcondgan.csail.mit.edu/weights/selfcondgan_i_model.pt
  # CheckpointIO.load_clusterer downloads this once and caches it as
  # pretrained_clusterer.pkl in the checkpoint directory
  clusterer: http://selfcondgan.csail.mit.edu/weights/selfcondgan_i_clusterer.pkl

================================================
FILE: configs/pretrained/imagenet/unconditional.yaml
================================================
# Unconditional ImageNet model loaded from published weights.
generator:
  nlabels: 1
  conditioning: unconditional
discriminator:
  nlabels: 1
  conditioning: unconditional
inherit_from: configs/imagenet/default.yaml
training:
  out_dir: output/pretrained/imagenet/unconditional
pretrained:
  # when present, CheckpointIO.load fetches this URL instead of the local checkpoint file
  model: http://selfcondgan.csail.mit.edu/weights/uncondgan_i_model.pt


================================================
FILE: configs/pretrained/places/conditional.yaml
================================================
# 365-label Places365 model loaded from published weights.
generator:
  nlabels: 365
  conditioning: embedding
discriminator:
  nlabels: 365
  conditioning: mask
training:
  out_dir: output/pretrained/places/class_conditional
inherit_from: configs/places/default.yaml
pretrained:
  # when present, CheckpointIO.load fetches this URL instead of the local checkpoint file
  model: http://selfcondgan.csail.mit.edu/weights/classcondgan_p_model.pt


================================================
FILE: configs/pretrained/places/selfcondgan.yaml
================================================
# Places365 selfcondgan model and clusterer loaded from published weights.
generator:
  nlabels: 100
  conditioning: embedding
discriminator:
  nlabels: 100
  conditioning: mask
clusterer:
  name: selfcondgan
  nimgs: 50000
  kwargs: 
    k_value: 100
inherit_from: configs/places/default.yaml
training:
  out_dir: output/pretrained/places/selfcondgan
  # NOTE(review): no recluster_every here, unlike the ImageNet counterpart,
  # so the value inherited through the config chain applies - confirm intended
  reg_param: 0.1
pretrained:
  # when present, CheckpointIO.load fetches this URL instead of the local checkpoint file
  model: http://selfcondgan.csail.mit.edu/weights/selfcondgan_p_model.pt
  # CheckpointIO.load_clusterer downloads this once and caches it as
  # pretrained_clusterer.pkl in the checkpoint directory
  clusterer: http://selfcondgan.csail.mit.edu/weights/selfcondgan_p_clusterer.pkl

================================================
FILE: configs/pretrained/places/unconditional.yaml
================================================
# Single-label Places365 model loaded from published weights.
# NOTE(review): conditioning is embedding/mask with nlabels: 1, presumably to
# match the architecture the weights were trained with - do not change
# without checking the checkpoint.
generator:
  nlabels: 1
  conditioning: embedding
discriminator:
  nlabels: 1
  conditioning: mask
inherit_from: configs/places/default.yaml
training:
  out_dir: output/pretrained/places/unconditional
pretrained:
  # when present, CheckpointIO.load fetches this URL instead of the local checkpoint file
  model: http://selfcondgan.csail.mit.edu/weights/uncondgan_p_model.pt


================================================
FILE: configs/stacked_mnist/conditional.yaml
================================================
# Stacked-MNIST run with 1000-label conditioning; overrides on top of configs/stacked_mnist/default.yaml.
generator:
  nlabels: 1000
  conditioning: embedding
discriminator:
  nlabels: 1000
  conditioning: mask
# base config that load_config reads first; the keys in this file are then merged over it
inherit_from: configs/stacked_mnist/default.yaml
training:
  out_dir: output/stacked_mnist/conditional

================================================
FILE: configs/stacked_mnist/default.yaml
================================================
# Shared Stacked-MNIST settings; the other configs/stacked_mnist/*.yaml files inherit from this one.
data:
  type: stacked_mnist
  train_dir: data/MNIST
  img_size: 32
  nlabels: 1000
generator:
  name: dcgan_shallow
  nlabels: 1
  conditioning: unconditional
  # kwargs are expanded into the model constructor by build_models.
  # NOTE: a bare `None` is read by YAML as the string 'None', not as null.
  kwargs:
    placeholder: None
discriminator:
  name: dcgan_shallow
  nlabels: 1
  conditioning: unconditional
  kwargs:
    placeholder: None
z_dist:
  type: gauss
  dim: 128
clusterer:
  # 'supervised' selects BaseClusterer in clusterer_dict
  name: supervised
  nimgs: 25000
  kwargs: 
    placeholder: None
training:
  gan_type: standard
  reg_type: none
  reg_param: 0.
  take_model_average: false
  sample_nlabels: 20
  log_every: 1000
  backup_every: 5000
  inception_every: 10000
  batch_size: 64

================================================
FILE: configs/stacked_mnist/selfcondgan.yaml
================================================
# Stacked-MNIST run using the selfcondgan clusterer; overrides on top of configs/stacked_mnist/default.yaml.
generator:
  nlabels: 100
  conditioning: embedding
discriminator:
  nlabels: 100
  conditioning: mask
clusterer:
  # selects clusterers.selfcondgan.Clusterer in clusterer_dict
  name: selfcondgan
  kwargs: 
    # same value as generator/discriminator nlabels above
    k_value: 100
inherit_from: configs/stacked_mnist/default.yaml
training:
  out_dir: output/stacked_mnist/selfcondgan
  recluster_every: 25000

================================================
FILE: configs/stacked_mnist/unconditional.yaml
================================================
# Uses configs/stacked_mnist/default.yaml as-is; only the output directory is overridden.
inherit_from: configs/stacked_mnist/default.yaml
training:
  out_dir: output/stacked_mnist/unconditional

================================================
FILE: gan_training/__init__.py
================================================


================================================
FILE: gan_training/checkpoints.py
================================================
import os, pickle
import urllib
import torch
import numpy as np
from torch.utils import model_zoo


class CheckpointIO(object):
    ''' CheckpointIO class.

    It handles saving and loading checkpoints.

    Args:
        checkpoint_dir (str): path where checkpoints are saved
        **kwargs: modules to register, keyed by the name they get in the checkpoint
    '''

    def __init__(self, checkpoint_dir='./chkpts', **kwargs):
        self.module_dict = kwargs
        self.checkpoint_dir = checkpoint_dir
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)

    def register_modules(self, **kwargs):
        ''' Registers modules in current module dictionary.
        '''
        self.module_dict.update(kwargs)

    def save(self, filename, **kwargs):
        ''' Saves the current module dictionary.

        Args:
            filename (str): name of output file; relative names are placed
                inside checkpoint_dir
            **kwargs: extra values stored next to the module state dicts
        '''
        if not os.path.isabs(filename):
            filename = os.path.join(self.checkpoint_dir, filename)

        outdict = kwargs
        for k, v in self.module_dict.items():
            outdict[k] = v.state_dict()
        torch.save(outdict, filename)

    def load(self, filename, pretrained=None):
        '''Loads a module dictionary from local file or url.

        Args:
            filename (str): name of saved module dictionary
            pretrained (dict): if it has a 'model' entry, that entry is
                loaded instead of filename

        Returns:
            dict: the checkpoint entries that do not belong to a registered module
        '''
        pretrained = pretrained or {}
        if 'model' in pretrained:
            filename = pretrained['model']
        if is_url(filename):
            return self.load_url(filename)
        else:
            return self.load_file(filename)

    def load_file(self, filename):
        '''Loads a module dictionary from file.

        Args:
            filename (str): name of saved module dictionary

        Raises:
            FileNotFoundError: if the file does not exist
        '''

        if not os.path.isabs(filename):
            filename = os.path.join(self.checkpoint_dir, filename)

        if os.path.exists(filename):
            print('=> Loading checkpoint from local file...', filename)
            state_dict = torch.load(filename)
            scalars = self.parse_state_dict(state_dict)
            return scalars
        else:
            print('File not found', filename)
            # carry the path in the exception so callers that print it see what was missing
            raise FileNotFoundError(filename)

    def load_url(self, url):
        '''Load a module dictionary from url.

        Args:
            url (str): url to saved model
        '''
        print('=> Loading checkpoint from url...', url)
        state_dict = model_zoo.load_url(url, model_dir=self.checkpoint_dir, progress=True)
        scalars = self.parse_state_dict(state_dict)
        return scalars

    def parse_state_dict(self, state_dict):
        '''Parse state_dict of model and return scalars.

        Args:
            state_dict (dict): State dict of model

        Returns:
            dict: entries of state_dict whose key is not a registered module
        '''
        for k, v in self.module_dict.items():
            if k in state_dict:
                v.load_state_dict(state_dict[k])
            else:
                print('Warning: Could not find %s in checkpoint!' % k)
        scalars = {
            k: v
            for k, v in state_dict.items() if k not in self.module_dict
        }
        return scalars

    def load_clusterer(self, it, load_samples, pretrained=None):
        '''Loads a pickled clusterer.

        Args:
            it: iteration number used in the local file name clusterer{it}.pkl
            load_samples (bool): also restore clusterer.x from cluster_samples.npz
                (local clusterers only)
            pretrained (dict): if it has a 'clusterer' entry, that URL is
                downloaded once and cached as pretrained_clusterer.pkl

        Raises:
            FileNotFoundError: if a required local file is missing
        '''
        pretrained = pretrained or {}
        if 'clusterer' in pretrained:
            pretrained_file = os.path.join(self.checkpoint_dir, 'pretrained_clusterer.pkl')
            if not os.path.exists(pretrained_file):
                import cloudpickle as cp
                from urllib.request import urlopen
                print('Loading pretrained clusterer from', pretrained['clusterer'])
                # NOTE(security): unpickling executes arbitrary code; only
                # point pretrained['clusterer'] at a trusted source.
                with urlopen(pretrained['clusterer']) as response:
                    clusterer = cp.load(response)
                print('Saving pretrained clusterer to', pretrained_file)
                with open(pretrained_file, 'wb') as f:
                    pickle.dump(clusterer, f)
            else:
                with open(pretrained_file, 'rb') as f:
                    clusterer = pickle.load(f)
            return clusterer
        else:
            print('Loading clusterer:')
            with open(os.path.join(self.checkpoint_dir, f'clusterer{it}.pkl'), 'rb') as f:
                clusterer = pickle.load(f)

            if load_samples:
                print('Loading cluster samples:')
                with np.load(os.path.join(self.checkpoint_dir, 'cluster_samples.npz')) as f:
                    x = f['x']
                clusterer.x = torch.from_numpy(x)
            return clusterer

    def load_models(self, it, pretrained=None, load_samples=False):
        '''Loads the registered modules and the clusterer for iteration it.

        Returns:
            tuple: (it, epoch_idx, clusterer); it and epoch_idx are -1 when no
            model file was found, clusterer is None when no clusterer file was found
        '''
        try:
            load_dict = self.load('model_%08d.pt' % it, pretrained)
            epoch_idx = load_dict.get('epoch_idx', -1)
        except Exception:  #models are not dataparallel modules
            print('Trying again to load w/o data parallel modules')
            try:
                # unwrap DataParallel so the state dict keys line up
                for name, module in self.module_dict.items():
                    if isinstance(module, torch.nn.DataParallel):
                        self.module_dict[name] = module.module
                load_dict = self.load('model_%08d.pt' % it, pretrained)
                epoch_idx = load_dict.get('epoch_idx', -1)
            except FileNotFoundError as e:
                print(e)
                print("Models not found")
                it = epoch_idx = -1

        try:
            clusterer = self.load_clusterer(it, load_samples, pretrained)
        except FileNotFoundError:
            clusterer = None

        return it, epoch_idx, clusterer

    def save_clusterer(self, clusterer, it):
        '''Pickles clusterer to clusterer{it}.pkl without its sample tensor x.'''
        with open(os.path.join(self.checkpoint_dir, f'clusterer{it}.pkl'), 'wb') as f:
            #hack: only save changing data
            x = clusterer.x
            clusterer.x = None
            try:
                pickle.dump(clusterer, f)
            finally:
                # put the samples back even if pickling fails
                clusterer.x = x

def is_url(url):
    '''Return True when url has an http or https scheme.'''
    # `import urllib` alone does not guarantee that the `urllib.parse`
    # submodule is loaded, so import the function explicitly.
    from urllib.parse import urlparse
    return urlparse(url).scheme in ('http', 'https')


================================================
FILE: gan_training/config.py
================================================
import yaml
from torch import optim
from os import path
from gan_training.models import generator_dict, discriminator_dict
from gan_training.train import toggle_grad
from clusterers import clusterer_dict


# General config
def load_config(path, default_path):
    ''' Loads config file.

    The base config is the file named by `inherit_from` (resolved
    recursively) if present, otherwise `default_path`; the entries of
    `path` are then merged on top of it.

    Args:
        path (str): path to config file
        default_path (str or None): config used as base when the file has no
            `inherit_from` entry; None means start from an empty config

    Returns:
        dict: the merged configuration
    '''
    # Load configuration from file itself.
    # safe_load builds plain data only, and works on PyYAML versions where
    # yaml.load requires an explicit Loader; an empty file yields None.
    with open(path, 'r') as f:
        cfg_special = yaml.safe_load(f) or {}

    # Check if we should inherit from a config
    inherit_from = cfg_special.get('inherit_from')

    # If yes, load this config first as default
    # If no, use the default_path
    if inherit_from is not None:
        cfg = load_config(inherit_from, default_path)
    elif default_path is not None:
        with open(default_path, 'r') as f:
            cfg = yaml.safe_load(f) or {}
    else:
        cfg = dict()

    # Include main configuration
    update_recursive(cfg, cfg_special)

    return cfg


def update_recursive(dict1, dict2):
    ''' Update two config dictionaries recursively.

    Nested dictionaries are merged key by key; any other value in dict2
    replaces the corresponding entry of dict1. dict1 is modified in place.

    Args:
        dict1 (dict): first dictionary to be updated
        dict2 (dict): second dictionary which entries should be used

    '''
    for k, v in dict2.items():
        # Merge only when both sides are dictionaries; a non-dict override
        # (e.g. an empty YAML value, which loads as None) replaces the entry
        # instead of being recursed into.
        if isinstance(dict1.get(k), dict) and isinstance(v, dict):
            update_recursive(dict1[k], v)
        else:
            dict1[k] = v


def get_clusterer(config):
    '''Return the clusterer class registered under config['clusterer']['name'].'''
    clusterer_name = config['clusterer']['name']
    return clusterer_dict[clusterer_name]


def build_models(config):
    '''Instantiate the generator and discriminator described by config.

    Returns:
        tuple: (generator, discriminator)
    '''
    # Look up the model classes registered under the configured names
    gen_cfg = config['generator']
    Generator = generator_dict[gen_cfg['name']]
    disc_cfg = config['discriminator']
    Discriminator = discriminator_dict[disc_cfg['name']]

    # Both networks are built for the same image size; the per-model
    # `kwargs` mapping is expanded into extra constructor arguments
    generator = Generator(
        z_dim=config['z_dist']['dim'],
        nlabels=gen_cfg['nlabels'],
        size=config['data']['img_size'],
        conditioning=gen_cfg['conditioning'],
        **gen_cfg['kwargs'])
    discriminator = Discriminator(
        nlabels=disc_cfg['nlabels'],
        conditioning=disc_cfg['conditioning'],
        size=config['data']['img_size'],
        **disc_cfg['kwargs'])

    return generator, discriminator


def build_optimizers(generator, discriminator, config):
    '''Create the generator and discriminator optimizers.

    Reads `optimizer`, `lr_g` and `lr_d` (plus `beta1`/`beta2` for adam)
    from config['training'] and enables gradients on both networks.

    Returns:
        tuple: (g_optimizer, d_optimizer)

    Raises:
        ValueError: if the configured optimizer is not 'rmsprop', 'adam' or 'sgd'
    '''
    optimizer = config['training']['optimizer']
    lr_g = config['training']['lr_g']
    lr_d = config['training']['lr_d']

    toggle_grad(generator, True)
    toggle_grad(discriminator, True)

    g_params = generator.parameters()
    d_params = discriminator.parameters()

    if optimizer == 'rmsprop':
        g_optimizer = optim.RMSprop(g_params, lr=lr_g, alpha=0.99, eps=1e-8)
        d_optimizer = optim.RMSprop(d_params, lr=lr_d, alpha=0.99, eps=1e-8)
    elif optimizer == 'adam':
        beta1 = config['training']['beta1']
        beta2 = config['training']['beta2']
        g_optimizer = optim.Adam(g_params, lr=lr_g, betas=(beta1, beta2), eps=1e-8)
        d_optimizer = optim.Adam(d_params, lr=lr_d, betas=(beta1, beta2), eps=1e-8)
    elif optimizer == 'sgd':
        g_optimizer = optim.SGD(g_params, lr=lr_g, momentum=0.)
        d_optimizer = optim.SGD(d_params, lr=lr_d, momentum=0.)
    else:
        # without this branch an unknown name surfaced as an
        # UnboundLocalError at the return statement
        raise ValueError('Unknown optimizer: %r' % (optimizer,))

    return g_optimizer, d_optimizer


# Some utility functions
def get_parameter_groups(parameters, gradient_scales, base_lr):
    '''Build one optimizer parameter group per parameter.

    Each group's learning rate is base_lr times the parameter's entry in
    gradient_scales (1. when the parameter has no entry).
    '''
    return [
        {'params': [param], 'lr': gradient_scales.get(param, 1.) * base_lr}
        for param in parameters
    ]


================================================
FILE: gan_training/distributions.py
================================================
import torch
from torch import distributions


def get_zdist(dist_name, dim, device=None):
    '''Build the latent distribution named dist_name over dim dimensions.

    'uniform' gives Uniform(-1, 1) and 'gauss' gives Normal(0, 1), each
    with parameters created on device. Any other name raises
    NotImplementedError. The result carries an extra `dim` attribute.
    '''
    if dist_name not in ('uniform', 'gauss'):
        raise NotImplementedError

    ones = torch.ones(dim, device=device)
    if dist_name == 'uniform':
        zdist = distributions.Uniform(-ones, torch.ones(dim, device=device))
    else:
        zdist = distributions.Normal(torch.zeros(dim, device=device), ones)

    # Remember the dimensionality on the distribution object itself
    zdist.dim = dim

    return zdist


def get_ydist(nlabels, device=None):
    '''Build a categorical distribution over nlabels labels with all-zero logits.

    The result carries an extra `nlabels` attribute.
    '''
    # equal (zero) logits for every label
    flat_logits = torch.zeros(nlabels, device=device)
    ydist = distributions.Categorical(logits=flat_logits)
    ydist.nlabels = nlabels
    return ydist


def interpolate_sphere(z1, z2, t):
    """Spherical linear interpolation between ``z1`` and ``z2``.

    The angle between the two inputs is measured along the last dimension
    and the result is ``s1 * z1 + s2 * z2`` with the usual slerp weights
    ``sin((1 - t) * omega) / sin(omega)`` and ``sin(t * omega) / sin(omega)``.

    Args:
        z1, z2: tensors of matching shape; vectors live in the last dim.
        t: interpolation position (0 gives ``z1``, 1 gives ``z2``); a float
            or a tensor broadcastable against ``z1[..., :1]``.

    Returns:
        The interpolated tensor. When the inputs are (anti-)parallel the
        slerp weights are undefined (0/0), so plain linear interpolation
        ``(1 - t) * z1 + t * z2`` is used for those rows instead of
        returning NaN.
    """
    norm1 = z1.pow(2).sum(dim=-1, keepdim=True).sqrt()
    norm2 = z2.pow(2).sum(dim=-1, keepdim=True).sqrt()
    cos_omega = (z1 * z2).sum(dim=-1, keepdim=True) / norm1 / norm2

    # Rounding can push the cosine marginally outside [-1, 1], where acos
    # yields NaN.
    omega = torch.acos(cos_omega.clamp(-1., 1.))
    sin_omega = torch.sin(omega)

    # Rows where sin(omega) vanishes would divide by zero below.
    degenerate = sin_omega.abs() < 1e-6
    safe_sin = torch.where(degenerate, torch.ones_like(sin_omega), sin_omega)

    s1 = torch.sin((1 - t) * omega) / safe_sin
    s2 = torch.sin(t * omega) / safe_sin

    # Linear weights are the limit of the slerp weights as omega -> 0.
    s1 = torch.where(degenerate, torch.zeros_like(s1) + (1 - t), s1)
    s2 = torch.where(degenerate, torch.zeros_like(s2) + t, s2)

    return s1 * z1 + s2 * z2


================================================
FILE: gan_training/eval.py
================================================
import numpy as np
import torch
from torch.nn import functional as F

from gan_training.metrics import inception_score

class Evaluator(object):
    """Draws generator samples for evaluation (inception score, previews).

    Args:
        generator: callable taking ``(z, y)`` and returning a batch of
            images; its ``eval()`` method is called before sampling.
        zdist: latent distribution; only ``zdist.sample`` is used here.
        ydist: label distribution (stored; not used by the methods below).
        train_loader: iterable yielding ``(x_real, y_gt)`` batches.
        clusterer: object whose ``get_labels(x, y)`` returns the labels the
            generator is conditioned on.
        batch_size: stored; not used by the methods below.
        inception_nsamples: number of generated images scored by
            ``compute_inception_score``.
        device: device all sampled tensors and real batches are moved to.
    """

    def __init__(self,
                 generator,
                 zdist,
                 ydist,
                 train_loader,
                 clusterer,
                 batch_size=64,
                 inception_nsamples=10000,
                 device=None):
        self.generator = generator
        self.clusterer = clusterer
        self.train_loader = train_loader
        self.zdist = zdist
        self.ydist = ydist
        self.inception_nsamples = inception_nsamples
        self.batch_size = batch_size
        self.device = device

    def sample_z(self, batch_size):
        """Sample ``batch_size`` latent vectors on ``self.device``."""
        return self.zdist.sample((batch_size, )).to(self.device)

    def get_y(self, x, y):
        """Return the clusterer's labels for batch ``x`` on ``self.device``."""
        return self.clusterer.get_labels(x, y).to(self.device)

    def get_fake_real_samples(self, N):
        ''' returns N fake images and N real images in pytorch form

        Real batches are read from ``train_loader`` (looping over it as
        often as needed); for each real batch an equally sized fake batch
        is generated, conditioned on the clusterer's labels for that real
        batch. Both results are lists of CPU tensors, one per image.

        Returns two empty lists when ``N <= 0``.

        Raises:
            RuntimeError: if a full pass over ``train_loader`` produces no
                samples (the request could never be satisfied).
        '''
        fake_imgs = []
        real_imgs = []
        with torch.no_grad():
            self.generator.eval()
            while len(fake_imgs) < N:
                n_before = len(fake_imgs)
                for x_real, y_gt in self.train_loader:
                    # Honour the configured device rather than assuming
                    # CUDA, so evaluation also runs on CPU-only hosts.
                    x_real = x_real.to(self.device)
                    z = self.sample_z(x_real.size(0))
                    y = self.get_y(x_real, y_gt)
                    samples = self.generator(z, y)
                    fake_imgs.extend(s.data.cpu() for s in samples)
                    real_imgs.extend(img.data.cpu() for img in x_real)
                    assert (len(real_imgs) == len(fake_imgs))
                    if len(fake_imgs) >= N:
                        break
                if len(fake_imgs) == n_before:
                    # An empty loader would otherwise loop forever.
                    raise RuntimeError(
                        'train_loader yielded no samples; cannot collect '
                        '%d images' % N)
        return fake_imgs[:N], real_imgs[:N]

    def compute_inception_score(self):
        """Return ``(score, score_std)`` over ``inception_nsamples`` fakes."""
        imgs, _ = self.get_fake_real_samples(self.inception_nsamples)
        imgs = [img.numpy() for img in imgs]
        score, score_std = inception_score(imgs,
                                           device=self.device,
                                           resize=True,
                                           splits=1)

        return score, score_std

    def create_samples(self, z, y=None):
        """Generate images for latents ``z`` and labels ``y``.

        Args:
            z: batch of latent vectors.
            y: label tensor, or a single int that is broadcast to every
                element of the batch.

        Raises:
            NotImplementedError: if ``y`` is None (unconditional sampling
                is not supported here).
        """
        self.generator.eval()
        batch_size = z.size(0)
        # Parse y
        if y is None:
            raise NotImplementedError()
        elif isinstance(y, int):
            y = torch.full((batch_size, ),
                           y,
                           device=self.device,
                           dtype=torch.int64)
        # Sample x
        with torch.no_grad():
            x = self.generator(z, y)
        return x


================================================
FILE: gan_training/inputs.py
================================================
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import numpy as np

import os
import torch.utils.data as data
from torchvision.datasets.folder import default_loader
from PIL import Image
import random

from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

def get_dataset(name,
                data_dir,
                size=64,
                lsun_categories=None,
                deterministic=False,
                transform=None):
    """Create the dataset called ``name`` and report its label count.

    Args:
        name: one of ``'image'``, ``'webp'``, ``'npy'``, ``'cifar10'``,
            ``'stacked_mnist'``, ``'lsun'`` or ``'lsun_class'``.
        data_dir: root directory of the data.
        size: target side length used by the default transform.
        lsun_categories: passed to ``datasets.LSUN``; defaults to
            ``'train'`` when None.
        deterministic: when true, the default transform omits the random
            horizontal flip and the additive uniform noise.
        transform: custom transform; when None a default resize / crop /
            normalise pipeline is built. The ``'npy'`` and
            ``'stacked_mnist'`` branches do not use this transform.

    Returns:
        ``(dataset, nlabels)``.

    Raises:
        NotImplementedError: if ``name`` is not a supported dataset.
    """
    if transform is None:
        steps = [
            transforms.Resize(size),
            transforms.CenterCrop(size),
        ]
        if not deterministic:
            steps.append(transforms.RandomHorizontalFlip())
        steps.append(transforms.ToTensor())
        steps.append(transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)))
        if not deterministic:
            # Small uniform noise added on top of the normalised image.
            steps.append(
                transforms.Lambda(
                    lambda x: x + 1. / 128 * torch.rand(x.size())))
        transform = transforms.Compose(steps)

    if name == 'image':
        print('Using image labels')
        dataset = datasets.ImageFolder(data_dir, transform)
        nlabels = len(dataset.classes)
    elif name == 'webp':
        print('Using no labels from webp')
        dataset = CachedImageFolder(data_dir, transform)
        nlabels = len(dataset.classes)
    elif name == 'npy':
        # Only support normalization for now
        dataset = datasets.DatasetFolder(data_dir, npy_loader, ['npy'])
        nlabels = len(dataset.classes)
    elif name == 'cifar10':
        dataset = datasets.CIFAR10(root=data_dir,
                                   train=True,
                                   download=True,
                                   transform=transform)
        nlabels = 10
    elif name == 'stacked_mnist':
        dataset = StackedMNIST(data_dir,
                               transform=transforms.Compose([
                                   transforms.Resize(size),
                                   transforms.CenterCrop(size),
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.5, ), (0.5, ))
                               ]))
        nlabels = 1000
    elif name == 'lsun':
        if lsun_categories is None:
            lsun_categories = 'train'
        dataset = datasets.LSUN(data_dir, lsun_categories, transform)
        nlabels = len(dataset.classes)
    elif name == 'lsun_class':
        dataset = datasets.LSUNClass(data_dir,
                                     transform,
                                     target_transform=(lambda t: 0))
        nlabels = 1
    else:
        # `NotImplemented` is a comparison sentinel, not an exception;
        # raising it produces a confusing TypeError.
        raise NotImplementedError('Dataset "%s" not supported!' % name)
    return dataset, nlabels

class CachedImageFolder(data.Dataset):
    """
    Image-folder style dataset whose sample list is produced by
    make_class_dataset and whose class names come from find_classes, e.g.
    photo/park/004234.jpg
    photo/park/004236.jpg
    photo/park/004237.jpg
    """

    def __init__(self, root, transform=None, loader=default_loader):
        class_names, class_lookup = find_classes(root)
        self.imgs = make_class_dataset(root, class_lookup)
        if not self.imgs:
            raise RuntimeError("Found 0 images within: %s" % root)
        self.root = root
        self.classes = class_names
        self.class_to_idx = class_lookup
        self.transform = transform
        self.loader = loader

    def __getitem__(self, index):
        """Load sample ``index`` and return ``(image, class index)``."""
        path, classidx = self.imgs[index]
        source = self.loader(path)
        if self.transform is None:
            return source, classidx
        return self.transform(source), classidx

    def __len__(self):
        return len(self.imgs)

class StackedMNIST(data.Dataset):
    """Three-channel images built by stacking three random MNIST digits.

    ``batch_size`` random index triples are drawn once at construction;
    sample ``k`` concatenates the three selected digits along dim 0 and is
    labelled ``100 * d1 + 10 * d2 + d3``.
    """

    def __init__(self, data_dir, transform, batch_size=100000):
        super().__init__()

        def load_mnist():
            return datasets.MNIST(data_dir,
                                  transform=transform,
                                  train=True,
                                  download=True)

        self.channel1 = load_mnist()
        self.channel2 = load_mnist()
        self.channel3 = load_mnist()

        # All three index draws use the size of the first channel.
        last_index = len(self.channel1) - 1
        self.indices = {}
        for k in range(batch_size):
            first = random.randint(0, last_index)
            second = random.randint(0, last_index)
            third = random.randint(0, last_index)
            self.indices[k] = (first, second, third)

    def __getitem__(self, index):
        index1, index2, index3 = self.indices[index]
        digit_imgs = []
        label = 0
        sources = ((self.channel1, index1), (self.channel2, index2),
                   (self.channel3, index3))
        for weight, (channel, idx) in zip((100, 10, 1), sources):
            img, digit = channel[idx]
            digit_imgs.append(img)
            label = label + digit * weight
        return torch.cat(digit_imgs, dim=0), label

    def __len__(self):
        return len(self.indices)
        

def is_npy_file(path):
    """Return True if ``path`` ends with ``.npy`` or ``.NPY``."""
    return path.endswith(('.npy', '.NPY'))


def walk_image_files(rootdir):
    """List image (and .npy) files for ``rootdir``.

    If a file named ``<rootdir>.txt`` exists, its lines are taken as paths
    relative to the parent of ``rootdir``; they are sorted and then
    shuffled with a fixed seed. Otherwise ``rootdir`` is walked and every
    file with a known image extension (or accepted by ``is_npy_file``) is
    returned in sorted order.
    """
    print(rootdir)
    listing = '%s.txt' % rootdir
    if os.path.isfile(listing):
        print('Loading file list from %s.txt instead of scanning dir' %
              rootdir)
        basedir = os.path.dirname(rootdir)
        with open(listing) as f:
            cached = [os.path.join(basedir, line.strip()) for line in f]
        cached.sort()
        import random
        # Fixed seed: same order on every run.
        random.Random(1).shuffle(cached)
        return cached

    image_suffixes = (
        '.jpg',
        '.JPG',
        '.jpeg',
        '.JPEG',
        '.png',
        '.PNG',
        '.ppm',
        '.PPM',
        '.bmp',
        '.BMP',
    )

    found = []
    for dirname, _, fnames in sorted(os.walk(rootdir)):
        found.extend(
            os.path.join(dirname, fname) for fname in sorted(fnames)
            if fname.endswith(image_suffixes) or is_npy_file(fname))
    return found


def find_classes(dir):
    """Return the sorted sub-directory names of ``dir`` and their indices.

    Returns:
        ``(classes, class_to_idx)`` where ``classes`` is the sorted list of
        directory names and ``class_to_idx`` maps each name to its position
        in that list.
    """
    classes = sorted(
        entry for entry in os.listdir(dir)
        if os.path.isdir(os.path.join(dir, entry)))
    class_to_idx = {label: idx for idx, label in enumerate(classes)}
    return classes, class_to_idx


def make_class_dataset(source_root, class_to_idx):
    """
    Returns a list of (path, 0) pairs, one per file that walk_image_files
    finds for source_root (after expanding a leading ~).

    Every sample gets class index 0; class_to_idx is accepted but not
    consulted.
    """
    source_root = os.path.expanduser(source_root)
    return [(path, 0) for path in walk_image_files(source_root)]


def npy_loader(path):
    """Load a ``.npy`` image and rescale it to a float tensor.

    ``uint8`` data is mapped with ``x / 127.5 - 1``; ``float32`` data is
    mapped with ``x * 2 - 1``. A 4-D result has its leading dimension
    squeezed (which only removes it when it has size 1).

    Raises:
        NotImplementedError: for any other dtype.
    """
    arr = np.load(path)

    if arr.dtype == np.uint8:
        arr = arr.astype(np.float32)
        arr = arr / 127.5 - 1.
    elif arr.dtype == np.float32:
        arr = arr * 2 - 1.
    else:
        raise NotImplementedError

    tensor = torch.Tensor(arr)
    if tensor.dim() == 4:
        tensor.squeeze_(0)
    return tensor


================================================
FILE: gan_training/logger.py
================================================
import pickle
import os
import torchvision
import copy


class Logger(object):
    """Collects scalar statistics and image grids during training.

    Scalars are kept in ``self.stats`` as
    ``stats[category][key] = [(iteration, value), ...]`` and can optionally
    be mirrored to a monitoring backend (``'telemetry'`` or
    ``'tensorboard'``). Image grids are written below ``img_dir``.
    """

    def __init__(self,
                 log_dir='./logs',
                 img_dir='./imgs',
                 monitoring=None,
                 monitoring_dir=None):
        self.stats = dict()
        self.log_dir = log_dir
        self.img_dir = img_dir

        for directory in (log_dir, img_dir):
            if not os.path.exists(directory):
                os.makedirs(directory)

        if monitoring is None or monitoring == 'none':
            self.monitoring = None
            self.monitoring_dir = None
        else:
            self.setup_monitoring(monitoring, monitoring_dir)

    def setup_monitoring(self, monitoring, monitoring_dir=None):
        """Attach the monitoring backend named ``monitoring``.

        Raises:
            NotImplementedError: for an unknown backend name.
        """
        self.monitoring = monitoring
        self.monitoring_dir = monitoring_dir

        if monitoring == 'telemetry':
            import telemetry
            self.tm = telemetry.ApplicationTelemetry()
            if self.tm.get_status() == 0:
                print('Telemetry successfully connected.')
            return

        if monitoring == 'tensorboard':
            import tensorboardX
            self.tb = tensorboardX.SummaryWriter(monitoring_dir)
            return

        raise NotImplementedError('Monitoring tool "%s" not supported!' %
                                  monitoring)

    def add(self, category, k, v, it):
        """Record value ``v`` for ``category``/``k`` at iteration ``it``."""
        history = self.stats.setdefault(category, {}).setdefault(k, [])
        history.append((it, v))

        k_name = '%s/%s' % (category, k)
        if self.monitoring == 'telemetry':
            self.tm.metric_push_async({'metric': k_name, 'value': v, 'it': it})
        elif self.monitoring == 'tensorboard':
            self.tb.add_scalar(k_name, v, it)

    def add_imgs(self, imgs, class_name, it):
        """Save ``imgs`` as a grid to ``<img_dir>/<class_name>/<it>.png``.

        Images are mapped with ``x / 2 + 0.5`` before being saved; the
        grid is also sent to tensorboard when that backend is active.
        """
        outdir = os.path.join(self.img_dir, class_name)
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        outfile = os.path.join(outdir, '%08d.png' % it)

        grid = torchvision.utils.make_grid(imgs / 2 + 0.5)
        torchvision.utils.save_image(copy.deepcopy(grid), outfile, nrow=8)

        if self.monitoring == 'tensorboard':
            self.tb.add_image(class_name, copy.deepcopy(grid), it)

    def get_last(self, category, k, default=0.):
        """Return the latest value of ``category``/``k`` or ``default``."""
        if category in self.stats and k in self.stats[category]:
            return self.stats[category][k][-1][1]
        return default

    def save_stats(self, filename):
        """Pickle ``self.stats`` to ``<log_dir>/<filename>``."""
        target = os.path.join(self.log_dir, filename)
        with open(target, 'wb') as f:
            pickle.dump(self.stats, f)

    def load_stats(self, filename):
        """Load pickled stats from ``<log_dir>/<filename>``.

        A missing or truncated file only prints a warning and leaves
        ``self.stats`` untouched.
        """
        filename = os.path.join(self.log_dir, filename)
        if not os.path.exists(filename):
            print('Warning: file "%s" does not exist!' % filename)
            return

        try:
            with open(filename, 'rb') as f:
                self.stats = pickle.load(f)
        except EOFError:
            print('Warning: log file corrupted!')


================================================
FILE: gan_training/metrics/__init__.py
================================================
from gan_training.metrics.inception_score import inception_score

# __all__ must contain names as strings; listing the function object itself
# makes ``from gan_training.metrics import *`` fail with a TypeError.
__all__ = [
    'inception_score'
]


================================================
FILE: gan_training/metrics/clustering_metrics.py
================================================
def warn(*args, **kwargs):
    """Accept any arguments and do nothing."""
    return None


import warnings
# Monkeypatch: replace warnings.warn with the no-op `warn` defined above, so
# calls made through `warnings.warn` after this module is imported emit
# nothing. This affects the whole process, not just this module.
warnings.warn = warn

from sklearn.metrics.cluster import normalized_mutual_info_score, adjusted_rand_score, homogeneity_score
from sklearn import metrics

import numpy as np


def nmi(inferred, gt):
    """Normalized mutual information between inferred and true labels."""
    score = normalized_mutual_info_score(inferred, gt)
    return score


def acc(inferred, gt):
    """Clustering accuracy under the best one-to-one label matching.

    A square count matrix ``w`` is built where ``w[i, j]`` is the number of
    samples with inferred label ``i`` and ground-truth label ``j``. The
    assignment of inferred labels to ground-truth labels that maximises the
    matched count is then found, and the matched fraction is returned.

    Args:
        inferred: array-like of non-negative integer cluster labels.
        gt: array-like of non-negative ground-truth labels (cast to int64).

    Returns:
        Fraction of samples that agree under the optimal matching.
    """
    # `sklearn.utils.linear_assignment_` is no longer shipped with
    # scikit-learn; SciPy provides the same optimal assignment solver.
    from scipy.optimize import linear_sum_assignment

    inferred = np.asarray(inferred).astype(np.int64).ravel()
    gt = np.asarray(gt).astype(np.int64).ravel()
    assert inferred.size == gt.size

    D = max(inferred.max(), gt.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    # Unbuffered add so that repeated (inferred, gt) pairs all count.
    np.add.at(w, (inferred, gt), 1)

    # The solver minimises cost, so turn counts into costs.
    rows, cols = linear_sum_assignment(w.max() - w)
    return w[rows, cols].sum() * 1.0 / inferred.size


def purity_score(y_true, y_pred):
    """Purity of a clustering.

    Sums, for every column of the contingency matrix of ``y_true`` against
    ``y_pred``, its largest entry and divides by the total sample count.
    """
    contingency = metrics.cluster.contingency_matrix(y_true, y_pred)
    column_maxima = np.amax(contingency, axis=0)
    return np.sum(column_maxima) / np.sum(contingency)


def ari(inferred, gt):
    """Adjusted Rand index of the inferred labels against ground truth."""
    score = adjusted_rand_score(gt, inferred)
    return score


def homogeneity(inferred, gt):
    """Homogeneity score of the inferred labels against ground truth."""
    score = homogeneity_score(gt, inferred)
    return score


================================================
FILE: gan_training/metrics/fid.py
================================================
from __future__ import absolute_import, division, print_function
import numpy as np
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
from scipy import linalg
import pathlib
import urllib
from tqdm import tqdm
import warnings


def check_or_download_inception(inception_path):
    ''' Returns the path of the Inception graph file inside
        inception_path (default: /tmp), downloading and extracting the
        model archive first if the file is not there yet. '''
    INCEPTION_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'
    model_dir = pathlib.Path('/tmp' if inception_path is None else
                             inception_path)
    model_file = model_dir / 'classify_image_graph_def.pb'
    if model_file.exists():
        return str(model_file)

    print("Downloading Inception model")
    from urllib import request
    import tarfile
    archive, _ = request.urlretrieve(INCEPTION_URL)
    with tarfile.open(archive, mode='r') as tar:
        # Only the frozen graph is needed from the archive.
        tar.extract('classify_image_graph_def.pb', str(model_file.parent))
    return str(model_file)


def create_inception_graph(pth):
    """Import the GraphDef stored at ``pth`` into the current TF graph.

    The imported nodes are placed under the name scope
    ``FID_Inception_Net``.
    """
    with tf.io.gfile.GFile(pth, 'rb') as f:
        serialized = f.read()
    graph_def = tf.compat.v1.GraphDef()
    graph_def.ParseFromString(serialized)
    tf.import_graph_def(graph_def, name='FID_Inception_Net')


def calculate_activation_statistics(images,
                                    sess,
                                    batch_size=200,
                                    verbose=False):
    """Compute the mean and covariance of the activations used by the FID.

    Params:
    -- images      : Numpy array of dimension (n_images, hi, wi, 3). The values
                     must lie between 0 and 255.
    -- sess        : current session
    -- batch_size  : number of images fed through the network at once; pick
                     it to fit the available hardware.
    -- verbose     : forwarded to get_activations, which reports progress per
                     batch when set.
    Returns:
    -- mu    : mean over samples of the activations returned by
               get_activations.
    -- sigma : covariance matrix of those activations (one variable per
               column).
    """
    activations = get_activations(images, sess, batch_size, verbose)
    return np.mean(activations, axis=0), np.cov(activations, rowvar=False)


# code for handling inception net derived from
#   https://github.com/openai/improved-gan/blob/master/inception_score/model.py
def _get_inception_layer(sess):
    """Prepares inception net for batched usage and returns pool_3 layer.

    Looks up the tensor ``FID_Inception_Net/pool_3:0`` in ``sess.graph`` and,
    as a side effect, rewrites the recorded static shape of every op output
    in that graph whose leading dimension is 1 so that this dimension
    becomes unknown (``None``).
    """
    layername = 'FID_Inception_Net/pool_3:0'
    pool3 = sess.graph.get_tensor_by_name(layername)
    ops = pool3.graph.get_operations()
    # op_idx is not used; only the op itself matters.
    for op_idx, op in enumerate(ops):
        for o in op.outputs:
            shape = o.get_shape()
            # NOTE(review): relies on the private `_dims` attribute;
            # presumably this skips outputs without any dimensions - confirm
            # against the TensorFlow version in use.
            if shape._dims != []:
                # NOTE(review): `.value` on a dimension looks like the
                # TF1-style shape API - confirm for the installed version.
                shape = [s.value for s in shape]
                new_shape = []
                for j, s in enumerate(shape):
                    # A leading dimension of exactly 1 is replaced by None;
                    # every other dimension is kept as is.
                    if s == 1 and j == 0:
                        new_shape.append(None)
                    else:
                        new_shape.append(s)
                # Overwrites the tensor's cached shape by writing the private
                # `_shape_val` entry directly.
                o.__dict__['_shape_val'] = tf.TensorShape(new_shape)
    return pool3


#-------------------------------------------------------------------------------


def get_activations(images, sess, batch_size=200, verbose=False):
    """Calculates the activations of the pool_3 layer for all images.

    Params:
    -- images      : Numpy array of dimension (n_images, hi, wi, 3). The values
                     must lie between 0 and 255.
    -- sess        : current session
    -- batch_size  : the images numpy array is split into batches with batch size
                     batch_size. A reasonable batch size depends on the available
                     hardware. The last batch may be smaller.
    -- verbose     : If set to True, the number of propagated batches is
                     reported.
    Returns:
    -- A numpy array of dimension (num images, 2048) that contains the
       activations of the given tensor when feeding inception with the query tensor.
    """
    inception_layer = _get_inception_layer(sess)
    n_images = images.shape[0]
    if n_images == 0:
        # Nothing to propagate; also avoids a division by zero below.
        return np.empty((0, 2048))
    if batch_size > n_images:
        print(
            "warning: batch size is bigger than the data size. setting batch size to data size"
        )
        batch_size = n_images

    # Round up so that a trailing partial batch is processed as well;
    # otherwise its rows of `pred_arr` would stay uninitialised.
    n_batches = (n_images + batch_size - 1) // batch_size
    pred_arr = np.empty((n_images, 2048))
    for i in tqdm(range(n_batches)):
        if verbose:
            print("\rPropagating batch %d/%d" % (i + 1, n_batches),
                  end="",
                  flush=True)
        start = i * batch_size
        end = min(start + batch_size, n_images)

        batch = images[start:end]
        pred = sess.run(inception_layer,
                        {'FID_Inception_Net/ExpandDims:0': batch})
        # Use the real batch length: the last batch can be short.
        pred_arr[start:end] = pred.reshape(end - start, -1)
    if verbose:
        print(" done")
    return pred_arr


#-------------------------------------------------------------------------------


def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Numpy implementation of the Frechet Distance.

    The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
    and X_2 ~ N(mu_2, C_2) is
            d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).

    Stable version by Dougal J. Sutherland.
    Params:
    -- mu1   : Mean vector of the first Gaussian.
    -- mu2   : Mean vector of the second Gaussian; must have the same shape
               as mu1.
    -- sigma1: Covariance matrix of the first Gaussian.
    -- sigma2: Covariance matrix of the second Gaussian; must have the same
               shape as sigma1.
    -- eps   : Value added to the diagonal of both covariances when the
               matrix square root of their product is not finite.
    Returns:
    --   : The Frechet Distance.
    """
    mu1, mu2 = np.atleast_1d(mu1), np.atleast_1d(mu2)
    sigma1, sigma2 = np.atleast_2d(sigma1), np.atleast_2d(sigma2)

    assert mu1.shape == mu2.shape, "Training and test mean vectors have different lengths"
    assert sigma1.shape == sigma2.shape, "Training and test covariances have different dimensions"

    mean_gap = mu1 - mu2

    # The product can be nearly singular, in which case sqrtm may return
    # non-finite entries; retry with a small ridge on both covariances.
    sqrt_prod, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(sqrt_prod).all():
        msg = "fid calculation produces singular product; adding %s to diagonal of cov estimates" % eps
        warnings.warn(msg)
        ridge = np.eye(sigma1.shape[0]) * eps
        sqrt_prod = linalg.sqrtm((sigma1 + ridge).dot(sigma2 + ridge))

    # Numerical error can leave a small imaginary part; reject a large one.
    if np.iscomplexobj(sqrt_prod):
        diag_imag = np.diagonal(sqrt_prod).imag
        if not np.allclose(diag_imag, 0, atol=1e-3):
            worst = np.max(np.abs(sqrt_prod.imag))
            raise ValueError("Imaginary component {}".format(worst))
        sqrt_prod = sqrt_prod.real

    trace_sqrt = np.trace(sqrt_prod)
    squared_gap = mean_gap.dot(mean_gap)

    return squared_gap + np.trace(sigma1) + np.trace(
        sigma2) - 2 * trace_sqrt


def compute_fid_from_npz(path):
    """Compute the FID for the samples stored in the ``.npz`` file ``path``.

    The archive must contain the generated images under ``'fake'``. If
    ``path`` contains one of the dataset names ``imagenet``, ``cifar`` or
    ``places``, the cached real-image statistics listed in
    ``precomputed_stats`` are used; otherwise the real images are read from
    the archive's ``'real'`` entry and their statistics are computed from
    scratch.

    Returns:
        The Frechet distance, or 0 when fewer than 1000 fake images are
        available.
    """
    print(path)
    with np.load(path) as data:
        fake_imgs = data['fake']

        # Use a separate loop variable: iterating with `name` itself would
        # leave it set to the last candidate when nothing matches, so the
        # "no known dataset" case could never be detected.
        name = next((candidate
                     for candidate in ('imagenet', 'cifar', 'places')
                     if candidate in path), None)
        print('Inferred name', name)

        if fake_imgs.shape[0] < 1000: return 0

        # A string selects cached statistics below; an array is evaluated.
        real_imgs = name if name is not None else data['real']

    inception_path = check_or_download_inception(None)
    create_inception_graph(inception_path)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        m1, s1 = calculate_activation_statistics(fake_imgs, sess)
        if isinstance(real_imgs, str):
            print(f'using cached image stats for {real_imgs}')
            with np.load(precomputed_stats[real_imgs]) as data:
                m2, s2 = data['m'], data['s']
        else:
            print('computing real images stats from scratch')
            m2, s2 = calculate_activation_statistics(real_imgs, sess)

    return calculate_frechet_distance(m1, s1, m2, s2)

# Locations of the cached real-image activation statistics, keyed by dataset
# name. Each file is an .npz archive holding the arrays 'm' and 's', as
# written by compute_stats and read by the compute_fid_* functions.
precomputed_stats = {
    'places':
    'output/places_gt_stats.npz',
    'imagenet':
    'output/imagenet_gt_stats.npz',
    'cifar':
    'output/cifar_gt_stats.npz'
}


def compute_fid_from_imgs(fake_imgs, real_imgs):
    """Compute the FID between generated and real images.

    ``real_imgs`` is either an image array, whose statistics are computed
    here, or a string key into ``precomputed_stats`` selecting cached
    statistics.
    """
    graph_file = check_or_download_inception(None)
    create_inception_graph(graph_file)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        fake_mu, fake_sigma = calculate_activation_statistics(fake_imgs, sess)
        if not isinstance(real_imgs, str):
            real_mu, real_sigma = calculate_activation_statistics(
                real_imgs, sess)
        else:
            with np.load(precomputed_stats[real_imgs]) as data:
                real_mu, real_sigma = data['m'], data['s']
    return calculate_frechet_distance(fake_mu, fake_sigma, real_mu,
                                      real_sigma)

def compute_stats(exp_path):
    """Create missing cached real-image statistics for ``exp_path``.

    For every dataset name (``places``, ``imagenet``, ``cifar``) that occurs
    in ``exp_path`` and whose statistics file from ``precomputed_stats``
    does not exist yet, the real images are loaded from
    ``output/<name>_gt_imgs.npz`` (entry ``'real'``), their activation
    statistics are computed and saved as ``m`` / ``s`` to the statistics
    file.
    """
    #TODO: a bit hacky
    for name in ('places', 'imagenet', 'cifar'):
        # Skip datasets this experiment does not use or that are cached.
        if name not in exp_path or os.path.exists(precomputed_stats[name]):
            continue

        with np.load('output/%s_gt_imgs.npz' % name) as data_real:
            real_imgs = data_real['real']
            print('loaded real %s images' % name, real_imgs.shape)
        inception_path = check_or_download_inception(None)
        create_inception_graph(inception_path)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            m, s = calculate_activation_statistics(real_imgs, sess)
        np.savez(precomputed_stats[name], m=m, s=s)

if __name__ == '__main__':
    # Command-line entry point: compute the FID of a sample archive and
    # optionally record it in <results_dir>/fid_results.json.
    import argparse
    import json

    parser = argparse.ArgumentParser('compute TF FID')
    parser.add_argument('--samples', help='path to samples')
    parser.add_argument('--it',
                        type=str,
                        help='key (iteration) to store the FID under in '
                        'fid_results.json')
    parser.add_argument('--results_dir', help='path to results_dir')
    args = parser.parse_args()

    it = args.it
    results_dir = args.results_dir

    compute_stats(args.samples)
    mean = compute_fid_from_npz(args.samples)
    print(f'FID: {mean}')

    if results_dir is not None:
        results_file = os.path.join(results_dir, 'fid_results.json')

        # Start from an empty record when no results were written before
        # instead of failing on the missing file.
        if os.path.exists(results_file):
            with open(results_file) as f:
                fid_results = json.load(f)
        else:
            fid_results = {}

        fid_results[it] = mean
        print(f'{results_dir} iteration {it} FID: {mean}')

        with open(results_file, 'w') as f:
            f.write(json.dumps(fid_results))

================================================
FILE: gan_training/metrics/inception_score.py
================================================
import torch
from torch import nn
from torch.nn import functional as F
import torch.utils.data

from torchvision.models.inception import inception_v3

import numpy as np
from scipy.stats import entropy


def inception_score(imgs, device=None, batch_size=32, resize=False, splits=1):
    """Compute the Inception Score of a set of generated images.

    Args:
        imgs: Torch dataset of (3xHxW) images normalized in the
              range [-1, 1]
        device: device the Inception network and the batches are moved to
        batch_size: batch size for feeding into Inception v3
        resize: if True, bilinearly upsample every batch to 299x299 before
                it is fed to the network
        splits: number of splits

    Returns:
        (mean, std) of the per-split scores.
    """
    N = len(imgs)

    assert batch_size > 0
    assert N > batch_size

    loader = torch.utils.data.DataLoader(imgs, batch_size=batch_size)

    # Pretrained classifier, kept in eval mode.
    net = inception_v3(pretrained=True, transform_input=False).to(device)
    net.eval()
    up = nn.Upsample(size=(299, 299), mode='bilinear').to(device)

    def get_pred(x):
        # Class probabilities for one batch, returned as a numpy array.
        with torch.no_grad():
            inp = up(x) if resize else x
            probs = F.softmax(net(inp), dim=-1)
        return probs.cpu().numpy()

    # Fill the (N, 1000) prediction table batch by batch.
    preds = np.zeros((N, 1000))
    for step, batch in enumerate(loader, 0):
        start = step * batch_size
        stop = start + batch.size()[0]
        preds[start:stop] = get_pred(batch.to(device))

    # exp(mean KL(p(y|x) || p(y))) per split; rows beyond
    # splits * (N // splits) are not used.
    per_split = N // splits
    split_scores = []
    for k in range(splits):
        part = preds[k * per_split:(k + 1) * per_split, :]
        py = np.mean(part, axis=0)
        kls = [entropy(part[row, :], py) for row in range(part.shape[0])]
        split_scores.append(np.exp(np.mean(kls)))

    return np.mean(split_scores), np.std(split_scores)


================================================
FILE: gan_training/metrics/tf_is/LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.


================================================
FILE: gan_training/metrics/tf_is/README.md
================================================
Inception Score
=====================================

A new Tensorflow implementation of the "Inception Score" (IS) for the evaluation of generative models, with a bug raised in [https://github.com/openai/improved-gan/issues/29](https://github.com/openai/improved-gan/issues/29) fixed. 

## Major Dependency
- `tensorflow >= 1.14`

## Features
- Fast, easy-to-use and memory-efficient, written in a way that is similar to the original implementation
- No prior knowledge about Tensorflow is necessary if you are using CPU or GPU
- Makes use of [TF-GAN](https://github.com/tensorflow/gan)
- Downloads InceptionV1 automatically
- Compatible with both Python 2 and Python 3

## Usage
- If you are working with GPU, use `inception_score.py`; if you are working with TPU, use `inception_score_tpu.py` and pass a Tensorflow Session and a [TPUStrategy](https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/TPUStrategy) as additional arguments.
- Call `get_inception_score(images, splits=10)`, where `images` is a numpy array with values ranging from 0 to 255 and shape in the form `[N, 3, HEIGHT, WIDTH]` where `N`, `HEIGHT` and `WIDTH` can be arbitrary. `dtype` of the images is recommended to be `np.uint8` to save CPU memory.
- A smaller `BATCH_SIZE` reduces GPU/TPU memory usage, but at the cost of a slight slowdown.
- If you want to compute a general "Classifier Score" with probabilities `preds` from another classifier, call `preds2score(preds, splits=10)`. `preds` can be a numpy array of arbitrary shape `[N, num_classes]`.
## Links
- The Inception Score was proposed in the paper [Improved Techniques for Training GANs](https://arxiv.org/abs/1606.03498)
- Code for the [Fréchet Inception Distance](https://github.com/tsc2017/Frechet-Inception-Distance)


================================================
FILE: gan_training/metrics/tf_is/inception_score.py
================================================
'''
From https://github.com/tsc2017/Inception-Score
Code derived from https://github.com/openai/improved-gan/blob/master/inception_score/model.py and https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/gan/python/eval/python/classifier_metrics_impl.py

Usage:
    Call get_inception_score(images, splits=10)
Args:
    images: A numpy array with values ranging from 0 to 255 and shape in the form [N, 3, HEIGHT, WIDTH] where N, HEIGHT and WIDTH can be arbitrary. A dtype of np.uint8 is recommended to save CPU memory.
    splits: The number of splits of the images, default is 10.
Returns:
    Mean and standard deviation of the Inception Score across the splits.
'''

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import tensorflow as tf
import functools
import numpy as np
import time
from tqdm import tqdm
from tensorflow.python.ops import array_ops
# Shorthand for the TF-GAN library exposed under tf.contrib.
# NOTE(review): tf.contrib is a TensorFlow 1.x namespace - confirm the pinned TF version.
tfgan = tf.contrib.gan

# Module-level session, created as a side effect of importing this file;
# get_inception_probs() evaluates the logits graph through it.
session=tf.compat.v1.InteractiveSession()

# A smaller BATCH_SIZE reduces GPU memory usage, but at the cost of a slight slowdown
BATCH_SIZE = 64
# Tarball URL and frozen-graph file name passed to
# tfgan.eval.get_graph_def_from_url_tarball in inception_logits().
INCEPTION_URL = 'http://download.tensorflow.org/models/frozen_inception_v1_2015_12_05.tar.gz'
INCEPTION_FROZEN_GRAPH = 'inceptionv1_for_inception_score.pb'

# Run images through Inception.
# Feed placeholder for a float32 image batch laid out as [N, 3, HEIGHT, WIDTH].
inception_images = tf.compat.v1.placeholder(tf.float32, [None, 3, None, None])
def inception_logits(images = inception_images, num_splits = 1):
    '''
    Build the graph that maps a [N, 3, H, W] image tensor to Inception logits.

    The images are moved to channels-last order, bilinearly resized to
    299x299, split into `num_splits` pieces, and each piece is classified with
    tfgan.eval.run_inception (output tensor 'logits:0'). The per-piece logits
    are concatenated back along the batch axis.
    '''
    channels_last = tf.transpose(images, [0, 2, 3, 1])
    size = 299
    resized = tf.compat.v1.image.resize_bilinear(channels_last, [size, size])
    pieces = array_ops.split(resized, num_or_size_splits = num_splits)

    # Loader for the frozen Inception graph (fetched from INCEPTION_URL).
    graph_def_fn = functools.partial(
        tfgan.eval.get_graph_def_from_url_tarball,
        INCEPTION_URL,
        INCEPTION_FROZEN_GRAPH,
        os.path.basename(INCEPTION_URL))
    classify = functools.partial(
        tfgan.eval.run_inception,
        default_graph_def_fn = graph_def_fn,
        output_tensor = 'logits:0')

    stacked = array_ops.stack(pieces)
    piece_logits = tf.map_fn(
        fn = classify,
        elems = stacked,
        parallel_iterations = 8,
        back_prop = False,
        swap_memory = True,
        name = 'RunClassifier')
    return array_ops.concat(array_ops.unstack(piece_logits), 0)

# Build the logits graph once at import time (on the inception_images
# placeholder); get_inception_probs() evaluates it for every batch.
logits=inception_logits()

def get_inception_probs(inps):
    '''
    Return softmax class probabilities, shape [N, 1000], float32, for `inps`.

    `inps` is processed in chunks of BATCH_SIZE; each chunk is rescaled with
    x / 255 * 2 - 1 before being fed to the module-level logits graph, and
    only the first 1000 logit columns are kept.
    '''
    total = inps.shape[0]
    n_batches = int(np.ceil(float(total) / BATCH_SIZE))
    preds = np.zeros([total, 1000], dtype = np.float32)
    for i in tqdm(range(n_batches)):
        lo = i * BATCH_SIZE
        inp = inps[lo:lo + BATCH_SIZE] / 255. * 2 - 1
        batch_logits = session.run(logits, {inception_images: inp})
        preds[lo : lo + min(BATCH_SIZE, inp.shape[0])] = batch_logits[:, :1000]
    # Row-wise softmax over the collected logits.
    exp_preds = np.exp(preds)
    return exp_preds / np.sum(exp_preds, 1, keepdims=True)

def preds2score(preds, splits=10):
    '''
    Turn class probabilities into a "classifier score" (Inception Score).

    Args:
        preds: numpy array of shape [N, num_classes] holding per-sample class
            probabilities.
        splits: number of contiguous chunks the rows are divided into.
    Returns:
        Mean and standard deviation, across the splits, of
        exp(mean_x KL(p(y|x) || p(y))), where p(y) is the mean prediction of
        the split.
    '''
    n = preds.shape[0]
    scores = []
    for i in range(splits):
        part = preds[(i * n // splits):((i + 1) * n // splits), :]
        marginal = np.expand_dims(np.mean(part, 0), 0)
        # log(0) is -inf and 0 * -inf is nan; silence the warnings here and
        # patch those entries below.
        with np.errstate(divide='ignore', invalid='ignore'):
            kl = part * (np.log(part) - np.log(marginal))
        # Convention 0 * log(0) = 0: a class with exactly zero probability
        # contributes nothing to the KL divergence instead of turning the
        # whole score into nan.
        kl = np.where(part == 0, np.zeros_like(kl), kl)
        kl = np.mean(np.sum(kl, 1))
        scores.append(np.exp(kl))
    return np.mean(scores), np.std(scores)

def get_inception_score(images, splits=10):
    '''
    Compute the Inception Score of `images`.

    Args:
        images: numpy array of shape [N, 3, HEIGHT, WIDTH] with values in
            [0, 255]; the value range is only checked on the first image.
        splits: number of splits handed to preds2score.
    Returns:
        Mean and standard deviation of the score across the splits.
    '''
    assert type(images) is np.ndarray
    assert images.ndim == 4
    assert images.shape[1] == 3
    first = images[0]
    assert (np.min(first) >= 0 and np.max(first) > 10), 'Image values should be in the range [0, 255]'

    n_images = images.shape[0]
    print('Calculating Inception Score with %i images in %i splits' % (n_images, splits))
    started = time.time()
    mean, std = preds2score(get_inception_probs(images), splits)
    elapsed = time.time() - started
    print('Inception Score calculation time: %f s' % elapsed)
    # Reference values: 11.38 for 50000 CIFAR-10 training set images, or mean=11.31, std=0.10 if in 10 splits.
    return mean, std

def compute_is_from_npz(path):
    '''
    Load the 'fake' array from the .npz file at `path`, move its last axis
    to position 1 and return get_inception_score() of the result.
    '''
    with np.load(path) as archive:
        samples = archive['fake']
    # get_inception_score asserts shape[1] == 3, so the trailing axis is
    # moved to the front of the spatial axes.
    channels_first = np.transpose(samples, (0, 3, 1, 2))
    print(channels_first.shape)
    return get_inception_score(channels_first)


if __name__ == '__main__':
    # Command-line entry point: computes the Inception Score for one .npz of
    # samples and, when --results_dir is given, records the mean in
    # <results_dir>/is_results.json under the key given by --it.
    import argparse
    import json

    parser = argparse.ArgumentParser('compute TF IS')
    parser.add_argument('--samples', help='path to samples')
    parser.add_argument('--it',
                        type=str,
                        help='iteration the samples come from (key in is_results.json)')
    parser.add_argument('--results_dir', help='path to results_dir')
    args = parser.parse_args()

    it = args.it
    results_dir = args.results_dir
    mean, std = compute_is_from_npz(args.samples)

    if results_dir is None:
        # No results directory: just report the score instead of failing in
        # os.path.join(None, ...).
        print(f'iteration {it} IS: {mean}')
    else:
        # The results file is read first and then rewritten, so it must
        # already exist and contain a JSON object.
        results_path = os.path.join(results_dir, 'is_results.json')
        with open(results_path) as f:
            is_results = json.load(f)

        # float() because json cannot serialise numpy scalar types.
        is_results[it] = float(mean)
        print(f'{results_dir} iteration {it} IS: {mean}')

        with open(results_path, 'w') as f:
            f.write(json.dumps(is_results))

================================================
FILE: gan_training/models/__init__.py
================================================
from gan_training.models import (dcgan_deep, dcgan_shallow, resnet2)

# Registry mapping an architecture name to its Generator class.
# NOTE(review): presumably the key comes from the experiment config - verify against the caller.
generator_dict = {
    'resnet2': resnet2.Generator,
    'dcgan_deep': dcgan_deep.Generator,
    'dcgan_shallow': dcgan_shallow.Generator
}

# Registry mapping an architecture name to its Discriminator class; the keys
# mirror those of generator_dict.
discriminator_dict = {
    'resnet2': resnet2.Discriminator,
    'dcgan_deep': dcgan_deep.Discriminator,
    'dcgan_shallow': dcgan_shallow.Discriminator
}


================================================
FILE: gan_training/models/blocks.py
================================================
import torch
from torch import nn
from torch.autograd import Variable
from torch.nn import functional as F


class ResnetBlock(nn.Module):
    ''' pre-activation residual block: shortcut(x) + 0.1 * conv_1(act(bn1(conv_0(act(bn0(x)))))) '''

    def __init__(self,
                 fin,
                 fout,
                 bn,
                 nclasses,
                 fhidden=None,
                 is_bias=True):
        super().__init__()
        # Attributes
        self.is_bias = is_bias
        self.fin = fin
        self.fout = fout
        # a 1x1 convolution is only needed when the channel count changes
        self.learned_shortcut = (fin != fout)
        self.fhidden = min(fin, fout) if fhidden is None else fhidden

        # Submodules (3x3 convolutions that keep the spatial size)
        self.conv_0 = nn.Conv2d(self.fin,
                                self.fhidden,
                                kernel_size=3,
                                stride=1,
                                padding=1)
        self.conv_1 = nn.Conv2d(self.fhidden,
                                self.fout,
                                kernel_size=3,
                                stride=1,
                                padding=1,
                                bias=is_bias)
        if self.learned_shortcut:
            self.conv_s = nn.Conv2d(self.fin,
                                    self.fout,
                                    kernel_size=1,
                                    stride=1,
                                    padding=0,
                                    bias=False)
        # bn is a factory called as bn(num_features, nclasses); the resulting
        # modules are called as module(x, y)
        self.bn0 = bn(self.fin, nclasses)
        self.bn1 = bn(self.fhidden, nclasses)

    def forward(self, x, y):
        skip = self._shortcut(x)
        residual = self.conv_0(actvn(self.bn0(x, y)))
        residual = self.conv_1(actvn(self.bn1(residual, y)))
        return skip + 0.1 * residual

    def _shortcut(self, x):
        # identity unless the channel count changes
        return self.conv_s(x) if self.learned_shortcut else x


def actvn(x):
    ''' leaky ReLU with negative slope 0.2 '''
    return F.leaky_relu(x, 2e-1)


class LatentEmbeddingConcat(nn.Module):
    ''' looks up a class embedding, scales it to unit L2 norm and appends it to the latent code '''

    def __init__(self, nlabels, embed_dim):
        super().__init__()
        self.embedding = nn.Embedding(nlabels, embed_dim)

    def forward(self, z, y):
        # one label per latent vector
        assert z.size(0) == y.size(0)
        emb = self.embedding(y)
        unit_emb = emb / emb.norm(p=2, dim=1, keepdim=True)
        return torch.cat((z, unit_emb), dim=1)


class NormalizeLinear(nn.Module):
    ''' linear layer whose weight rows are rescaled to unit L2 norm before every forward pass '''

    def __init__(self, act_dim, k_value):
        super().__init__()
        self.lin = nn.Linear(act_dim, k_value)

    def normalize(self):
        # overwrites the stored weight in place (through .data)
        weight = self.lin.weight
        weight.data = F.normalize(weight.data, p=2, dim=1)

    def forward(self, x):
        self.normalize()
        return self.lin(x)


class Identity(nn.Module):
    ''' no-op module: accepts any constructor/forward arguments and returns its first input unchanged '''

    def __init__(self, *args, **kwargs):
        # constructor arguments are accepted and ignored
        super(Identity, self).__init__()

    def forward(self, inp, *args, **kwargs):
        # everything after the first input is ignored
        return inp


class LinearConditionalMaskLogits(nn.Module):
    ''' runs activated logits through fc and masks out the appropriate discriminator score according to class number'''

    def __init__(self, nc, nlabels):
        super().__init__()
        self.fc = nn.Linear(nc, nlabels)

    def forward(self, inp, y=None, take_best=False, get_features=False):
        '''
        inp: (batch, nc) features
        y: class index per sample; required unless take_best or get_features is set
        take_best: return the highest per-class logit of each sample instead of the one selected by y
        get_features: return the full (batch, nlabels) logit matrix
        '''
        out = self.fc(inp)
        if get_features: return out

        if not take_best:
            y = y.view(-1)
            # row indices are created directly on the labels' device, so the
            # lookup also works when y lives on a non-default GPU
            index = torch.arange(out.size(0), device=y.device)
            return out[index, y]
        else:
            # high activation means real, so take the highest activations
            best_logits, _ = out.max(dim=1)
            return best_logits


class ProjectionDiscriminatorLogits(nn.Module):
    ''' takes in activated flattened logits before last linear layer and implements https://arxiv.org/pdf/1802.05637.pdf '''

    def __init__(self, nc, nlabels):
        super().__init__()
        self.fc = nn.Linear(nc, 1)
        self.embedding = nn.Embedding(nlabels, nc)
        self.nlabels = nlabels

    def forward(self, x, y, take_best=False):
        '''
        x: (batch, nc) features
        y: class index per sample; ignored when take_best is set
        take_best: score every sample against all labels and keep the highest
        returns a (batch,) tensor of logits
        '''
        output = self.fc(x)

        if not take_best:
            label_info = torch.sum(self.embedding(y) * x, dim=1, keepdim=True)
            return (output + label_info).view(x.size(0))
        else:
            #TODO: this may be computationally expensive, maybe we want to do the global pooling first to reduce x's size
            # labels are built on x's device so this branch also runs on CPU
            # (and on a non-default GPU) instead of requiring .cuda()
            index = torch.arange(self.nlabels, device=x.device)
            # row i * nlabels + j pairs sample i with label j
            labels = index.repeat((x.size(0), ))
            x = x.repeat_interleave(self.nlabels, dim=0)
            label_info = torch.sum(self.embedding(labels) * x,
                                   dim=1,
                                   keepdim=True).view(output.size(0),
                                                      self.nlabels)
            # high activation means real, so take the highest activations
            best_logits, _ = label_info.max(dim=1)
            return output.view(output.size(0)) + best_logits


class LinearUnconditionalLogits(nn.Module):
    ''' plain discriminator head: one linear unit per sample, label input ignored '''

    def __init__(self, nc):
        super().__init__()
        self.fc = nn.Linear(nc, 1)

    def forward(self, inp, y, take_best=False):
        # there is nothing to choose between without class-specific logits
        assert (take_best == False)

        logits = self.fc(inp)
        n_samples = logits.size(0)
        return logits.view(n_samples)


class Reshape(nn.Module):
    ''' views each sample as the shape given at construction, keeping the batch dimension '''

    def __init__(self, *shape):
        super().__init__()
        self.shape = shape

    def forward(self, x):
        return x.view(x.shape[0], *self.shape)


class ConditionalBatchNorm2d(nn.Module):
    ''' from https://github.com/pytorch/pytorch/issues/8985#issuecomment-405080775

    non-affine batch norm followed by a per-class scale and shift read from an embedding table
    '''

    def __init__(self, num_features, num_classes):
        super().__init__()
        self.num_features = num_features
        self.bn = nn.BatchNorm2d(num_features, affine=False)
        # each embedding row stores [scale | shift] for one class
        self.embed = nn.Embedding(num_classes, num_features * 2)
        table = self.embed.weight.data
        table[:, :num_features].normal_(1, 0.02)  # Initialize scale at N(1, 0.02)
        table[:, num_features:].zero_()  # Initialize bias at 0

    def forward(self, x, y):
        normed = self.bn(x)
        gamma, beta = self.embed(y).chunk(2, 1)
        # broadcast the per-sample, per-channel parameters over H and W
        per_channel = (-1, self.num_features, 1, 1)
        return gamma.view(per_channel) * normed + beta.view(per_channel)


class BatchNorm2d(nn.Module):
    ''' nn.BatchNorm2d behind the (x, y) calling convention of the conditional norm layers; y is ignored '''

    def __init__(self, nc, nchannels, **kwargs):
        # only nc is used; nchannels and kwargs are accepted and ignored
        super(BatchNorm2d, self).__init__()
        self.bn = nn.BatchNorm2d(nc)

    def forward(self, x, y):
        normed = self.bn(x)
        return normed


================================================
FILE: gan_training/models/dcgan_deep.py
================================================
import torch
from torch import nn
from torch.nn import functional as F
import torch.utils.data
import torch.utils.data.distributed
from gan_training.models import blocks


class Generator(nn.Module):
    ''' DCGAN-style generator: linear projection to a 4x4 feature map, three stride-2 transposed convolutions, then a 3x3 conv + tanh '''

    def __init__(self,
                 nlabels,
                 conditioning,
                 z_dim=128,
                 nc=3,
                 ngf=64,
                 embed_dim=256,
                 **kwargs):
        super(Generator, self).__init__()

        # an unconditional generator must be built with a single label
        assert conditioning != 'unconditional' or nlabels == 1

        if conditioning == 'embedding':
            self.get_latent = blocks.LatentEmbeddingConcat(nlabels, embed_dim)
            latent_dim = z_dim + embed_dim
        elif conditioning == 'unconditional':
            self.get_latent = blocks.Identity()
            latent_dim = z_dim
        else:
            raise NotImplementedError(
                f"{conditioning} not implemented for generator")
        self.fc = nn.Linear(latent_dim, 4 * 4 * ngf * 8)

        bn = blocks.BatchNorm2d

        self.nlabels = nlabels

        # each transposed conv (kernel 4, stride 2, padding 1) doubles H and W
        self.conv1 = nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1)
        self.bn1 = bn(ngf * 4, nlabels)

        self.conv2 = nn.ConvTranspose2d(ngf * 4, ngf * 2, 4, 2, 1)
        self.bn2 = bn(ngf * 2, nlabels)

        self.conv3 = nn.ConvTranspose2d(ngf * 2, ngf, 4, 2, 1)
        self.bn3 = bn(ngf, nlabels)

        self.conv_out = nn.Sequential(nn.Conv2d(ngf, nc, 3, 1, 1), nn.Tanh())

    def forward(self, input, y):
        # labels above the known range are mapped onto the last label
        y = y.clamp(None, self.nlabels - 1)

        hidden = self.fc(self.get_latent(input, y))
        hidden = hidden.view(hidden.size(0), -1, 4, 4)

        stages = ((self.conv1, self.bn1),
                  (self.conv2, self.bn2),
                  (self.conv3, self.bn3))
        for deconv, norm in stages:
            hidden = F.relu(norm(deconv(hidden), y))
        return self.conv_out(hidden)


class Discriminator(nn.Module):
    ''' seven-layer convolutional discriminator with optional PacGAN-style input packing and a conditioning-dependent logit head '''

    def __init__(self,
                 nlabels,
                 conditioning,
                 nc=3,
                 ndf=64,
                 pack_size=1,
                 features='penultimate',
                 **kwargs):

        super(Discriminator, self).__init__()

        # an unconditional discriminator must be built with a single label
        assert conditioning != 'unconditional' or nlabels == 1

        self.nlabels = nlabels

        def conv_block(cin, cout, kernel, stride):
            # conv (padding 1) followed by LeakyReLU(0.1)
            return nn.Sequential(nn.Conv2d(cin, cout, kernel, stride, 1),
                                 nn.LeakyReLU(0.1))

        # 3x3/stride-1 layers keep the resolution, 4x4/stride-2 layers halve it
        self.conv1 = conv_block(nc * pack_size, ndf, 3, 1)
        self.conv2 = conv_block(ndf, ndf, 4, 2)
        self.conv3 = conv_block(ndf, ndf * 2, 3, 1)
        self.conv4 = conv_block(ndf * 2, ndf * 2, 4, 2)
        self.conv5 = conv_block(ndf * 2, ndf * 4, 3, 1)
        self.conv6 = conv_block(ndf * 4, ndf * 4, 4, 2)
        self.conv7 = conv_block(ndf * 4, ndf * 8, 3, 1)

        # the head expects a flattened 4x4 map with ndf * 8 channels
        flat_dim = ndf * 8 * 4 * 4
        if conditioning == 'mask':
            self.fc_out = blocks.LinearConditionalMaskLogits(flat_dim, nlabels)
        elif conditioning == 'unconditional':
            self.fc_out = blocks.LinearUnconditionalLogits(flat_dim)
        else:
            raise NotImplementedError(
                f"{conditioning} not implemented for discriminator")

        self.features = features
        self.pack_size = pack_size
        print(f'Getting features from {self.features}')

    def stack(self, x):
        #pacgan
        # concatenates every pack_size consecutive samples along the channel axis
        pack = self.pack_size
        assert (x.size(0) % pack == 0)
        if pack == 1:
            return x
        n_groups = x.size(0) // pack
        packed = [
            torch.cat(list(x[g * pack:(g + 1) * pack]), dim=0)
            for g in range(n_groups)
        ]
        return torch.stack(packed)

    def forward(self, input, y=None, get_features=False):
        hidden = self.stack(input)
        layers = (self.conv1, self.conv2, self.conv3, self.conv4,
                  self.conv5, self.conv6, self.conv7)
        for layer in layers:
            hidden = layer(hidden)

        if get_features:
            if self.features == "penultimate":
                return hidden.view(hidden.size(0), -1)
            if self.features == "summed":
                # sum over the spatial positions of every channel
                return hidden.view(hidden.size(0), hidden.size(1), -1).sum(dim=2)
            # any other feature mode falls through to the logits

        flat = hidden.view(hidden.size(0), -1)
        # labels above the known range are mapped onto the last label
        y = y.clamp(None, self.nlabels - 1)
        result = self.fc_out(flat, y)
        assert (len(result.shape) == 1)
        return result


if __name__ == '__main__':
    # Smoke test: build an unconditional generator/discriminator pair and push
    # one 32x32 sample through both. nlabels and conditioning are required
    # constructor arguments, and both forward passes need a label tensor
    # (it is clamped even in the unconditional case).
    z = torch.zeros((1, 128))
    y = torch.zeros((1, ), dtype=torch.long)
    g = Generator(nlabels=1, conditioning='unconditional')
    x = torch.zeros((1, 3, 32, 32))
    d = Discriminator(nlabels=1, conditioning='unconditional')

    g(z, y)
    d(g(z, y), y)
    d(x, y)


================================================
FILE: gan_training/models/dcgan_shallow.py
================================================
import torch
from torch import nn
from torch.nn import functional as F
import torch.utils.data
import torch.utils.data.distributed
from gan_training.models import blocks


class Generator(nn.Module):
    """DCGAN-style generator.

    A linear layer projects the latent code to a ``4 x 4`` map with
    ``ngf * 8`` channels; three stride-2 transposed convolutions (each followed
    by ``blocks.BatchNorm2d`` and ReLU) then double the resolution, and a final
    3x3 convolution with ``tanh`` produces ``nc`` output channels.

    ``conditioning`` selects how labels enter the latent code: ``'embedding'``
    uses ``blocks.LatentEmbeddingConcat``; ``'unconditional'`` uses
    ``blocks.Identity`` and requires ``nlabels == 1``.
    """

    def __init__(self,
                 nlabels,
                 conditioning,
                 z_dim=128,
                 nc=3,
                 ngf=64,
                 embed_dim=256,
                 **kwargs):
        super().__init__()

        assert conditioning != 'unconditional' or nlabels == 1

        if conditioning not in ('embedding', 'unconditional'):
            raise NotImplementedError(
                f"{conditioning} not implemented for generator")

        if conditioning == 'embedding':
            self.get_latent = blocks.LatentEmbeddingConcat(nlabels, embed_dim)
            latent_dim = z_dim + embed_dim
        else:
            self.get_latent = blocks.Identity()
            latent_dim = z_dim
        self.fc = nn.Linear(latent_dim, 4 * 4 * ngf * 8)

        self.nlabels = nlabels

        # conv1/bn1 .. conv3/bn3, created in the same order as their names.
        widths = (ngf * 8, ngf * 4, ngf * 2, ngf)
        for idx, (c_in, c_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f'conv{idx}', nn.ConvTranspose2d(c_in, c_out, 4, 2, 1))
            setattr(self, f'bn{idx}', blocks.BatchNorm2d(c_out, nlabels))

        self.conv_out = nn.Sequential(nn.Conv2d(ngf, nc, 3, 1, 1), nn.Tanh())

    def forward(self, input, y):
        """Generate images from latent codes ``input`` and labels ``y``.

        Labels above ``nlabels - 1`` are clamped down to it before use.
        """
        y = y.clamp(None, self.nlabels - 1)

        hidden = self.fc(self.get_latent(input, y))
        hidden = hidden.view(hidden.size(0), -1, 4, 4)

        upsampling = ((self.conv1, self.bn1),
                      (self.conv2, self.bn2),
                      (self.conv3, self.bn3))
        for conv, norm in upsampling:
            hidden = F.relu(norm(conv(hidden), y))
        return self.conv_out(hidden)


class Discriminator(nn.Module):
    """DCGAN-style discriminator with optional PacGAN packing.

    Four stride-2 convolutions (each with batch norm and leaky ReLU) are
    followed by a linear output head chosen by ``conditioning``:
    ``'mask'`` -> ``blocks.LinearConditionalMaskLogits``,
    ``'unconditional'`` -> ``blocks.LinearUnconditionalLogits`` (requires
    ``nlabels == 1``).

    Args:
        nlabels: number of labels; labels are clamped to ``nlabels - 1``.
        conditioning: ``'mask'`` or ``'unconditional'``.
        features: stored on the instance and printed; ``forward`` returns the
            flattened ``conv4`` output for ``get_features`` regardless of it.
        pack_size: number of consecutive samples packed into one input; the
            first convolution expects ``nc * pack_size`` channels.
        nc: channels per image.
        ndf: base channel width.
    """

    def __init__(self,
                 nlabels,
                 conditioning,
                 features='penultimate',
                 pack_size=1,
                 nc=3,
                 ndf=64,
                 **kwargs):
        super().__init__()

        assert conditioning != 'unconditional' or nlabels == 1

        self.nlabels = nlabels

        # conv1 .. conv4, created in the same order as their names.
        widths = (nc * pack_size, ndf, ndf * 2, ndf * 4, ndf * 8)
        for idx, (c_in, c_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f'conv{idx}', nn.Sequential(
                nn.Conv2d(c_in, c_out, 4, 2, 1),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(0.2, inplace=True)))

        # The head takes ndf * 8 * 4 inputs, i.e. the flattened conv4 output
        # must have 4 values per channel.
        if conditioning == 'mask':
            self.fc_out = blocks.LinearConditionalMaskLogits(ndf * 8 * 4, nlabels)
        elif conditioning == 'unconditional':
            self.fc_out = blocks.LinearUnconditionalLogits(ndf * 8 * 4)
        else:
            raise NotImplementedError(
                f"{conditioning} not implemented for discriminator")

        self.pack_size = pack_size
        self.features = features
        print(f'Getting features from {self.features}')

    def stack(self, x):
        """Pack every ``pack_size`` consecutive samples into one (PacGAN).

        Returns ``x`` unchanged when ``pack_size == 1``. Trailing samples that
        do not fill a complete group are dropped.
        """
        nc = self.pack_size
        if nc == 1:
            return x
        x_new = []
        for i in range(x.size(0) // nc):
            imgs_to_stack = x[i * nc:(i + 1) * nc]
            x_new.append(torch.cat(tuple(imgs_to_stack), dim=0))
        return torch.stack(x_new)

    def forward(self, input, y=None, get_features=False):
        """Return logits for ``input``, or flattened features.

        Args:
            input: image batch, packed with ``stack`` first.
            y: labels, clamped to at most ``nlabels - 1``. May be ``None``; it
                is then passed to ``fc_out`` as-is (NOTE(review): confirm the
                configured ``fc_out`` accepts ``None`` labels).
            get_features: if true, return the flattened ``conv4`` output.

        Returns:
            The 1-D output of ``fc_out``, or a 2-D feature tensor.
        """
        out = self.stack(input)
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            out = conv(out)
        out = out.view(out.size(0), -1)
        if get_features:
            return out
        # The signature allows y=None; only clamp when labels were supplied
        # (same guard as resnet2.Discriminator.forward).
        if y is not None:
            y = y.clamp(None, self.nlabels - 1)
        result = self.fc_out(out, y)
        assert (len(result.shape) == 1)
        return result


if __name__ == '__main__':
    # Smoke test: build an unconditional generator/discriminator pair and run
    # zero tensors through them. Both constructors require ``nlabels`` and
    # ``conditioning``, and both forward passes clamp the label tensor, so
    # labels are supplied explicitly.
    z = torch.zeros((1, 128))
    y = torch.zeros(1, dtype=torch.long)
    g = Generator(nlabels=1, conditioning='unconditional')
    x = torch.zeros((1, 3, 32, 32))
    d = Discriminator(nlabels=1, conditioning='unconditional')

    g(z, y)
    d(g(z, y), y)
    d(x, y)


================================================
FILE: gan_training/models/resnet2.py
================================================
import torch
from torch import nn
from torch.nn import functional as F
from torch.autograd import Variable
import torch.utils.data
import torch.utils.data.distributed

from gan_training.models import blocks
from gan_training.models.blocks import ResnetBlock
from torch.nn.utils.spectral_norm import spectral_norm


class Generator(nn.Module):
    """Residual generator producing ``size x size`` RGB images.

    The latent code (optionally concatenated with a label embedding) is
    projected to a ``16 * nfilter`` channel map of side ``size // 32`` and
    refined by six stages of two ``ResnetBlock`` modules, with a 2x
    interpolation before every stage except the first. No batch norm is
    used: the blocks receive ``blocks.Identity`` as their norm layer.
    """

    def __init__(self,
                 z_dim,
                 nlabels,
                 size,
                 conditioning,
                 embed_size=256,
                 nfilter=64,
                 **kwargs):
        super().__init__()
        self.s0 = size // 32
        self.nf = nfilter
        self.nlabels = nlabels
        self.z_dim = z_dim
        s0, nf = self.s0, self.nf

        assert conditioning != 'unconditional' or nlabels == 1

        if conditioning not in ('embedding', 'unconditional'):
            raise NotImplementedError(
                f"{conditioning} not implemented for generator")

        if conditioning == 'embedding':
            self.get_latent = blocks.LatentEmbeddingConcat(nlabels, embed_size)
            latent_dim = z_dim + embed_size
        else:
            self.get_latent = blocks.Identity()
            latent_dim = z_dim
        self.fc = nn.Linear(latent_dim, 16 * nf * s0 * s0)

        # either use conditional batch norm, or use no batch norm
        bn = blocks.Identity

        # (input, output) channel multipliers per stage; the second block of a
        # stage keeps the stage's output width.
        stage_widths = [(16, 16), (16, 16), (16, 8), (8, 4), (4, 2), (2, 1)]
        for stage, (w_in, w_out) in enumerate(stage_widths):
            setattr(self, f'resnet_{stage}_0',
                    ResnetBlock(w_in * nf, w_out * nf, bn, nlabels))
            setattr(self, f'resnet_{stage}_1',
                    ResnetBlock(w_out * nf, w_out * nf, bn, nlabels))

        self.conv_img = nn.Conv2d(nf, 3, 3, padding=1)

    def forward(self, z, y):
        """Generate images for latent codes ``z`` and labels ``y``.

        Labels above ``nlabels - 1`` are clamped down to it before use.
        """
        y = y.clamp(None, self.nlabels - 1)
        out = self.fc(self.get_latent(z, y))
        out = out.view(z.size(0), 16 * self.nf, self.s0, self.s0)

        for stage in range(6):
            if stage > 0:
                out = F.interpolate(out, scale_factor=2)
            out = getattr(self, f'resnet_{stage}_0')(out, y)
            out = getattr(self, f'resnet_{stage}_1')(out, y)

        return torch.tanh(self.conv_img(actvn(out)))


class Discriminator(nn.Module):
    """Residual discriminator for ``size x size`` RGB images.

    A 3x3 convolution lifts the image to ``nfilter`` channels; six stages of
    two ``ResnetBlock`` modules follow, with a stride-2 average pool before
    every stage except the first. The flattened result feeds a head chosen
    by ``conditioning`` (``'mask'`` or ``'unconditional'``).
    """

    def __init__(self,
                 nlabels,
                 size,
                 conditioning,
                 nfilter=64,
                 features='penultimate',
                 **kwargs):
        super().__init__()
        self.s0 = size // 32
        self.nf = nfilter
        self.nlabels = nlabels
        s0, nf = self.s0, self.nf

        assert conditioning != 'unconditional' or nlabels == 1
        bn = blocks.Identity

        self.conv_img = nn.Conv2d(3, 1 * nf, 3, padding=1)

        # (input, output) channel multipliers per stage; the first block of a
        # stage keeps the stage's input width.
        stage_widths = [(1, 2), (2, 4), (4, 8), (8, 16), (16, 16), (16, 16)]
        for stage, (w_in, w_out) in enumerate(stage_widths):
            setattr(self, f'resnet_{stage}_0',
                    ResnetBlock(w_in * nf, w_in * nf, bn, nlabels))
            setattr(self, f'resnet_{stage}_1',
                    ResnetBlock(w_in * nf, w_out * nf, bn, nlabels))

        head_dim = 16 * nf * s0 * s0
        if conditioning == 'mask':
            self.fc_out = blocks.LinearConditionalMaskLogits(head_dim, nlabels)
        elif conditioning == 'unconditional':
            self.fc_out = blocks.LinearUnconditionalLogits(head_dim)
        else:
            raise NotImplementedError(
                f"{conditioning} not implemented for discriminator")

        self.features = features

    def forward(self, x, y=None, get_features=False):
        """Return logits for ``x``, or features when ``get_features`` is set.

        With ``get_features`` the result is the per-channel sum over spatial
        positions if ``self.features == 'summed'``, otherwise the flattened
        activation. Labels, when given, are clamped to ``nlabels - 1``.
        """
        batch_size = x.size(0)
        if y is not None:
            y = y.clamp(None, self.nlabels - 1)

        out = self.conv_img(x)
        for stage in range(6):
            if stage > 0:
                out = F.avg_pool2d(out, 3, stride=2, padding=1)
            out = getattr(self, f'resnet_{stage}_0')(out, y)
            out = getattr(self, f'resnet_{stage}_1')(out, y)
        out = actvn(out)

        if get_features and self.features == 'summed':
            return out.view(out.size(0), out.size(1), -1).sum(dim=2)

        flat = out.view(batch_size, 16 * self.nf * self.s0 * self.s0)
        if get_features:
            return flat

        result = self.fc_out(flat, y)
        assert (len(result.shape) == 1)
        return result


def actvn(x):
    """Leaky ReLU with negative slope 0.2, the activation used by these nets."""
    return F.leaky_relu(x, negative_slope=2e-1)

================================================
FILE: gan_training/models/resnet2s.py
================================================
import torch
from torch import nn
from torch.nn import functional as F
from torch.autograd import Variable
import torch.utils.data
import torch.utils.data.distributed
from collections import OrderedDict


class Reshape(nn.Module):
    """Views its input as ``(batch, *shape)``, keeping the leading dimension."""

    def __init__(self, *shape):
        super().__init__()
        self.shape = shape

    def forward(self, x):
        return x.view(x.shape[0], *self.shape)


class Generator(nn.Module):
    '''
    Sequential re-organisation of resnet2.Generator: every layer lives in
    ``self.layers`` (an ``nn.Sequential`` with named entries) so the network
    can be inverted layer by layer. State dicts from resnet2.Generator can
    be loaded through ``load_v2_state_dict``.
    '''

    def __init__(self,
                 z_dim,
                 nlabels,
                 size,
                 embed_size=256,
                 nfilter=64,
                 use_class_labels=False,
                 **kwargs):
        super().__init__()
        self.s0 = size // 32
        self.nf = nfilter
        self.z_dim = z_dim
        self.use_class_labels = use_class_labels
        s0, nf = self.s0, self.nf

        # Optional label conditioning widens the input of the first layer.
        if use_class_labels:
            self.condition = ConditionGen(z_dim, nlabels, embed_size)
            latent_dim = self.condition.latent_dim
        else:
            latent_dim = z_dim

        named_layers = [
            ('fc', nn.Linear(latent_dim, 16 * nf * s0 * s0)),
            ('reshape', Reshape(16 * self.nf, self.s0, self.s0)),
        ]
        # (input, output) channel multipliers per stage; an upsample precedes
        # every stage except the first.
        stage_widths = [(16, 16), (16, 16), (16, 8), (8, 4), (4, 2), (2, 1)]
        for stage, (w_in, w_out) in enumerate(stage_widths):
            if stage > 0:
                named_layers.append(
                    (f'upsample_{stage}', nn.Upsample(scale_factor=2)))
            named_layers.append(
                (f'resnet_{stage}_0', ResnetBlock(w_in * nf, w_out * nf)))
            named_layers.append(
                (f'resnet_{stage}_1', ResnetBlock(w_out * nf, w_out * nf)))
        named_layers.extend([
            ('img_relu', nn.LeakyReLU(2e-1)),
            ('conv_img', nn.Conv2d(nf, 3, 3, padding=1)),
            ('tanh', nn.Tanh()),
        ])
        self.layers = nn.Sequential(OrderedDict(named_layers))

    def forward(self, z, y=None):
        """Generate images from ``z``; ``y`` is required iff labels are used."""
        assert (y is None or z.size(0) == y.size(0))
        assert (not self.use_class_labels or y is not None)
        latent = self.condition(z, y) if self.use_class_labels else z
        return self.layers(latent)

    def load_v2_state_dict(self, state_dict):
        """Load a resnet2.Generator state dict by renaming its keys.

        A ``module.`` component is stripped, embedding weights are moved under
        ``condition.``, and every other key is moved under ``layers.``.
        """
        renamed = {}
        for key, tensor in state_dict.items():
            if 'module.' in key:
                key = key.split('module.')[1]
            if key.startswith('embedding'):
                new_key = 'condition.' + key
            elif key == 'get_latent.embedding.weight':
                new_key = 'condition.embedding.weight'
            else:
                new_key = 'layers.' + key
            renamed[new_key] = tensor
        self.load_state_dict(renamed)


class ConditionGen(nn.Module):
    """Concatenates a latent code with an L2-normalised label embedding.

    ``latent_dim`` is the width of the concatenated result,
    ``z_dim + embed_size``.
    """

    def __init__(self, z_dim, nlabels, embed_size=256):
        super().__init__()
        self.embedding = nn.Embedding(nlabels, embed_size)
        self.z_dim = z_dim
        self.nlabels = nlabels
        self.embed_size = embed_size
        self.latent_dim = z_dim + embed_size

    def forward(self, z, y):
        """Return ``cat([z, normalised embedding], dim=1)``.

        ``y`` is looked up in the embedding table when it is an int64 tensor;
        any other dtype is treated as an already-computed embedding.
        """
        assert (z.size(0) == y.size(0))
        yembed = self.embedding(y) if y.dtype is torch.int64 else y
        unit = yembed / yembed.norm(p=2, dim=1, keepdim=True)
        return torch.cat((z, unit), dim=1)


def convert_from_resnet2_generator(gen):
    """Build a sequential ``Generator`` carrying the weights of ``gen``.

    Args:
        gen: a resnet2-style generator exposing ``s0``, ``z_dim``, ``nf`` and
            ``state_dict()``. Its label embedding, if any, is looked up either
            directly on ``gen.embedding`` or on ``gen.get_latent.embedding``.

    Returns:
        A new ``Generator`` with ``gen``'s state dict loaded via
        ``load_v2_state_dict``. It uses class labels only when an embedding
        was found on ``gen``.
    """
    nlabels, embed_size = 0, 0
    use_class_labels = False

    embedding = getattr(gen, 'embedding', None)
    # resnet2.Generator always defines ``get_latent``; for unconditional
    # models it is ``blocks.Identity()``, which is not expected to carry an
    # embedding, so test for the embedding itself rather than for
    # ``get_latent``. An embedding found there takes precedence.
    latent = getattr(gen, 'get_latent', None)
    if latent is not None and hasattr(latent, 'embedding'):
        embedding = latent.embedding

    if embedding is not None:
        nlabels = embedding.num_embeddings
        embed_size = embedding.embedding_dim
        use_class_labels = True

    size = gen.s0 * 32
    newgen = Generator(gen.z_dim, nlabels, size, embed_size, gen.nf,
                       use_class_labels)
    newgen.load_v2_state_dict(gen.state_dict())
    return newgen


class ResnetBlock(nn.Module):
    """Pre-activation residual block: ``shortcut(x) + 0.1 * conv(conv(x))``.

    Args:
        fin: input channels.
        fout: output channels.
        fhidden: channels between the two 3x3 convolutions; defaults to
            ``min(fin, fout)``.
        is_bias: whether the second convolution has a bias.

    When ``fin != fout`` the shortcut is a bias-free 1x1 convolution,
    otherwise it is the identity.
    """

    def __init__(self, fin, fout, fhidden=None, is_bias=True):
        super().__init__()
        self.is_bias = is_bias
        self.learned_shortcut = (fin != fout)
        self.fin = fin
        self.fout = fout
        self.fhidden = min(fin, fout) if fhidden is None else fhidden

        self.conv_0 = nn.Conv2d(
            self.fin, self.fhidden, kernel_size=3, stride=1, padding=1)
        self.conv_1 = nn.Conv2d(
            self.fhidden, self.fout, kernel_size=3, stride=1, padding=1,
            bias=is_bias)
        if self.learned_shortcut:
            self.conv_s = nn.Conv2d(
                self.fin, self.fout, kernel_size=1, stride=1, padding=0,
                bias=False)

    def forward(self, x):
        skip = self._shortcut(x)
        residual = self.conv_1(actvn(self.conv_0(actvn(x))))
        return skip + 0.1 * residual

    def _shortcut(self, x):
        """Project ``x`` to ``fout`` channels if needed, else pass it through."""
        return self.conv_s(x) if self.learned_shortcut else x


def actvn(x):
    """Apply a leaky ReLU whose negative slope is 0.2."""
    return F.leaky_relu(x, negative_slope=2e-1)


================================================
FILE: gan_training/models/resnet3.py
================================================
import torch
from torch import nn
from torch.nn import functional as F
from torch.autograd import Variable
import torch.utils.data
import torch.utils.data.distributed
from collections import OrderedDict

class Generator(nn.Module):
    '''
    Perfectly equivalent to resnet2.Generator (can load state dicts
    from that class), but organizes layers as a sequence for more
    automatic inversion.

    All layers live in ``self.layers`` (an ``nn.Sequential`` with named
    entries); when ``use_class_labels`` is set, ``self.condition`` prepares
    the latent input from ``z`` and the labels.
    '''
    def __init__(self, z_dim, nlabels, size, embed_size=256, nfilter=64,
            use_class_labels=False, **kwargs):
        super().__init__()
        s0 = self.s0 = size // 32
        nf = self.nf = nfilter
        self.z_dim = z_dim
        self.use_class_labels = use_class_labels

        # Submodules
        if use_class_labels:
            self.condition = ConditionGen(z_dim, nlabels, embed_size)
            latent_dim = self.condition.latent_dim
        else:
            latent_dim = z_dim

        named_layers = [
            ('fc', nn.Linear(latent_dim, 16*nf*s0*s0)),
            ('reshape', Reshape(16*self.nf, self.s0, self.s0)),
        ]
        # (input, output) channel multipliers per stage; an upsample precedes
        # every stage except the first. Layers are created in sequence order.
        stage_widths = [(16, 16), (16, 16), (16, 8), (8, 4), (4, 2), (2, 1)]
        for stage, (w_in, w_out) in enumerate(stage_widths):
            if stage > 0:
                named_layers.append(
                    (f'upsample_{stage}', nn.Upsample(scale_factor=2)))
            named_layers.append(
                (f'resnet_{stage}_0', ResnetBlock(w_in*nf, w_out*nf)))
            named_layers.append(
                (f'resnet_{stage}_1', ResnetBlock(w_out*nf, w_out*nf)))
        named_layers.extend([
            ('img_relu', nn.LeakyReLU(2e-1)),
            ('conv_img', nn.Conv2d(nf, 3, 3, padding=1)),
            ('tanh', nn.Tanh()),
        ])
        self.layers = nn.Sequential(OrderedDict(named_layers))

    def forward(self, z, y=None):
        """Generate images from ``z``; ``y`` is required iff labels are used."""
        assert(y is None or z.size(0) == y.size(0))
        assert(not self.use_class_labels or y is not None)
        if self.use_class_labels:
            z = self.condition(z, y)
        return self.layers(z)

    def load_v2_state_dict(self, state_dict):
        """Load a resnet2.Generator state dict by renaming its keys.

        A leading ``module.`` (as added by ``nn.DataParallel`` wrappers) is
        stripped, embedding weights are moved under ``condition.``, and every
        other key is moved under ``layers.``.
        """
        prefix = 'module.'
        converted = {}
        for k, v in state_dict.items():
            # Accept checkpoints saved from a wrapped model, as the resnet2s
            # variant of this loader does.
            if k.startswith(prefix):
                k = k[len(prefix):]
            if k.startswith('embedding'):
                k = 'condition.' + k
            elif k == 'get_latent.embedding.weight':
                k = 'condition.embedding.weight'
            else:
                k = 'layers.' + k
            converted[k] = v
        self.load_state_dict(converted)

class Reshape(nn.Module):
    """Reshapes each sample of a batch to ``shape``; dim 0 is preserved."""
    def __init__(self, *shape):
        super().__init__()
        self.shape = shape
    def forward(self, x):
        target = (x.shape[0],) + self.shape
        return x.view(target)

class ConditionGen(nn.Module):
    """Builds the generator input ``[z, unit-norm label embedding]``."""
    def __init__(self, z_dim, nlabels, embed_size=256):
        super().__init__()
        self.embedding = nn.Embedding(nlabels, embed_size)
        self.latent_dim = embed_size + z_dim
        self.z_dim, self.nlabels, self.embed_size = z_dim, nlabels, embed_size

    def forward(self, z, y):
        """Append the L2-normalised embedding of ``y`` to ``z`` along dim 1.

        An int64 ``y`` is looked up in the embedding table; any other dtype is
        taken to be the embedding itself.
        """
        assert(z.size(0) == y.size(0))
        if y.dtype is torch.int64:
            yembed = self.embedding(y)
        else:
            yembed = y
        scale = torch.norm(yembed, p=2, dim=1, keepdim=True)
        return torch.cat([z, yembed / scale], dim=1)

def convert_from_resnet2_generator(gen):
    """Build a sequential ``Generator`` carrying the weights of ``gen``.

    Args:
        gen: a resnet2-style generator exposing ``s0``, ``z_dim``, ``nf`` and
            ``state_dict()``. Its label embedding, if any, is taken from
            ``gen.get_latent.embedding`` or, for generators that set
            ``use_class_labels``, from ``gen.embedding``.

    Returns:
        A new ``Generator`` with ``gen``'s state dict loaded via
        ``load_v2_state_dict``. It uses class labels only when an embedding
        was found on ``gen``.
    """
    nlabels, embed_size = 0, 0
    # Must be defined even when no embedding is found (unconditional models).
    use_class_labels = False

    # resnet2.Generator always defines ``get_latent``; for unconditional
    # models it is ``blocks.Identity()``, which is not expected to carry an
    # embedding, so test for the embedding itself.
    latent = getattr(gen, 'get_latent', None)
    if latent is not None and hasattr(latent, 'embedding'):
        embedding = latent.embedding
    elif getattr(gen, 'use_class_labels', False):
        # new version does not have gen.use_class_labels..
        embedding = gen.embedding
    else:
        embedding = None

    if embedding is not None:
        nlabels = embedding.num_embeddings
        embed_size = embedding.embedding_dim
        use_class_labels = True

    size = gen.s0 * 32
    newgen = Generator(gen.z_dim, nlabels, size, embed_size, gen.nf, use_class_labels)
    newgen.load_v2_state_dict(gen.state_dict())
    return newgen


class ResnetBlock(nn.Module):
    """Residual block computing ``shortcut(x) + 0.1 * f(x)``.

    ``f`` is activation -> 3x3 conv -> activation -> 3x3 conv, with
    ``fhidden`` (default ``min(fin, fout)``) channels in between. The
    shortcut is a bias-free 1x1 convolution when ``fin != fout`` and the
    identity otherwise. ``is_bias`` controls the bias of the second conv.
    """
    def __init__(self, fin, fout, fhidden=None, is_bias=True):
        super().__init__()
        if fhidden is None:
            fhidden = min(fin, fout)

        # Attributes
        self.is_bias = is_bias
        self.learned_shortcut = (fin != fout)
        self.fin, self.fout, self.fhidden = fin, fout, fhidden

        # Submodules (created in this order: conv_0, conv_1, conv_s)
        conv3x3 = dict(kernel_size=3, stride=1, padding=1)
        self.conv_0 = nn.Conv2d(fin, fhidden, **conv3x3)
        self.conv_1 = nn.Conv2d(fhidden, fout, bias=is_bias, **conv3x3)
        if self.learned_shortcut:
            self.conv_s = nn.Conv2d(fin, fout,
                    kernel_size=1, stride=1, padding=0, bias=False)

    def forward(self, x):
        x_s = self._shortcut(x)
        hidden = self.conv_0(actvn(x))
        return x_s + 0.1*self.conv_1(actvn(hidden))

    def _shortcut(self, x):
        """Return ``x`` mapped to ``fout`` channels (identity if unchanged)."""
        if not self.learned_shortcut:
            return x
        return self.conv_s(x)


def actvn(x):
    """Shared activation: leaky ReLU with a negative slope of 0.2."""
    slope = 2e-1
    return F.leaky_relu(x, slope)

================================================
FILE: gan_training/train.py
================================================
# coding: utf-8
import torch
from torch.nn import functional as F
import torch.utils.data
import torch.utils.data.distributed
from torch import autograd
import numpy as np


class Trainer(object):
    """Runs single optimisation steps for a generator/discriminator pair.

    Args:
        generator: module called as ``generator(z, y)``.
        discriminator: module called as ``discriminator(x, y)``.
        g_optimizer: optimizer stepped at the end of ``generator_trainstep``.
        d_optimizer: optimizer stepped at the end of
            ``discriminator_trainstep``.
        gan_type: ``'standard'`` or ``'wgan'`` (see ``compute_loss``).
        reg_type: one of ``REG_TYPES``.
        reg_param: weight multiplied onto the regularization term.
    """

    # Regularizers handled by ``discriminator_trainstep``.
    REG_TYPES = ('none', 'real', 'fake', 'real_fake', 'wgangp', 'wgangp0')

    def __init__(self,
                 generator,
                 discriminator,
                 g_optimizer,
                 d_optimizer,
                 gan_type,
                 reg_type,
                 reg_param):

        self.generator = generator
        self.discriminator = discriminator
        self.g_optimizer = g_optimizer
        self.d_optimizer = d_optimizer
        self.gan_type = gan_type
        self.reg_type = reg_type
        self.reg_param = reg_param

        print('D reg gamma', self.reg_param)

    def generator_trainstep(self, y, z):
        """Update the generator once on latents ``z`` and labels ``y``.

        Returns:
            The generator loss as a Python float.
        """
        assert (y.size(0) == z.size(0))
        toggle_grad(self.generator, True)
        toggle_grad(self.discriminator, False)

        self.generator.train()
        self.discriminator.train()
        self.g_optimizer.zero_grad()

        x_fake = self.generator(z, y)
        d_fake = self.discriminator(x_fake, y)
        # The generator is trained to have its samples scored as real.
        gloss = self.compute_loss(d_fake, 1)
        gloss.backward()

        self.g_optimizer.step()

        return gloss.item()

    def discriminator_trainstep(self, x_real, y, z):
        """Update the discriminator once on real and generated samples.

        ``x_real`` is marked as requiring grad in place so that gradient
        penalties with respect to it can be computed.

        Returns:
            ``(dloss, reg)`` as Python floats: the summed real + fake loss and
            the regularization term (``0.0`` for ``reg_type == 'none'``; for
            ``'real_fake'`` only the term computed on fake data).

        Raises:
            NotImplementedError: if ``reg_type`` is not one of ``REG_TYPES``.
        """
        # Reject unknown regularizers before any update is performed; they
        # would otherwise only fail after the optimizer step, because no
        # branch below would define ``reg``.
        if self.reg_type not in self.REG_TYPES:
            raise NotImplementedError(
                f"reg_type {self.reg_type} not implemented")

        toggle_grad(self.generator, False)
        toggle_grad(self.discriminator, True)
        self.generator.train()
        self.discriminator.train()
        self.d_optimizer.zero_grad()

        reg = torch.tensor(0.)

        # On real data
        x_real.requires_grad_()

        d_real = self.discriminator(x_real, y)
        dloss_real = self.compute_loss(d_real, 1)

        if self.reg_type == 'real' or self.reg_type == 'real_fake':
            # Keep the graph alive: the penalty differentiates d_real again.
            dloss_real.backward(retain_graph=True)
            reg = self.reg_param * compute_grad2(d_real, x_real).mean()
            reg.backward()
        else:
            dloss_real.backward()

        # On fake data
        with torch.no_grad():
            x_fake = self.generator(z, y)

        x_fake.requires_grad_()
        d_fake = self.discriminator(x_fake, y)
        dloss_fake = self.compute_loss(d_fake, 0)

        if self.reg_type == 'fake' or self.reg_type == 'real_fake':
            dloss_fake.backward(retain_graph=True)
            reg = self.reg_param * compute_grad2(d_fake, x_fake).mean()
            reg.backward()
        else:
            dloss_fake.backward()

        if self.reg_type == 'wgangp':
            reg = self.reg_param * self.wgan_gp_reg(x_real, x_fake, y)
            reg.backward()
        elif self.reg_type == 'wgangp0':
            reg = self.reg_param * self.wgan_gp_reg(
                x_real, x_fake, y, center=0.)
            reg.backward()

        self.d_optimizer.step()

        dloss = (dloss_real + dloss_fake)

        return dloss.item(), reg.item()

    def compute_loss(self, d_out, target):
        """Return the GAN loss of discriminator outputs against ``target``.

        ``target`` is 1 for "real" and 0 for "fake". ``'standard'`` uses
        binary cross-entropy on logits; ``'wgan'`` uses
        ``(2 * target - 1) * mean(d_out)``.

        Raises:
            NotImplementedError: for any other ``gan_type``.
        """
        targets = d_out.new_full(size=d_out.size(), fill_value=target)

        if self.gan_type == 'standard':
            loss = F.binary_cross_entropy_with_logits(d_out, targets)
        elif self.gan_type == 'wgan':
            loss = (2 * target - 1) * d_out.mean()
        else:
            raise NotImplementedError

        return loss

    def wgan_gp_reg(self, x_real, x_fake, y, center=1.):
        """Gradient penalty on random interpolates of real and fake samples.

        Penalises the squared distance between the discriminator's input
        gradient norm and ``center``, averaged over the batch.
        """
        batch_size = y.size(0)
        eps = torch.rand(batch_size, device=y.device).view(batch_size, 1, 1, 1)
        x_interp = (1 - eps) * x_real + eps * x_fake
        # Detach so the penalty is taken w.r.t. the interpolate itself.
        x_interp = x_interp.detach()
        x_interp.requires_grad_()
        d_out = self.discriminator(x_interp, y)

        reg = (compute_grad2(d_out, x_interp).sqrt() - center).pow(2).mean()

        return reg


# Utility functions
def toggle_grad(model, requires_grad):
    """Switch gradient tracking on or off for every parameter of ``model``."""
    for param in model.parameters():
        param.requires_grad_(requires_grad)


def compute_grad2(d_out, x_in):
    """Per-sample squared norm of the gradient of ``d_out`` w.r.t. ``x_in``.

    The gradient graph is kept (``create_graph=True``) so that the result can
    itself be backpropagated. Returns a tensor with one entry per row of
    ``x_in``.
    """
    (grad,) = autograd.grad(outputs=d_out.sum(),
                            inputs=x_in,
                            create_graph=True,
                            retain_graph=True,
                            only_inputs=True)
    grad_sq = grad.pow(2)
    assert (grad_sq.size() == x_in.size())
    n_samples = x_in.size(0)
    return grad_sq.view(n_samples, -1).sum(1)


def update_average(model_tgt, model_src, beta):
    """Blend ``model_src``'s parameters into ``model_tgt`` in place.

    Each target parameter becomes ``beta * target + (1 - beta) * source``.
    Gradient tracking is switched off on both models first (and left off).
    Parameters are matched by name; the two models must not share parameter
    objects.
    """
    for model in (model_src, model_tgt):
        toggle_grad(model, False)

    source_params = dict(model_src.named_parameters())

    for name, target in model_tgt.named_parameters():
        source = source_params[name]
        assert (source is not target)
        target.copy_(beta * target + (1. - beta) * source)


================================================
FILE: gan_training/utils.py
================================================
import torch
import torch.utils.data
import torch.utils.data.distributed
import torchvision

import os


def save_images(imgs, outfile, nrow=8):
    """Write ``imgs`` to ``outfile`` as an image grid with ``nrow`` per row.

    Pixel values are mapped with ``x / 2 + 0.5`` before saving.
    """
    unnormalized = imgs / 2 + 0.5
    torchvision.utils.save_image(unnormalized, outfile, nrow=nrow)


def get_nsamples(data_loader, N):
    """Collect the first ``N`` samples and labels from ``data_loader``.

    Args:
        data_loader: iterable yielding ``(x, y)`` batch pairs of tensors.
        N: number of samples wanted.

    Returns:
        ``(x, y)`` tensors concatenated along dim 0 and cut to at most ``N``
        entries (fewer if the loader is exhausted first).

    Raises:
        RuntimeError: from ``torch.cat`` if the loader yields no batches.
    """
    x = []
    y = []
    n = 0
    for x_next, y_next in data_loader:
        x.append(x_next)
        y.append(y_next)
        n += x_next.size(0)
        # Stop as soon as enough samples are held; ``>`` would fetch one more
        # batch than needed whenever the count lands exactly on N.
        if n >= N:
            break
    x = torch.cat(x, dim=0)[:N]
    y = torch.cat(y, dim=0)[:N]
    return x, y


def update_average(model_tgt, model_src, beta):
    """Blend ``model_src``'s parameters into ``model_tgt`` in place.

    Each target parameter becomes ``beta * target + (1 - beta) * source``.
    Parameters are matched by name; the two models must not share parameter
    objects. The ``requires_grad`` flags of both models are left untouched.
    """
    param_dict_src = dict(model_src.named_parameters())

    # Run under no_grad: an in-place copy into a parameter that requires grad
    # is rejected by autograd otherwise, and the average must not be recorded
    # in any graph.
    with torch.no_grad():
        for p_name, p_tgt in model_tgt.named_parameters():
            p_src = param_dict_src[p_name]
            assert (p_src is not p_tgt)
            p_tgt.copy_(beta * p_tgt + (1. - beta) * p_src)


def get_most_recent(d, ext):
    """Return the largest iteration number among files in directory ``d``.

    A file contributes when the text between ``ext + "_"`` and ``.pt`` in its
    name parses as an integer; all other files are ignored. Returns ``-1``
    (after printing a message) if the directory is missing or no file matches.
    """
    if not os.path.exists(d):
        print('Directory', d, 'does not exist')
        return -1

    iterations = []
    for fname in os.listdir(d):
        try:
            after_prefix = fname.split(ext + "_")[1]
            iterations.append(int(after_prefix.split('.pt')[0]))
        except Exception:
            # Name does not follow the pattern; skip it.
            continue

    if not iterations:
        print('Found no files with extension \"%s\" under %s' % (ext, d))
        return -1
    return max(iterations)


================================================
FILE: metrics.py
================================================
import argparse
import os
import json
from tqdm import tqdm

import numpy as np
import torch

from gan_training.config import load_config
from seeded_sampler import SeededSampler

# Command-line interface. Parsing happens at import time, so this module is
# meant to be run as a script.
# NOTE(review): the first positional argument of ArgumentParser is ``prog``,
# so the text below is used as the program name in usage output, not as the
# description — confirm whether ``description=`` was intended.
parser = argparse.ArgumentParser('Computes numbers used in paper and caches them to a result files. Examples include FID, IS, reverse-KL, # modes, FSD, cluster NMI, Purity.')
parser.add_argument('paths', nargs='+', type=str, help='list of configs for each experiment')
parser.add_argument('--it', type=int, default=-1, help='If set, computes numbers only for that iteration')
parser.add_argument('--every', type=int, default=-1, help='skips some checkpoints and only computes those whose iteration number are divisible by every')
parser.add_argument('--fid', action='store_true', help='compute FID metric')
parser.add_argument('--inception', action='store_true', help='compute IS metric')
parser.add_argument('--modes', action='store_true', help='compute # modes and reverse-KL metric')
parser.add_argument('--fsd', action='store_true', help='compute FSD metric')
parser.add_argument('--cluster_metrics', action='store_true', help='compute clustering metrics (NMI, purity)')
parser.add_argument('--device', type=int, default=1, help='device to run the metrics on (can run into OOM issues if same as main device)')
args = parser.parse_args()

device = args.device
# Work list of paths still to visit; directories are expanded by the main loop.
dirs = list(args.paths)

# ``sample`` draws N images in batches of BS.
N = 50000
BS = 100

# Dataset names that ``get_dataset_from_path`` looks for inside a path, in
# this order.
datasets = ['imagenet', 'cifar', 'stacked_mnist', 'places']

# Maps a dataset name to an .npz file path (presumably cached ground-truth
# images — verify against where this dict is read).
dataset_to_img = {
    'places': 'output/places_gt_imgs.npz',
    'imagenet': 'output/imagenet_gt_imgs.npz'}


def load_results(results_dir):
    """Load the cached metric result files found in ``results_dir``.

    Any result file that does not exist yet is first created containing an
    empty JSON object. Returns the parsed contents as a list, in the order
    FID, IS, KL, #modes, FSD, cluster metrics.
    """
    filenames = (
        'fid_results.json',
        'is_results.json',
        'kl_results.json',
        'nmodes_results.json',
        'fsd_results.json',
        'cluster_metrics.json',
    )
    loaded = []
    for filename in filenames:
        full_path = os.path.join(results_dir, filename)
        if not os.path.exists(full_path):
            with open(full_path, 'w') as handle:
                handle.write(json.dumps({}))
        with open(full_path) as handle:
            loaded.append(json.load(handle))
    return loaded


def get_dataset_from_path(path):
    """Return the first name in ``datasets`` that occurs in ``path``.

    The match is announced on stdout; ``None`` is returned when no dataset
    name is contained in ``path``.
    """
    found = next((name for name in datasets if name in path), None)
    if found is not None:
        print('Inferred dataset:', found)
    return found


def pt_to_np(imgs):
    '''normalizes pytorch image in [-1, 1] to [0, 255]

    The batch is permuted from (0, 1, 2, 3) to (0, 2, 3, 1) axis order and
    returned as a numpy array. All arithmetic is done in place, so the data of
    ``imgs`` is overwritten.
    '''
    permuted = imgs.permute(0, 2, 3, 1)
    permuted.mul_(0.5).add_(0.5).mul_(255)
    permuted.clamp_(0, 255)
    return permuted.numpy()


def sample(sampler):
    '''
    Draws N images from the sampler in batches of BS and returns them as a
    numpy array via pt_to_np.

    One batch more than N // BS is always requested; the surplus is dropped
    by truncating to the first N images before stacking.
    '''
    collected = []
    with torch.no_grad():
        n_batches = N // BS + 1
        for _ in tqdm(range(n_batches)):
            # sampler.sample returns (images, seeds); only the images are kept
            batch = sampler.sample(BS)[0].detach().cpu()
            collected.extend(img.detach().cpu() for img in batch)
        stacked = torch.stack(collected[:N], dim=0)
        return pt_to_np(stacked)


# Base directory prepended to checkpoint and clusterer paths below.
root = './'

# Walk the requested paths (directories are expanded on the fly); every .yaml file found
# is treated as an experiment config whose checkpoints should be evaluated.
while len(dirs) > 0:
    path = dirs.pop()
    if os.path.isdir(path):     # search down tree for config files
        for d1 in os.listdir(path):
            dirs.append(os.path.join(path, d1))
    else:
        if path.endswith('.yaml'):
            config = load_config(path, default_path='configs/default.yaml')
            outdir = config['training']['out_dir']

            # nothing to evaluate: no training output on disk and no pretrained entry in the config
            if not os.path.exists(outdir) and config['pretrained'] == {}:
                print('Skipping', path, 'outdir', outdir)
                continue

            results_dir = os.path.join(outdir, 'results')
            checkpoint_dir = os.path.join(outdir, 'chkpts')
            os.makedirs(results_dir, exist_ok=True)

            # one dict per metric, keyed by iteration (as a string); files are created if missing
            fid_results, is_results, kl_results, nmodes_results, fsd_results, cluster_results = load_results(results_dir)

            checkpoint_files = os.listdir(checkpoint_dir) if os.path.exists(checkpoint_dir) else []
            # 'pretrained' is a pseudo-checkpoint evaluated in addition to any files on disk
            if config['pretrained'] != {}:
                checkpoint_files = checkpoint_files + ['pretrained']

            for checkpoint in checkpoint_files:
                if (checkpoint.endswith('.pt') and checkpoint != 'model.pt') or checkpoint == 'pretrained':
                    print('Computing for', checkpoint)
                    # NOTE(review): a '.pt' file whose name contains neither 'model' nor equals
                    # 'pretrained' takes neither branch below, so `it` keeps the value of a previous
                    # checkpoint (or is undefined on the first one) - confirm such files cannot occur.
                    if 'model' in checkpoint:
                        # infer iteration number from checkpoint file w/o loading it
                        if 'model_' in checkpoint:
                            it = int(checkpoint.split('model_')[1].split('.pt')[0])
                        else:
                            continue
                        # with the default --every of -1 the remainder is always 0, so nothing is skipped
                        if args.every != 0 and it % args.every != 0:
                            continue
                        # iteration 0 is often useless, skip it
                        if it == 0 or args.it != -1 and it != args.it:
                            continue
                    elif checkpoint == 'pretrained':
                        it = 'pretrained'
                    # result dicts come from JSON, so their keys are strings
                    it = str(it)

                    clusterer_path = os.path.join(root, checkpoint_dir, f'clusterer{it}.pkl')
                    #  don't save samples for each iteration for disk space
                    samples_path = os.path.join(outdir, 'results', 'samples.npz')

                    # result dicts of the sample-based metrics requested on the command line
                    # (cluster metrics are not included: they do not read samples.npz)
                    targets = []
                    if args.inception:
                        targets = targets + [is_results]
                    if args.fid:
                        targets = targets + [fid_results]
                    if args.modes:
                        targets = targets + [kl_results, nmodes_results]
                    if args.fsd:
                        targets = targets + [fsd_results]

                    # regenerate samples only if at least one requested metric is missing for this iteration
                    if all([it in result for result in targets]):
                        print('Already generated', it, path)
                    else:
                        sampler = SeededSampler(path,
                                                model_path=os.path.join(root, checkpoint_dir, checkpoint),
                                                clusterer_path=clusterer_path,
                                                pretrained=config['pretrained'])
                        samples = sample(sampler)
                        # dataset_name is only assigned here; the metric commands that use it run only
                        # when one of `targets` is missing, i.e. when this branch has executed
                        dataset_name = get_dataset_from_path(path)
                        np.savez(samples_path, fake=samples, real=dataset_name)

                    # each metric runs in its own process, restricted to the requested GPU
                    arguments = f'--samples {samples_path} --it {it} --results_dir {results_dir}'
                    if args.fid and it not in fid_results:
                        os.system(f'CUDA_VISIBLE_DEVICES={device} python gan_training/metrics/fid.py {arguments}')
                    if args.inception and it not in is_results:
                        os.system(f'CUDA_VISIBLE_DEVICES={device} python gan_training/metrics/tf_is/inception_score.py {arguments}')
                    if args.modes and (it not in kl_results or it not in nmodes_results):
                        os.system(f'CUDA_VISIBLE_DEVICES={device} python utils/get_empirical_distribution.py {arguments} --dataset {dataset_name}')
                    if args.cluster_metrics and it not in cluster_results:
                        os.system(f'CUDA_VISIBLE_DEVICES={device} python cluster_metrics.py {path} --model_it {it}')
                    if args.fsd and it not in fsd_results:
                        # NOTE(review): dataset_to_img only has 'places' and 'imagenet'; any other
                        # dataset (or an unrecognised path) raises KeyError here
                        gt_path = dataset_to_img[dataset_name]
                        os.system(f'CUDA_VISIBLE_DEVICES={device} python -m seeing.fsd {gt_path} {samples_path} --it {it} --results_dir {results_dir}')


================================================
FILE: requirements.txt
================================================
pytorch-gpu==1.3.1
tensorflow-gpu==1.14.0
scikit-learn
scikit-image
torchvision
tqdm 
pyyaml
cloudpickle
ipython
opencv

================================================
FILE: seeded_sampler.py
================================================
''' Samples from a (class-conditional) GAN, so that the samples can be reproduced '''

import os
import pickle
import random
import copy

import torch
from torch import nn

from gan_training.checkpoints import CheckpointIO
from gan_training.config import (load_config, build_models)
from seeing.yz_dataset import YZDataset


def get_most_recent(models):
    '''
    Returns the checkpoint file name with the highest iteration number.

    models is an iterable of file names (typically os.listdir of a checkpoint
    directory). Three name shapes are recognised:
      - "<it>model.pt"   -> iteration <it>, returned as str(int(<it>)) + "model.pt"
      - "model.pt"       -> iteration 0, returned as "0model.pt" (unchanged legacy behaviour)
      - "model_<it>.pt"  -> iteration <it>, returned as the file name itself
    Any other entry (e.g. clusterer pickles stored next to the checkpoints) is
    ignored instead of aborting the search.

    Raises ValueError when no recognisable checkpoint name is present.
    '''
    legacy_suffix = "model.pt"
    prefix, extension = "model_", ".pt"

    best = None  # (iteration, name to return)
    for model in models:
        if model == legacy_suffix:
            candidate = (0, "0" + legacy_suffix)
        elif model.endswith(legacy_suffix) and model[:-len(legacy_suffix)].isdecimal():
            iteration = int(model[:-len(legacy_suffix)])
            candidate = (iteration, str(iteration) + legacy_suffix)
        elif (model.startswith(prefix) and model.endswith(extension)
              and model[len(prefix):-len(extension)].isdecimal()):
            candidate = (int(model[len(prefix):-len(extension)]), model)
        else:
            # not a checkpoint file; skip it
            continue
        if best is None or candidate[0] > best[0]:
            best = candidate

    if best is None:
        raise ValueError("no checkpoint file found among: %r" % (list(models),))
    return best[1]


class SeededSampler():
    '''
    Wraps a trained generator together with the (z, y) distribution used to
    sample from it. Every sample is identified by an integer seed, so the same
    image can be regenerated later from its seed.
    '''

    # inclusive upper bound for randomly drawn seeds (kept as an int: random.randint
    # rejects float bounds on recent Python versions)
    MAX_SEED = 10 ** 8

    def __init__(
            self,
            config_name,        # name of experiment's config file
            model_path="",      # path to the model. empty string infers the most recent checkpoint
            clusterer_path="",  # path to the clusterer, ignored if gan type doesn't require a clusterer
            pretrained=None,    # urls to the pretrained models; None behaves like an empty dict
            rootdir='./',
            device='cuda:0'):
        self.config = load_config(os.path.join(rootdir, config_name), 'configs/default.yaml')
        self.model_path = model_path
        self.clusterer_path = clusterer_path
        self.rootdir = rootdir
        self.nlabels = self.config['generator']['nlabels']
        self.device = device
        # avoid a shared mutable default: every instance without pretrained models gets its own dict
        self.pretrained = {} if pretrained is None else pretrained

        self.generator = self.get_generator()
        self.generator.eval()
        self.yz_dist = self.get_yz_dist()

    def sample(self, nimgs):
        '''
        samples an image using the generator, with z drawn from isotropic gaussian, and y drawn from self.yz_dist.
        For baseline methods, y doesn't matter because y is ignored in the input
        yz_dist is the empirical label distribution for the clustered gans.

        returns the image, and the integer seed used to generate it. generated sample is in [-1, 1]
        '''
        self.generator.eval()
        with torch.no_grad():
            seeds = [random.randint(0, self.MAX_SEED) for _ in range(nimgs)]
            z, y = self.yz_dist(seeds)
            return self.generator(z, y), seeds

    def conditional_sample(self, yi, seed=None):
        ''' returns a generated sample for label yi, which is in [-1, 1], seed is an int (random if None)'''
        self.generator.eval()
        with torch.no_grad():
            if seed is None:
                seed = [random.randint(0, self.MAX_SEED)]
            else:
                seed = [seed]
            # only z comes from the seed; the label is forced to yi
            z, _ = self.yz_dist(seed)
            y = torch.LongTensor([yi]).to(self.device)
            return self.generator(z, y)

    def sample_with_seed(self, seeds):
        ''' returns a generated sample, which is in [-1, 1] '''
        self.generator.eval()
        z, y = self.yz_dist(seeds)
        return self.generator(z, y)

    def get_zy(self, seeds):
        '''returns the batch of z, y corresponding to the seeds'''
        return self.yz_dist(seeds)

    def sample_with_zy(self, z, y):
        ''' returns a generated sample given z and y, which is in [-1, 1].'''
        self.generator.eval()
        return self.generator(z, y)

    def get_generator(self):
        '''
        builds the generator described by self.config and loads its weights according
        to self.model_path / self.pretrained. Returns the generator used for testing
        (the averaged copy when training.take_model_average is set).
        '''

        exp_out_dir = os.path.join(self.rootdir, self.config['training']['out_dir'])
        # infer checkpoint if needed: the experiment's chkpts folder is used when no model_path was
        # given or when a pretrained model is configured; otherwise model_path is resolved from "./"
        checkpoint_dir = os.path.join(exp_out_dir, 'chkpts') if self.model_path == "" or 'model' in self.pretrained else "./"
        model_name = get_most_recent(os.listdir(checkpoint_dir)) if self.model_path == "" else self.model_path

        checkpoint_io = CheckpointIO(checkpoint_dir=checkpoint_dir)
        # kept on the instance because get_yz_dist may load a pretrained clusterer through it
        self.checkpoint_io = checkpoint_io

        generator, _ = build_models(self.config)
        generator = generator.to(self.device)
        generator = nn.DataParallel(generator)

        if self.config['training']['take_model_average']:
            generator_test = copy.deepcopy(generator)
            checkpoint_io.register_modules(generator_test=generator_test)
        else:
            generator_test = generator

        checkpoint_io.register_modules(generator=generator)

        try:
            it = checkpoint_io.load(model_name, pretrained=self.pretrained)
            assert (it != -1)
        except Exception as e:
            # try again without data parallel
            print(e)
            checkpoint_io.register_modules(generator=generator.module)
            checkpoint_io.register_modules(generator_test=generator_test.module)
            it = checkpoint_io.load(model_name, pretrained=self.pretrained)
            assert (it != -1)

        print('Loaded iteration:', it['it'])
        return generator_test

    def get_yz_dist(self):
        '''
        loads the z and y dists used to sample from the generator.

        raises FileNotFoundError if the config needs a clusterer but neither a
        pretrained clusterer nor a file at self.clusterer_path is available.
        '''

        if self.config['clusterer']['name'] != 'supervised':
            if 'clusterer' in self.pretrained:
                clusterer = self.checkpoint_io.load_clusterer('pretrained', load_samples=False, pretrained=self.pretrained)
            elif os.path.exists(self.clusterer_path):
                # NOTE: unpickling runs arbitrary code; only point clusterer_path at trusted files
                with open(self.clusterer_path, 'rb') as f:
                    clusterer = pickle.load(f)
            else:
                # previously this fell through and failed later with an unbound `clusterer`
                raise FileNotFoundError(
                    'No clusterer found at {!r} and no pretrained clusterer configured'.format(
                        self.clusterer_path))

            if isinstance(clusterer.discriminator, nn.DataParallel):
                clusterer.discriminator = clusterer.discriminator.module

            if clusterer.kmeans is not None:
                # use clusterer empirical distribution as sampling
                print('Using k-means empirical distribution')
                distribution = clusterer.get_label_distribution()
                total = sum(distribution)
                probs = [f / total for f in distribution]
            else:
                # otherwise, use a uniform distribution. this is not desired, unless it's a random label or unconditional GAN
                print("Sampling with uniform distribution over", clusterer.k, "labels")
                probs = [1. / clusterer.k for _ in range(clusterer.k)]
        else:
            # if it's supervised, then sample uniformly over all classes.
            # this might not be the right thing to do, since datasets are usually imbalanced.
            print("Sampling with uniform distribution over", self.nlabels,
                  "labels")
            probs = [1. / self.nlabels for _ in range(self.nlabels)]

        return YZDataset(zdim=self.config['z_dist']['dim'],
                         nlabels=len(probs),
                         distribution=probs,
                         device=self.device)


================================================
FILE: seeing/frechet_distance.py
================================================
#!/usr/bin/env python3
"""Calculates the Frechet Distance (FD) between two samples.

Code adapted from https://github.com/bioinf-jku/TTUR to use PyTorch instead
of Tensorflow

Copyright 2018 Institute of Bioinformatics, JKU Linz

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import numpy as np
import torch
from scipy import linalg

def sample_frechet_distance(sample1, sample2, eps=1e-6,
        return_components=False):
    '''
    Frechet distance between two sets of observations.

    Each sample is reduced to its mean and covariance with
    calculate_activation_statistics, and the two Gaussians are then compared
    with calculate_frechet_distance. eps and return_components are forwarded
    unchanged to calculate_frechet_distance.
    '''
    mu1, sigma1 = calculate_activation_statistics(sample1)
    mu2, sigma2 = calculate_activation_statistics(sample2)
    return calculate_frechet_distance(
        mu1, sigma1, mu2, sigma2,
        eps=eps,
        return_components=return_components)

def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6,
        return_components=False):
    """Numpy implementation of the Frechet Distance.

    The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
    and X_2 ~ N(mu_2, C_2) is
            d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).

    Stable version by Dougal J. Sutherland.

    Params:
    -- mu1, mu2       : Mean vectors of the two distributions (same length).
    -- sigma1, sigma2 : Covariance matrices of the two distributions (same
                        shape).
    -- eps            : Value added to the diagonal of both covariances when
                        the matrix square root of their product is not finite.
    -- return_components : If True, also return the two terms of the sum.

    Returns:
    --   : The Frechet distance, or the tuple (distance, mean term,
           covariance term) when return_components is True.

    Raises:
    --   : ValueError if the matrix square root has a diagonal with an
           imaginary part that is not close to zero (atol 1e-3).
    """
    mean_a, mean_b = np.atleast_1d(mu1), np.atleast_1d(mu2)
    cov_a, cov_b = np.atleast_2d(sigma1), np.atleast_2d(sigma2)

    assert mean_a.shape == mean_b.shape, \
        'Training and test mean vectors have different lengths'
    assert cov_a.shape == cov_b.shape, \
        'Training and test covariances have different dimensions'

    delta = mean_a - mean_b

    # The product can be close to singular, in which case sqrtm yields inf/nan
    sqrt_prod, _ = linalg.sqrtm(cov_a.dot(cov_b), disp=False)
    if not np.isfinite(sqrt_prod).all():
        msg = ('fid calculation produces singular product; '
               'adding %s to diagonal of cov estimates') % eps
        print(msg)
        jitter = np.eye(cov_a.shape[0]) * eps
        sqrt_prod = linalg.sqrtm((cov_a + jitter).dot(cov_b + jitter))

    # Rounding errors may leave a small imaginary part; drop it if negligible
    if np.iscomplexobj(sqrt_prod):
        if not np.allclose(np.diagonal(sqrt_prod).imag, 0, atol=1e-3):
            m = np.max(np.abs(sqrt_prod.imag))
            raise ValueError('Imaginary component {}'.format(m))
        sqrt_prod = sqrt_prod.real

    meandiff = delta.dot(delta)
    covdiff = np.trace(cov_a) + np.trace(cov_b) - 2 * np.trace(sqrt_prod)
    total = meandiff + covdiff
    if return_components:
        return (total, meandiff, covdiff)
    return total


def calculate_activation_statistics(act):
    """Mean and covariance of a set of observations.

    Params:
    -- act   : 2-D array-like with one observation per row and one variable
               per column.
    Returns:
    -- mu    : The mean over rows (one value per column).
    -- sigma : The covariance matrix between columns, as computed by
               np.cov(act, rowvar=False).
    """
    return np.mean(act, axis=0), np.cov(act, rowvar=False)


================================================
FILE: seeing/fsd.py
================================================
import torch, argparse, sys, os, numpy
from .sampler import FixedRandomSubsetSampler, FixedSubsetSampler
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
from torchvision import transforms, utils
from . import pbar, zdataset, segmenter, frechet_distance, parallelfolder

# Number of bins in each per-image label histogram: used as the bincount minlength and as
# the column count of every tally array below.
# NOTE(review): presumably the number of label ids the segmenter can emit - confirm.
NUM_OBJECTS = 336


def main():
    '''
    Command-line entry point: computes the Frechet distance between the
    segmentation statistics of two image sources (true_dir and gen_dir).

    Prints the distance and its two components. With --histout a comparison
    figure is saved to that path. With --results_dir the numbers are stored
    under key --it in <results_dir>/fsd_results.json (created if missing) and
    the figure is saved as <results_dir>/fsd_<it>.png.
    '''
    parser = argparse.ArgumentParser(description='Net dissect utility')
    parser.add_argument('true_dir')
    parser.add_argument('gen_dir')
    parser.add_argument('--size', type=int, default=10000)
    parser.add_argument('--cachedir', default='results/fsd/cache')
    parser.add_argument('--histout', default=None)
    parser.add_argument('--maxscale', type=float, default=50)
    parser.add_argument('--labelcount', type=int, default=30)
    parser.add_argument('--dpi', type=float, default=100)
    parser.add_argument('--it', type=str, default="-1")
    parser.add_argument('--results_dir', default=None, help='path to results_dir')
    # A single parse suffices: argparse itself reports usage and exits when the
    # required positional arguments are missing.
    args = parser.parse_args()
    print(args.true_dir, args.gen_dir)
    true_dir, gen_dir = args.true_dir, args.gen_dir
    # use a different sampler seed for the second tally when a source is compared with itself
    seed1, seed2 = [1, 1 if true_dir != gen_dir else 2]
    true_tally, gen_tally = [
        cached_tally_directory(d,
                               size=args.size,
                               cachedir=args.cachedir,
                               seed=seed)
        for d, seed in [(true_dir, seed1), (gen_dir, seed2)]
    ]
    # tallies are area fractions; scale to percentages before comparing
    true_percent, gen_percent = true_tally * 100, gen_tally * 100
    fsd, meandiff, covdiff = frechet_distance.sample_frechet_distance(
        true_percent, gen_percent, return_components=True)
    print('fsd: %f; meandiff: %f; covdiff: %f' % (fsd, meandiff, covdiff))

    def make_figure():
        return diff_figure(true_percent,
                           gen_percent,
                           labelcount=args.labelcount,
                           maxscale=args.maxscale,
                           dpi=args.dpi)

    if args.histout is not None:
        make_figure().savefig(args.histout)

    if args.results_dir is not None:
        import json

        it = args.it
        os.makedirs(args.results_dir, exist_ok=True)
        results_file = os.path.join(args.results_dir, 'fsd_results.json')

        # start from an empty record when no results were stored before
        if os.path.isfile(results_file):
            with open(results_file) as f:
                fsd_results = json.load(f)
        else:
            fsd_results = {}

        fsd_results[it] = (fsd, meandiff, covdiff)

        with open(results_file, 'w') as f:
            f.write(json.dumps(fsd_results))

        make_figure().savefig(os.path.join(args.results_dir, f'fsd_{it}.png'))
    
def cached_tally_directory(directory, size=10000, cachedir=None, seed=1):
    '''
    Returns the segmentation tally for `directory`, reusing a cached .npy file
    when allowed.

    The cache file is named '<directory>_segtally_<size>.npy', prefixed with
    '<seed>_' when seed != 1. When cachedir is given, '/' in that name is
    replaced by '_' and the file lives inside cachedir; otherwise the name is
    used as a path as-is.

    An existing cache file is only loaded when `directory` is not an .npz
    archive or contains 'gt' in its name; other .npz inputs are always
    re-tallied and the cache file is overwritten.
    '''
    filename = '%s_segtally_%d.npy' % (directory, size)
    if seed != 1:
        filename = '%d_%s' % (seed, filename)
    if cachedir is not None:
        filename = os.path.join(cachedir, filename.replace('/', '_'))
    #load only if gt stats, or image directory
    if os.path.isfile(filename) and (not directory.endswith('.npz') or 'gt' in directory):
        return numpy.load(filename)
    # cachedir is optional: os.makedirs(None) would raise TypeError
    if cachedir is not None:
        os.makedirs(cachedir, exist_ok=True)
    result = tally_directory(directory, size, seed=seed)
    numpy.save(filename, result)
    return result


def tally_directory(directory, size=10000, seed=1):
    '''
    Segments up to `size` images from `directory` and returns, per image, the
    fraction of pixels assigned to each label.

    `directory` is either an .npz archive whose 'fake' array holds the images
    (handled as BHWC and rescaled by /127.5 - 1, see the inline comments), or
    an image folder read through ParallelImageFolders. Images are drawn with
    FixedRandomSubsetSampler(end=size, seed=seed). Requires CUDA.

    Returns a float array of shape (size, NUM_OBJECTS); rows for which no
    image was processed stay zero.
    '''
    if directory.endswith('.npz'):
        with np.load(directory) as f:
            images = torch.from_numpy(f['fake'])
            images = images.permute(0, 3, 1, 2) #BHWC -> BCHW
            images = (images/127.5) - 1 #normalize in [-1, 1]
            images = torch.nn.functional.interpolate(images, size=(256, 256))
            print(images.shape, images.max(), images.min())
        dataset = TensorDataset(images)
    else:  
        dataset = parallelfolder.ParallelImageFolders(
            [directory],
            transform=transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(256),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ]))
    loader = DataLoader(dataset,
                        sampler=FixedRandomSubsetSampler(dataset,
                                                         end=size,
                                                         seed=seed),
                        batch_size=10,
                        pin_memory=True)
    upp = segmenter.UnifiedParsingSegmenter()
    labelnames, catnames = upp.get_label_and_category_names()
    # numpy.float was only an alias of the builtin float and no longer exists in current NumPy
    result = numpy.zeros((size, NUM_OBJECTS), dtype=numpy.float64)
    batch_result = torch.zeros(loader.batch_size,
                               NUM_OBJECTS,
                               dtype=torch.float).cuda()
    with torch.no_grad():
        batch_index = 0
        for [batch] in pbar(loader):
            seg_result = upp.segment_batch(batch.cuda())
            count = len(batch)
            for i in range(count):
                # histogram of label ids, normalised by the number of pixels
                batch_result[i] = (seg_result[i, 0].view(-1).bincount(
                    minlength=NUM_OBJECTS).float() /
                                   (seg_result.shape[2] * seg_result.shape[3]))
            # copy only the rows filled for this batch: the last batch may be shorter
            result[batch_index:batch_index + count] = (
                batch_result[:count].cpu().numpy())
            batch_index += count
    return result


def tally_dataset_objects(dataset, size=10000):
    '''
    Segments up to `size` items of `dataset` (drawn with
    FixedRandomSubsetSampler(end=size)) and returns, per image, the fraction
    of pixels assigned to each label. Requires CUDA.

    Returns a float array of shape (size, NUM_OBJECTS); rows for which no
    image was processed stay zero.
    '''
    loader = DataLoader(dataset,
                        sampler=FixedRandomSubsetSampler(dataset, end=size),
                        batch_size=10,
                        pin_memory=True)
    upp = segmenter.UnifiedParsingSegmenter()
    labelnames, catnames = upp.get_label_and_category_names()
    # numpy.float was only an alias of the builtin float and no longer exists in current NumPy
    result = numpy.zeros((size, NUM_OBJECTS), dtype=numpy.float64)
    batch_result = torch.zeros(loader.batch_size,
                               NUM_OBJECTS,
                               dtype=torch.float).cuda()
    with torch.no_grad():
        batch_index = 0
        for [batch] in pbar(loader):
            seg_result = upp.segment_batch(batch.cuda())
            count = len(batch)
            for i in range(count):
                # histogram of label ids, normalised by the number of pixels
                batch_result[i] = (seg_result[i, 0].view(-1).bincount(
                    minlength=NUM_OBJECTS).float() /
                                   (seg_result.shape[2] * seg_result.shape[3]))
            # copy only the rows filled for this batch: the last batch may be shorter
            result[batch_index:batch_index + count] = (
                batch_result[:count].cpu().numpy())
            batch_index += count
    return result


def tally_generated_objects(model, size=10000):
    '''
    Generates `size` images by feeding batches from
    zdataset.z_dataset_for_model(model, size) through `model`, segments them,
    and returns, per image, the fraction of pixels assigned to each label.
    Requires CUDA.

    Returns a float array of shape (size, NUM_OBJECTS).
    '''
    zds = zdataset.z_dataset_for_model(model, size)
    loader = DataLoader(zds, batch_size=10, pin_memory=True)
    upp = segmenter.UnifiedParsingSegmenter()
    labelnames, catnames = upp.get_label_and_category_names()
    # numpy.float was only an alias of the builtin float and no longer exists in current NumPy
    result = numpy.zeros((size, NUM_OBJECTS), dtype=numpy.float64)
    batch_result = torch.zeros(loader.batch_size,
                               NUM_OBJECTS,
                               dtype=torch.float).cuda()
    with torch.no_grad():
        batch_index = 0
        for [zbatch] in pbar(loader):
            img = model(zbatch.cuda())
            seg_result = upp.segment_batch(img)
            count = len(zbatch)
            for i in range(count):
                # histogram of label ids, normalised by the number of pixels
                batch_result[i] = (seg_result[i, 0].view(-1).bincount(
                    minlength=NUM_OBJECTS).float() /
                                   (seg_result.shape[2] * seg_result.shape[3]))
            # copy only the rows filled for this batch: the last batch may be shorter
            result[batch_index:batch_index + count] = (
                batch_result[:count].cpu().numpy())
            batch_index += count
    return result


def diff_figure(ttally,
                gtally,
                labelcount=30,
                labelleft=True,
                dpi=100,
                maxscale=50.0,
                legend=False):
    '''
    Builds a two-panel matplotlib Figure comparing two tallies.

    ttally and gtally are per-image tallies (one row per image, one column per
    label); both are averaged over rows. Labels are visited by decreasing mean
    in ttally, skipping label 0 and labels whose category is 'material',
    until labelcount labels are collected or a label with zero mean is hit.

    Top panel: mean value per label for ttally (bars, 'training') and gtally
    (red line, 'generated') on a log axis limited to
    [maxscale / 5000, maxscale]. Bottom panel: relative change
    (gen - train) / train, clipped to [-1, 1.1]; larger values are written
    as text above the bar.

    Returns the Figure (rendered on an Agg canvas); the caller saves it.
    '''
    from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
    from matplotlib.figure import Figure
    tresult, gresult = [t.mean(0) for t in [ttally, gtally]]
    upp = segmenter.UnifiedParsingSegmenter()
    labelnames, catnames = upp.get_label_and_category_names()
    x = []
    labels = []
    gen_amount = []
    change_frac = []
    true_amount = []
    for label in numpy.argsort(-tresult):
        if label == 0 or labelnames[label][1] == 'material':
            continue
        # sorted by decreasing mean, so everything after the first zero is zero too
        if tresult[label] == 0:
            break
        x.append(len(x))
        labels.append(labelnames[label][0].split()[0])
        true_amount.append(tresult[label].item())
        gen_amount.append(gresult[label].item())
        change_frac.append(
            (float(gresult[label] - tresult[label]) / tresult[label]))
        if len(x) >= labelcount:
            break
    fig = Figure(dpi=dpi, figsize=(1.4 + 5.0 * labelcount / 30, 4.0))
    FigureCanvas(fig)
    a1, a0 = fig.subplots(2, 1, gridspec_kw={'height_ratios': [1, 2]})
    a0.bar(x, change_frac, label='relative delta')
    a0.set_xticks(x)
    a0.set_xticklabels(labels, rotation='vertical')
    if labelleft:
        a0.set_ylabel('relative delta\n(gen - train) / train')
    a0.set_xlim(-1.0, len(x))
    a0.set_ylim([-1, 1.1])
    a0.grid(axis='y', antialiased=False, alpha=0.25)
    if legend:
        a0.legend(loc=2)
    # annotate bars that exceed the visible range; alternate the text height for
    # neighbouring bars so the numbers do not overlap
    prev_high = None
    for ix, cf in enumerate(change_frac):
        if cf > 1.15:
            if prev_high == (ix - 1):
                offset = 0.1
            else:
                offset = 0.0
                prev_high = ix
            a0.text(ix,
                    1.15 + offset,
                    '%.1f' % cf,
                    horizontalalignment='center',
                    size=6)

    a1.bar(x, true_amount, label='training')
    a1.plot(x, gen_amount, linewidth=3, color='red', label='generated')
    a1.set_yscale('log')
    a1.set_xlim(-1.0, len(x))
    a1.set_ylim(maxscale / 5000, maxscale)
    if labelleft:
        a1.set_ylabel('mean area\nlog scale')
    if legend:
        a1.legend()
    a1.set_yticks([1e-2, 1e-1, 1.0, 1e+1])
    # minor ticks; `minor` must be passed by keyword - the second positional
    # parameter of set_yticks is `labels` in current Matplotlib
    a1.set_yticks([
        a * b for a in [1e-2, 1e-1, 1.0, 1e+1]
        for b in range(1, 10) if maxscale / 5000 <= a * b <= maxscale
    ], minor=True)
    a1.set_xticks([])
    fig.tight_layout()
    return fig


# Run the command-line interface only when this module is executed as a script
# (for example via `python -m seeing.fsd`), not when it is imported.
if __name__ == '__main__':
    main()


================================================
FILE: seeing/lightbox.html
================================================
<!DOCTYPE html>
<html>
<!--
  +lightbox.html, a page for automatically showing all images in a
  directory on an Apache server. Just copy it into the directory.
  Works by scraping the default directory HTML at "./" - David Bau.
-->

<head>
  <script src="https://cdn.jsdelivr.net/npm/vue@2.5.16/dist/vue.js"
    integrity="sha256-CMMTrj5gGwOAXBeFi7kNokqowkzbeL8ydAJy39ewjkQ=" crossorigin="anonymous"></script>
  <script src="https://cdn.jsdelivr.net/npm/lodash@4.17.10/lodash.js"
    integrity="sha256-qwbDmNVLiCqkqRBpF46q5bjYH11j5cd+K+Y6D3/ja28=" crossorigin="anonymous"></script>
  <script src="https://code.jquery.com/jquery-3.3.1.js" integrity="sha256-2Kok7MbOyxpgUVvAk/HJ2jigOSYS2auK4Pfzbm7uH60="
    crossorigin="anonymous"></script>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/lity/2.3.1/lity.js"
    integrity="sha256-28JiZvE/RethQIYCwkMdtSMHgI//KoTLeB2tSm10trs=" crossorigin="anonymous"></script>
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/lity/2.3.1/lity.css"
    integrity="sha256-76wKiAXVBs5Kyj7j0T43nlBCbvR6pqdeeZmXI4ATnY0=" crossorigin="anonymous" />
  <style>
    h3 {
      font-family: sans-serif;
      font-size: 18px;
    }

    .thumb,
    .filter {
      font-family: sans-serif;
      font-size: 12px;
    }

    .filter {
      padding-bottom: 10px;
    }

    .thumb {
      display: inline-block;
      margin: 1px;
      text-align: center;
    }

    .thumb img,
    .thumb div {
      max-width: 150px;
      word-break: break-all;
    }
  </style>
</head>

<body>
  <div id="app" v-if="images">
    <h3>Images in <a :href="directory">{{ directory }}</a></h3>
    <div class="filter">
      Filter: <input v-model="pattern" placeholder="regexp">
    </div>
    <div v-for="r in images" class="thumb" v-if="patternRe.test(r)">
      <div>{{ r }}</div>
      <a :href="r" data-lity><img :src="r"></a>
    </div>
  </div>
  <!--app-->
</body>
<script>
  // Vue instance backing the #app element: lists the image files linked from the
  // directory index at "./" and filters them by a user-supplied regular expression.
  var theapp = new Vue({
    el: '#app',
    data: {
      // current URL path with the trailing file name removed
      directory: window.location.pathname.replace(/[^\/]*$/, ''),
      // stays null until the listing has been fetched; the template renders only once it is set
      images: null,
      // text typed into the filter box
      pattern: '',
    },
    created: function () {
      var self = this;
      // Fetch the directory listing as HTML; the random query string makes every request URL unique.
      $.get('./?' + Math.random(), function (d) {
        // Collect the href of every link, keep those ending in an image extension
        // (case-insensitive), and reduce each to its bare file name.
        var imgurls = $.map($(d).find('a'),
          x => x.href).filter(
            x => x.match(/\.(jpg|jpeg|png|gif|svg)$/i)).map(
              x => x.replace(/.*\//, ''));
        self.images = imgurls;
      }, 'html');
    },
    computed: {
      // Compiled filter expression; an invalid pattern falls back to a match-everything regexp.
      patternRe: function () {
        try {
          return RegExp(this.pattern);
        } catch (e) {
          return /.*/;
        }
      }
    },
  })
</script>

</html>

================================================
FILE: seeing/parallelfolder.py
================================================
'''
Variants of pytorch's ImageFolder for loading image datasets with more
information, such as parallel feature channels in separate files,
cached files with lists of filenames, etc.
'''

import os, torch, re, random, numpy, itertools
import torch.utils.data as data
from torchvision.datasets.folder import default_loader as tv_default_loader
from PIL import Image
from collections import OrderedDict
from . import pbar

def grayscale_loader(path):
    '''
    Opens the image file at path and returns it converted to PIL mode 'L'.
    '''
    with open(path, 'rb') as handle:
        image = Image.open(handle)
        return image.convert('L')

class ndarray(numpy.ndarray):
    '''
    Python-level subclass of numpy.ndarray that adds no behavior of its own.

    Arrays viewed as this type accept extra attributes (the intended one
    being shared_state), which plain numpy arrays do not allow.
    '''

def default_loader(filename):
    '''
    Loads one file, choosing the reader from the file name's suffix:

    '.npy'  -> numpy.load, viewed as the attribute-friendly ndarray subclass
    '.npz'  -> numpy.load, returned as-is
    other   -> torchvision's default image loader
    '''
    if filename.endswith('.npy'):
        array = numpy.load(filename)
        return array.view(ndarray)
    if filename.endswith('.npz'):
        return numpy.load(filename)
    return tv_default_loader(filename)

class ParallelImageFolders(data.Dataset):
    """
    A data loader that looks for parallel image filenames, for example

    photo1/park/004234.jpg
    photo1/park/004236.jpg
    photo1/park/004237.jpg

    photo2/park/004234.png
    photo2/park/004236.png
    photo2/park/004237.png

    Arguments:
        image_roots: list of root directories to scan in parallel.
        transform: a single callable (applied to every root) or an
            iterable with one transform (or None) per root.
        loader: callable that loads one file path.
        stacker: optional callable applied to the list of loaded (and
            transformed) sources to combine them into one object.
        classification, intersection, filter_tuples, verbose: passed
            through to make_parallel_dataset.
        size: if given, only the first `size` items are kept (after any
            shuffling).
        shuffle: if given, used as the seed for a deterministic shuffle
            of the item list.
        lazy_init: if true, the directory scan is deferred until the
            dataset is first used.

    Raises:
        RuntimeError: when the scan finds no images.
    """
    def __init__(self, image_roots,
            transform=None,
            loader=default_loader,
            stacker=None,
            classification=False,
            intersection=False,
            filter_tuples=None,
            verbose=None,
            size=None,
            shuffle=None,
            lazy_init=True):
        self.image_roots = image_roots
        # A single transform is broadcast to every root.
        if transform is not None and not hasattr(transform, '__iter__'):
            transform = [transform for _ in image_roots]
        self.transforms = transform
        self.stacker = stacker
        self.loader = loader
        def do_lazy_init():
            self.images, self.classes, self.class_to_idx = (
                    make_parallel_dataset(image_roots,
                        classification=classification,
                        intersection=intersection,
                        filter_tuples=filter_tuples,
                        verbose=verbose))
            if len(self.images) == 0:
                raise RuntimeError("Found 0 images within: %s" % image_roots)
            if shuffle is not None:
                random.Random(shuffle).shuffle(self.images)
            if size is not None:
                # Truncate the actual item list (not a stray attribute),
                # so that len() and indexing honor the requested size.
                self.images = self.images[:size]
            self._do_lazy_init = None
        # Do slow initialization lazily.
        if lazy_init:
            self._do_lazy_init = do_lazy_init
        else:
            do_lazy_init()

    def __getattr__(self, attr):
        # Only called when normal lookup fails.  Read the pending
        # initializer straight from the instance dict: going through
        # self._do_lazy_init would re-enter __getattr__ forever if the
        # attribute has not been set on this instance.
        pending = self.__dict__.get('_do_lazy_init')
        if pending is not None:
            pending()
            return getattr(self, attr)
        raise AttributeError(attr)

    def __getitem__(self, index):
        if self._do_lazy_init is not None:
            self._do_lazy_init()
        paths = self.images[index]
        if self.classes is not None:
            # With classification on, the class index is the last entry.
            classidx = paths[-1]
            paths = paths[:-1]
        sources = [self.loader(path) for path in paths]
        # Add a common shared state dict to allow random crops/flips to be
        # coordinated.
        shared_state = {}
        for s in sources:
            try:
                s.shared_state = shared_state
            except Exception:
                # Best effort: some loaded objects cannot take attributes.
                pass
        if self.transforms is not None:
            sources = [transform(source) if transform is not None else source
                    for source, transform
                    in itertools.zip_longest(sources, self.transforms)]
        if self.stacker is not None:
            sources = self.stacker(sources)
            if self.classes is not None:
                sources = (sources, classidx)
        else:
            if self.classes is not None:
                sources.append(classidx)
            sources = tuple(sources)
        return sources

    def __len__(self):
        if self._do_lazy_init is not None:
            self._do_lazy_init()
        return len(self.images)

def is_npy_file(path):
    '''
    True if the path ends with '.npy' or '.NPY'.
    '''
    return path.endswith(('.npy', '.NPY'))

def is_image_file(path):
    '''
    True if the path ends with a jpg, jpeg, or png extension, in any
    letter case.
    '''
    found = re.search(r'\.(jpe?g|png)$', path, re.IGNORECASE)
    return found is not None

def walk_image_files(rootdir, verbose=None):
    '''
    Returns a list of file paths under rootdir.  If a file named
    '<rootdir>.txt' exists, its lines are taken as the file list (each
    joined onto the parent directory of rootdir) and returned sorted.
    Otherwise rootdir is walked and every image or npy file found is
    returned, in sorted directory and filename order.  The verbose
    argument is accepted but not used here.
    '''
    indexfile = '%s.txt' % rootdir
    if os.path.isfile(indexfile):
        basedir = os.path.dirname(rootdir)
        with open(indexfile) as f:
            listed = [os.path.join(basedir, line.strip()) for line in f]
        listed.sort()
        return listed
    found = []
    walker = pbar(os.walk(rootdir),
            desc='Walking %s' % os.path.basename(rootdir))
    for dirname, _, fnames in sorted(walker):
        found.extend(os.path.join(dirname, fname)
                for fname in sorted(fnames)
                if is_image_file(fname) or is_npy_file(fname))
    return found

def make_parallel_dataset(image_roots, classification=False,
        intersection=False, filter_tuples=None, verbose=None):
    """
    Groups the files found under each root by their root-relative path
    with the extension removed, and returns

        ([(img1, img2, clsid), (img1, img2, clsid)..],
         classes, class_to_idx)

    The class id is only appended when classification is true, in which
    case the class name is the name of the directory containing the file;
    otherwise classes and class_to_idx are None.  When intersection is
    false, a key missing from any root raises RuntimeError; when it is
    true, incomplete keys are dropped.  filter_tuples, if given, is a
    predicate deciding which finished tuples to keep.
    """
    roots = [os.path.expanduser(d) for d in image_roots]
    grouped = OrderedDict()
    for root_index, root in enumerate(roots):
        for path in walk_image_files(root, verbose=verbose):
            key = os.path.splitext(os.path.relpath(path, root))[0]
            found = grouped.setdefault(key, [])
            # Without intersection, every key must already have exactly
            # one entry from each of the earlier roots.
            if not intersection and len(found) != root_index:
                raise RuntimeError(
                    'Images not parallel: %s missing from one dir' % (key))
            found.append(path)
    if classification:
        def class_name(key):
            return os.path.basename(os.path.dirname(key))
        classes = sorted({class_name(k) for k in grouped})
        class_to_idx = {name: idx for idx, name in enumerate(classes)}
        for key, found in grouped.items():
            found.append(class_to_idx[class_name(key)])
    else:
        classes = class_to_idx = None
    expected = len(roots) + (1 if classification else 0)
    tuples = []
    for key, found in grouped.items():
        if len(found) != expected:
            if not intersection:
                raise RuntimeError(
                    'Images not parallel: %s missing from one dir' % (key))
            continue
        entry = tuple(found)
        if filter_tuples and not filter_tuples(entry):
            continue
        tuples.append(entry)
    return tuples, classes, class_to_idx


================================================
FILE: seeing/pbar.py
================================================
'''
Utilities for showing progress bars, controlling default verbosity, etc.
'''

# If the tqdm package is not available, then do not show progress bars;
# just connect print_progress to print.
import sys, types, builtins
try:
    from tqdm import tqdm, tqdm_notebook
except:
    tqdm = None

# Module-wide verbosity setting.  A falsy value silences progress output,
# True selects a tqdm-based progress function, and any other value is
# itself used as the progress function (see __call__ below).
default_verbosity = True
# Description queued by descnext() for the next progress loop; it is
# consumed and reset by __call__, and also cleared by print().
next_description = None
# Keep a handle on the builtin print, since this module defines its own
# function named print.
python_print = builtins.print

def post(**kwargs):
    '''
    Shows the given k=str status values as the postfix of the innermost
    active progress bar, i.e., pbar.post(k=str) inside a progress loop.
    Does nothing when no progress bar is active.
    '''
    bar = innermost_tqdm()
    if bar is None:
        return
    bar.set_postfix(**kwargs)

def desc(desc):
    '''
    Changes the description of the innermost active progress bar to
    str(desc).  Does nothing when no progress bar is active.
    '''
    bar = innermost_tqdm()
    if bar is None:
        return
    bar.set_description(str(desc))

def descnext(desc):
    '''
    Queues the description text to be used by the next progress loop
    that is started.  Ignored when progress output is disabled or tqdm
    is unavailable.
    '''
    global next_description
    if default_verbosity and tqdm is not None:
        next_description = desc

def print(*args):
    '''
    Prints the space-joined arguments when verbosity is on, routing the
    message through tqdm.write when tqdm is available and through the
    builtin print otherwise.  Any queued next-loop description is
    cleared, whether or not anything is printed.
    '''
    global next_description
    next_description = None
    if not default_verbosity:
        return
    msg = ' '.join(map(str, args))
    emit = python_print if tqdm is None else tqdm.write
    emit(msg)

def tqdm_terminal(it, *args, **kwargs):
    '''
    Wraps the iterator in a tqdm bar configured for resizable terminals:
    dynamic column width, ascii rendering, and leave set only when
    another progress bar is already active.
    '''
    nested = innermost_tqdm() is not None
    return tqdm(it, *args, dynamic_ncols=True, ascii=True,
            leave=nested, **kwargs)

def in_notebook():
    '''
    True only when get_ipython() exists and reports the
    ZMQInteractiveShell class (Jupyter notebook or qtconsole).  Terminal
    IPython, any other shell type, and plain Python all give False.
    '''
    # From https://stackoverflow.com/a/39662359/265298
    try:
        shell = get_ipython().__class__.__name__
    except NameError:
        # get_ipython is not defined: probably standard Python.
        return False
    return shell == 'ZMQInteractiveShell'

def innermost_tqdm():
    '''
    Returns the active tqdm instance with the largest pos value, or None
    when tqdm tracks no instances (or is unavailable).
    '''
    if not hasattr(tqdm, '_instances') or len(tqdm._instances) == 0:
        return None
    return max(tqdm._instances, key=lambda bar: bar.pos)

def reporthook(*args, **kwargs):
    '''
    Creates a progress hook for use with urllib.request.urlretrieve.

    with pbar.reporthook() as hook:
        urllib.request.urlretrieve(url, filename, reporthook=hook)

    Extra arguments are forwarded to the progress function; unit_scale
    and miniters get defaults that the caller may override.
    '''
    class ReportHook(object):
        def __init__(self, t):
            self.t = t
        def __call__(self, b=1, bsize=1, tsize=None):
            # The wrapped object may not be a progress bar at all (for
            # example when output is silenced), so probe before using it.
            if tsize is not None and hasattr(self.t, 'total'):
                self.t.total = tsize
            if hasattr(self.t, 'update'):
                self.t.update(b * bsize - self.t.n)
        def __enter__(self):
            return self
        def __exit__(self, *exc):
            if hasattr(self.t, '__exit__'):
                self.t.__exit__(*exc)
    options = dict(unit_scale=True, miniters=1)
    options.update(kwargs)
    return ReportHook(__call__(None, *args, **options))

def __call__(x, *args, **kwargs):
    '''
    Wraps x in a progress function according to the module verbosity.

    If verbosity is off or tqdm is unavailable, x is returned unchanged.
    If verbosity is True, a notebook or terminal tqdm wrapper is chosen
    automatically.  Verbosity can also be set to a specific progress
    function, in which case that function is used.

    A description queued by descnext() overrides any desc argument and
    is consumed by this call.
    '''
    global next_description
    if tqdm is None or not default_verbosity:
        return x
    progress = default_verbosity
    if progress == True:
        progress = tqdm_notebook if in_notebook() else tqdm_terminal
    if next_description is not None:
        kwargs = dict(kwargs, desc=next_description)
        next_description = None
    return progress(x, *args, **kwargs)

class VerboseContextManager():
    '''
    Context manager that sets the module-level default_verbosity to v on
    entry and restores the previously saved value on exit.

    An instance created with entered=True applies its setting at once;
    the first "with" on such an instance then reuses that entry instead
    of saving and applying a second time.
    '''
    def __init__(self, v, entered=False):
        self.v, self.entered, self.saved = v, False, []
        if entered:
            # Apply the setting now, then remember that the next
            # __enter__ must not apply it again.
            self.__enter__()
            self.entered = True
    def __enter__(self):
        global default_verbosity
        if self.entered:
            # Already applied by the constructor; just consume the flag.
            self.entered = False
        else:
            self.saved.append(default_verbosity)
            default_verbosity = self.v
        return self
    def __exit__(self, exc_type, exc_value, exc_traceback):
        global default_verbosity
        default_verbosity = self.saved.pop()
    def __call__(self, v=True):
        '''
        Calling the context manager makes a new context that is
        pre-entered, so it works as both a plain function and as a
        factory for a context manager.  For a manager whose own setting
        is falsy (such as quiet), the argument is inverted.
        '''
        new_v = v if self.v else not v
        # The pre-entered constructor is what updates default_verbosity;
        # no separate assignment is needed here.
        return VerboseContextManager(new_v, entered=True)

# Use as either "with pbar.verbose:" or "pbar.verbose(False)", or also
# "with pbar.verbose(False):"
verbose = VerboseContextManager(True)

# Use as either "with pbar.quiet:" or "pbar.quiet(True)", or also
# "with pbar.quiet(True):"
quiet = VerboseContextManager(False)

class CallableModule(types.ModuleType):
    '''
    Module subclass that carries a copy of this module's namespace and
    forwards calls to the module-level __call__ function, so that the
    module object itself can be called.
    '''
    def __init__(self):
        # or super().__init__(__name__) for Python 3
        types.ModuleType.__init__(self, __name__)
        # Copy everything defined so far in this module onto the new
        # module object.
        self.__dict__.update(sys.modules[__name__].__dict__)
    def __call__(self, x, *args, **kwargs):
        return __call__(x, *args, **kwargs)

# Replace this module's entry in sys.modules with the callable wrapper.
sys.modules[__name__] = CallableModule()


================================================
FILE: seeing/pidfile.py
================================================
'''
Utility for simple distribution of work on multiple processes, by
making sure only one process is working on a job at once.
'''

import os, errno, socket, atexit, time, sys

def exit_if_job_done(directory, redo=False, force=False, verbose=True):
    '''
    Exits the process with status 0 if another process holds the lock
    file in the directory, or if the directory already contains a
    done.txt marker.  With redo or force, an existing done.txt is
    removed instead so that the job runs again.
    '''
    lockpath = os.path.join(directory, 'lockfile.pid')
    if pidfile_taken(lockpath, force=force, verbose=verbose):
        sys.exit(0)
    donefile = os.path.join(directory, 'done.txt')
    if not os.path.isfile(donefile):
        return
    with open(donefile) as f:
        msg = f.read()
    if not (redo or force):
        if verbose:
            print('%s %s' % (donefile, msg))
        sys.exit(0)
    if verbose:
        print('Removing %s %s' % (donefile, msg))
    os.remove(donefile)

def mark_job_done(directory):
    '''
    Writes done.txt into the directory, recording the pid, hostname,
    the STY environment variable (empty if unset), and the current time.
    '''
    donefile = os.path.join(directory, 'done.txt')
    with open(donefile, 'w') as f:
        details = (os.getpid(), socket.gethostname(),
                   os.getenv('STY', ''), time.strftime('%c'))
        f.write('done by %d@%s %s at %s' % details)

def pidfile_taken(path, verbose=False, force=False):
    '''
    Usage.  To grab an exclusive lock for the remaining duration of the
    current process (and exit if another process already has the lock),
    do this:

    if pidfile_taken('job_423/lockfile.pid', verbose=True):
        sys.exit(0)

    To do a batch of jobs, just run a script that does them all on
    each available machine, sharing a network filesystem.  When each
    job grabs a lock, then this will automatically distribute the
    jobs so that each one is done just once on one machine.

    Arguments:
        path: the lock file to create.  A path with no directory part
            refers to the current directory.
        verbose: if true, prints who holds (or held) the lock.
        force: if true, an existing lock file is removed and the lock
            is attempted once more.

    Returns None if the lock was obtained; otherwise a non-empty string
    describing the holder: the contents of the existing lock file,
    'empty' if it has no contents, or 'race' if it could not be read.

    Raises OSError for failures other than the lock file already
    existing.
    '''

    # Try to create the file exclusively and write my pid into it.
    try:
        # os.makedirs('') raises, so only create a directory when the
        # path actually has a directory component.
        dirname = os.path.dirname(path)
        if dirname:
            os.makedirs(dirname, exist_ok=True)
        fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_RDWR)
    except OSError as e:
        if e.errno == errno.EEXIST:
            # If we cannot because there was a race, yield the conflicter.
            conflicter = 'race'
            try:
                with open(path, 'r') as lockfile:
                    conflicter = lockfile.read().strip() or 'empty'
            except (OSError, ValueError):
                # The holder may have removed the file already, or its
                # contents may be undecodable; report it as a race.
                pass
            # Force is for manual one-time use, for deleting stale lockfiles.
            if force:
                if verbose:
                    print('Removing %s from %s' % (path, conflicter))
                os.remove(path)
                return pidfile_taken(path, verbose=verbose, force=False)
            if verbose:
                print('%s held by %s' % (path, conflicter))
            return conflicter
        else:
            # Other problems get an exception.
            raise
    # Register to delete this file on exit.
    lockfile = os.fdopen(fd, 'r+')
    atexit.register(delete_pidfile, lockfile, path)
    # Write my pid into the open file.
    lockfile.write('%d@%s %s\n' % (os.getpid(), socket.gethostname(),
        os.getenv('STY', '')))
    lockfile.flush()
    os.fsync(lockfile)
    # Return 'None' to say there was not a conflict.
    return None

def delete_pidfile(lockfile, path):
    '''
    Runs at exit after pidfile_taken succeeds: closes the open lock file
    (if one is given) and removes the lock file from disk.  Failures are
    ignored, since this is best-effort cleanup, but only I/O-type errors
    are swallowed so that interrupts and exit requests still propagate.
    '''
    if lockfile is not None:
        try:
            lockfile.close()
        except (OSError, ValueError):
            pass
    try:
        os.unlink(path)
    except OSError:
        # Already removed (or not removable); nothing more to do.
        pass


================================================
FILE: seeing/sampler.py
================================================
'''
A sampler is just a list of integers listing the indexes of the
inputs in a data set to sample.  For reproducibility, the
FixedRandomSubsetSampler uses a seeded prng to produce the same
sequence always.  FixedSubsetSampler is just a wrapper for an
explicit list of integers.

coordinate_sample solves another sampling problem: when testing
convolutional outputs, we can reduce data explosion by sampling
random points of the feature map rather than the entire feature map.
coordinate_sample does this in a deterministic way that is also
resolution-independent.
'''

import numpy
import random
from torch.utils.data.sampler import Sampler

class FixedSubsetSampler(Sampler):
    """Wraps an explicit, fixed sequence of data set indices.
    Smaller samplers can be derived from it by listing positions within
    this sampler's own sequence.
    """
    def __init__(self, samples):
        self.samples = samples

    def __iter__(self):
        return iter(self.samples)

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, key):
        return self.samples[key]

    def subset(self, new_subset):
        '''
        Returns a new FixedSubsetSampler holding the data set indices at
        the given positions of this sampler.
        '''
        chosen = self.dereference(new_subset)
        return FixedSubsetSampler(chosen)

    def dereference(self, indices):
        '''
        Maps positions within this sampler (small numbers indexing the
        sample) to the data set indices stored there (numbers indexing
        the original full set).
        '''
        translated = []
        for position in indices:
            translated.append(self.samples[position])
        return translated


class FixedRandomSubsetSampler(FixedSubsetSampler):
    """Deterministically shuffles the indices of a data set and keeps
    a slice of the shuffled order.
    Arguments:
        data_source: the data set; only its length is needed to build
            the sampler, but class_subset also indexes into it.
        start, end (optional): slice bounds applied to the shuffled
            index list.
        seed (optional): seed for the shuffle.
    """
    def __init__(self, data_source, start=None, end=None, seed=1):
        order = list(range(len(data_source)))
        random.Random(seed).shuffle(order)
        self.data_source = data_source
        super(FixedRandomSubsetSampler, self).__init__(order[start:end])

    def class_subset(self, class_filter):
        '''
        Returns the subset of this sampler whose data items satisfy the
        rule.  An int rule selects items whose second element equals
        it; any other rule is called with the data item.
        '''
        if isinstance(class_filter, int):
            def rule(d):
                return d[1] == class_filter
        else:
            rule = class_filter
        keep = [position for position, index in enumerate(self.samples)
                if rule(self.data_source[index])]
        return self.subset(keep)

def coordinate_sample(shape, sample_size, seeds, grid=13, seed=1, flat=False):
    '''
    Returns one set of sample_size sampled coordinates within the given
    shape for every entry of seeds.  Each set is generated from its own
    RandomState seeded with that entry, so results are deterministic.
    The result has shape (len(seeds), 2, sample_size), or
    (len(seeds), sample_size) of raveled indices when flat is true.
    The seed argument is not used.
    '''
    count = len(seeds)
    out_shape = (count, sample_size) if flat else (count, 2, sample_size)
    sampind = numpy.zeros(out_shape, dtype=int)
    assert sample_size <= grid
    ndim = len(shape)
    square_count = grid ** ndim
    extent = numpy.array(shape)[:, None]
    for row, row_seed in enumerate(seeds):
        rng = numpy.random.RandomState(row_seed)
        # Draw square_count grid-square numbers and keep the first
        # sample_size of them, so that changing sample_size (up to grid)
        # extends or shortens the same sequence of points.
        picks = rng.choice(square_count, square_count)[:sample_size]
        square = numpy.stack(numpy.unravel_index(picks, (grid,) * ndim))
        # Add a random offset within each square and scale into [0...1),
        # which selects the same relative locations at any resolution.
        jitter = rng.uniform(size=square.shape)
        # TODO: support affine scaling so that we can align receptive field
        # centers exactly when sampling neurons in different layers.
        coords = ((square + jitter) / grid * extent).astype(int)
        if flat:
            sampind[row] = numpy.ravel_multi_index(coords, dims=shape)
        else:
            sampind[row] = coords
    return sampind

def main():
    '''
    Command-line entry point: copies a fixed random sample of the files
    found under indir into a newly created outdir.  Files are copied
    flat, using their base names; a name that has already been used
    gets a numeric suffix so that nothing is overwritten.  Exits with
    status 1 if outdir already exists.
    '''
    from . import parallelfolder
    # sys is imported here because the module only imports it under the
    # __main__ guard, which does not help when main() is called directly.
    import argparse, os, shutil, sys

    parser = argparse.ArgumentParser(description='Net dissect utility',
            prog='python -m %s.sampler' % (__package__))
    parser.add_argument('indir')
    parser.add_argument('outdir')
    parser.add_argument('--size', type=int, default=100)
    parser.add_argument('--test', action='store_true', default=False)
    args = parser.parse_args()
    if os.path.exists(args.outdir):
        print('%s already exists' % args.outdir)
        sys.exit(1)
    os.makedirs(args.outdir)
    dataset = parallelfolder.ParallelImageFolders([args.indir])
    sampler = FixedRandomSubsetSampler(dataset, end=args.size)
    seen_filenames = set()
    def number_filename(filename, number):
        # Insert _<number> before the extension, or append it if the
        # name has no extension.
        if '.' in filename:
            a, b = filename.rsplit('.', 1)
            return a + '_%d.' % number + b
        return filename + '_%d' % number
    # Iterate the sampler itself: it holds at most args.size indices, and
    # fewer when the dataset is smaller than the requested size.
    for i in sampler:
        sourcefile = dataset.images[i][0]
        filename = os.path.basename(sourcefile)
        template = filename
        num = 0
        while filename in seen_filenames:
            num += 1
            filename = number_filename(template, num)
        seen_filenames.add(filename)
        # The dataset paths already include the root directory, so they
        # are used as-is rather than being joined onto indir again.
        shutil.copy(sourcefile, os.path.join(args.outdir, filename))

def test():
    '''
    Regression checks for this module, run when the module is executed
    with --test: compares coordinate_sample and FixedRandomSubsetSampler
    outputs against fixed expected values.
    '''
    from numpy.testing import assert_almost_equal
    # Test that coordinate_sample is deterministic, in-range, and scalable.
    assert_almost_equal(coordinate_sample((26, 26), 10, range(101, 102)),
            [[[14,  0, 12, 11,  8, 13, 11, 20,  7, 20],
              [ 9, 22,  7, 11, 23, 18, 21, 15,  2,  5]]])
    # Same seed at half the resolution: the expected coordinates are the
    # values above halved (written out as // 2 for the last column).
    assert_almost_equal(coordinate_sample((13, 13), 10, range(101, 102)),
            [[[ 7,  0,  6,  5,  4,  6,  5, 10,  3, 20 // 2],
              [ 4, 11,  3,  5, 11,  9, 10,  7,  1,  5 // 2]]])
    # Flat output; the row for seed 101 must be the same in both of the
    # following calls regardless of which other seeds are requested.
    assert_almost_equal(coordinate_sample((13, 13), 10, range(100, 102),
        flat=True),
            [[  8,  24,  67, 103,  87,  79, 138,  94,  98,  53],
             [ 95,  11,  81,  70,  63,  87,  75, 137,  40, 2+10*13]])
    assert_almost_equal(coordinate_sample((13, 13), 10, range(101, 103),
        flat=True),
            [[ 95,  11,  81,  70,  63,  87,  75, 137,  40, 132],
             [  0,  78, 114, 111,  66,  45,  72,  73,  79, 135]])
    assert_almost_equal(coordinate_sample((26, 26), 10, range(101, 102),
        flat=True),
            [[373,  22, 319, 297, 231, 356, 307, 535, 184, 5+20*26]])
    # Test FixedRandomSubsetSampler
    fss = FixedRandomSubsetSampler(range(10))
    assert len(fss) == 10
    assert_almost_equal(list(fss), [6, 8, 9, 7, 5, 3, 0, 4, 1, 2])
    # start=3, end=8 keeps positions 3..7 of the shuffled order above.
    fss = FixedRandomSubsetSampler(range(10), 3, 8)
    assert len(fss) == 5
    assert_almost_equal(list(fss), [7, 5, 3, 0, 4])
    # An int class_filter selects items whose second element equals it.
    fss = FixedRandomSubsetSampler([(i, i % 3) for i in range(10)]
            ).class_subset(class_filter=1)
    assert len(fss) == 3
    assert_almost_equal(list(fss), [7, 4, 1])

# When run as a script: "--test" anywhere on the command line runs the
# regression checks; otherwise the sampling utility in main() runs.
if __name__ == '__main__':
    import sys
    if '--test' in sys.argv[1:]:
        test()
    else:
        main()


================================================
FILE: seeing/segmenter.py
================================================
# Usage as a simple differentiable segmenter base class

import os, torch, numpy, json, glob
import skimage.morphology
from collections import OrderedDict
from . import upsegmodel
from urllib.request import urlretrieve

class BaseSegmenter:
    '''
    Interface for segmenters.  Subclasses must override both methods;
    the base implementations raise NotImplementedError.
    '''
    def get_label_and_category_names(self):
        '''
        Returns two lists: first, a list of tuples [(label, category), ...]
        where the label and category are human-readable strings indicating
        the meaning of a segmentation class.  The 0th segmentation class
        should be reserved for a label ('-') that means "no prediction."
        The second list should just be a list of [category,...] listing
        all categories in a canonical order.
        '''
        # NotImplemented is a comparison sentinel and is not callable;
        # NotImplementedError is the exception for abstract methods.
        raise NotImplementedError()

    def segment_batch(self, tensor_images, downsample=1):
        '''
        Returns a multilabel segmentation for the given batch of (RGB [-1...1])
        images.  Each pixel of the result is a torch.long indicating a
        predicted class number.  Multiple classes can be predicted for
        the same pixel: output shape is (n, multipred, y, x), where
        multipred is 3, 5, or 6, for how many different predicted labels can
        be given for each pixel (depending on whether subdivision is being
        used).  If downsample is specified, then the output y and x dimensions
        are downsampled from the original image.
        '''
        raise NotImplementedError()

class UnifiedParsingSegmenter(BaseSegmenter):
    '''
    This is a wrapper for a more complicated multi-class segmenter,
    as described in https://arxiv.org/pdf/1807.10221.pdf, and as
    released in https://github.com/CSAILVision/unifiedparsing.
    For our purposes and to simplify processing, we do not use
    whole-scene predictions, and we only consume part segmentations
    for the three largest object classes (sky, building, person).
    '''

    def __init__(self, segsizes=None):
        '''
        Downloads (if needed) and loads the segmentation model onto the
        GPU, then builds the shared label numbering used for objects,
        materials and parts.

        segsizes: list of square input sizes at which the model is run;
            defaults to [256].
        '''
        # Create a segmentation model
        if segsizes is None:
            segsizes = [256]
        segvocab = 'upp'
        segarch = ('resnet50', 'upernet')
        epoch = 40
        ensure_upp_segmenter_downloaded('datasets/segmodel')
        segmodel = load_unified_parsing_segmentation_model(
                segarch, segvocab, epoch)
        segmodel.cuda()
        self.segmodel = segmodel
        self.segsizes = segsizes
        # Assign class numbers for parts.
        # Part numbers start after label 0, the object labels and the
        # material labels (each list's own 0th entry is not counted).
        first_partnumber = (1 +
                (len(segmodel.labeldata['object']) - 1) +
                (len(segmodel.labeldata['material']) - 1))
        partobjects = segmodel.labeldata['object_part'].keys()
        partnumbers = {}
        partnames = []
        objectnumbers = {k: v
                for v, k in enumerate(segmodel.labeldata['object'])}
        part_index_translation = []
        # We merge some classes.  For example "door" is both an object
        # and a part of a building.  To avoid confusion, we just count
        # such classes as objects, and add part scores to the same index.
        for owner in partobjects:
            part_list = segmodel.labeldata['object_part'][owner]
            numeric_part_list = []
            for part in part_list:
                if part in objectnumbers:
                    # Part name is also an object name: reuse its number.
                    numeric_part_list.append(objectnumbers[part])
                elif part in partnumbers:
                    # Part name already numbered under another owner.
                    numeric_part_list.append(partnumbers[part])
                else:
                    # New part name: give it the next free part number.
                    partnumbers[part] = len(partnames) + first_partnumber
                    partnames.append(part)
                    numeric_part_list.append(partnumbers[part])
            part_index_translation.append(torch.tensor(numeric_part_list))
        # Object numbers of the part owners, in the same order as the
        # translation tensors stored in part_index.
        self.objects_with_parts = [objectnumbers[obj] for obj in partobjects]
        self.part_index = part_index_translation
        self.part_names = partnames
        # For now we'll just do object and material labels.
        self.num_classes = 1 + (
                len(segmodel.labeldata['object']) - 1) + (
                len(segmodel.labeldata['material']) - 1) + len(partnames)
        self.num_object_classes = len(self.segmodel.labeldata['object']) - 1

    def get_label_and_category_names(self, dataset=None):
        '''
        Lists label and category names.  Returns a list of
        (label, category) tuples in shared-numbering order, and the list
        of categories.  The dataset argument is not used.
        '''
        # Labels are ordered as follows:
        # 0, [object labels] [divided object labels] [materials] [parts]
        # The zero label is reserved to mean 'no prediction'.
        # suffixes is empty here, so no divided labels are produced.
        suffixes = []
        divided_labels = []
        for suffix in suffixes:
            divided_labels.extend([('%s-%s' % (label, suffix), 'part')
                for label in self.segmodel.labeldata['object'][1:]])
        # Create the whole list of labels
        labelcats = (
                [(label, 'object')
                    for label in self.segmodel.labeldata['object']] +
                divided_labels +
                [(label, 'material')
                    for label in self.segmodel.labeldata['material'][1:]] +
                [(label, 'part') for label in self.part_names])
        return labelcats, ['object', 'part', 'material']

    def raw_seg_prediction(self, tensor_images, downsample=1):
        '''
        Runs the segmentation model once for each size in self.segsizes
        and sums the predictions.  Returns (pred, part_pred), where pred
        maps 'object' and 'material' to summed score tensors and
        part_pred maps each part-owner position to its summed part
        scores, all at the (downsampled) output resolution.
        '''
        y, x = tensor_images.shape[2:]
        b = len(tensor_images)
        # Rescale from [-1...1] to [0...255].
        tensor_images = (tensor_images + 1) / 2 * 255
        tensor_images = torch.flip(tensor_images, (1,)) # BGR!!!?
        # Subtract fixed per-channel constants (presumably the channel
        # means the model was trained with -- TODO confirm).
        tensor_images -= torch.tensor([102.9801, 115.9465, 122.7717]).to(
                   dtype=tensor_images.dtype, device=tensor_images.device
                   )[None,:,None,None]
        seg_shape = (y // downsample, x // downsample)
        # We want these to be multiples of 32 for the model.
        # NOTE(review): the sizes are used exactly as given in segsizes;
        # nothing here rounds them.
        sizes = [(s, s) for s in self.segsizes]
        # Accumulators for the summed scores, one per category.
        pred = {category: torch.zeros(
            len(tensor_images), len(self.segmodel.labeldata[category]),
            seg_shape[0], seg_shape[1]).cuda()
            for category in ['object', 'material']}
        # Accumulators for part scores, one per part-owning object.
        part_pred = {partobj_index: torch.zeros(
            len(tensor_images), len(partindex),
            seg_shape[0], seg_shape[1]).cuda()
            for partobj_index, partindex in enumerate(self.part_index)}
        for size in sizes:
            # Skip resizing when the input is already the requested size.
            if size == tensor_images.shape[2:]:
                resized = tensor_images
            else:
                resized = torch.nn.AdaptiveAvgPool2d(size)(tensor_images)
            r_pred = self.segmodel(
                dict(img=resized), seg_size=seg_shape)
            for k in pred:
                pred[k] += r_pred[k]
            for k in part_pred:
                part_pred[k] += r_pred['part'][k]
        return pred, part_pred

    def segment_batch(self, tensor_images, downsample=1):
        '''
        Returns a multilabel segmentation for the given batch of (RGB [-1...1])
        images.  Each pixel of the result is a torch.long indicating a
        predicted class number.  Multiple classes can be predicted for
        the same pixel: output shape is (n, 3, y, x), where the three
        channels hold the object, material, and part predictions in the
        shared numbering scheme (0 means no prediction).  If downsample
        is specified, then the output y and x dimensions are downsampled
        from the original image.
        '''
        pred, part_pred = self.raw_seg_prediction(tensor_images,
                downsample=downsample)
        y, x = tensor_images.shape[2:]
        seg_shape = (y // downsample, x // downsample)
        segs = torch.zeros(len(tensor_images), 3, # objects, materials, parts
                seg_shape[0], seg_shape[1],
                dtype=torch.long, device=tensor_images.device)
        _, segs[:,0] = torch.max(pred['object'], dim=1)
        # Get materials and translate to shared numbering scheme
        _, segs[:,1] = torch.max(pred['material'], dim=1)
        # Remember which pixels had material 0 so that they stay 0 after
        # the offset is added.
        maskout = (segs[:,1] == 0)
        segs[:,1] += (len(self.segmodel.labeldata['object']) - 1)
        segs[:,1][maskout] = 0
        # Now deal with subparts of sky, buildings, people
        for i, object_index in enumerate(self.objects_with_parts):
            trans = self.part_index[i].to(segs.device)
            # Get the argmax, and then translate to shared numbering scheme
            seg = trans[torch.max(part_pred[i], dim=1)[1]]
            # Only trust the parts where the prediction also predicts the
            # owning object.
            mask = (segs[:,0] == object_index)
            segs[:,2][mask] = seg[mask]
        return segs

def load_unified_parsing_segmentation_model(segmodel_arch, segvocab, epoch):
    '''
    Builds the segmentation module from the files stored under
    datasets/segmodel/<segvocab>-<encoder arch>-<decoder arch>: label
    data from labels.json, and encoder and decoder weights for the given
    epoch.  segmodel_arch is an (encoder, decoder) architecture tuple,
    for example ('resnet101', 'upernet').  The model is returned in eval
    mode.
    '''
    segmodel_dir = 'datasets/segmodel/%s-%s-%s' % ((segvocab,) + segmodel_arch)

    def weights_path(part):
        return os.path.join(segmodel_dir, '%s_epoch_%d.pth' % (part, epoch))

    # Load json of class names and part/object structure
    with open(os.path.join(segmodel_dir, 'labels.json')) as f:
        labeldata = json.load(f)
    nr_classes = {}
    for category in ['object', 'scene', 'material']:
        nr_classes[category] = len(labeldata[category])
    nr_classes['part'] = sum(map(len, labeldata['object_part'].values()))
    # Create a segmentation model
    encoder_arch, decoder_arch = segmodel_arch[0], segmodel_arch[1]
    segbuilder = upsegmodel.ModelBuilder()
    seg_encoder = segbuilder.build_encoder(
            arch=encoder_arch,
            fc_dim=2048,
            weights=weights_path('encoder'))
    seg_decoder = segbuilder.build_decoder(
            arch=decoder_arch,
            fc_dim=2048, use_softmax=True,
            nr_classes=nr_classes,
            weights=weights_path('decoder'))
    segmodel = upsegmodel.SegmentationModule(
            seg_encoder, seg_decoder, labeldata)
    segmodel.categories = ['object', 'part', 'material']
    segmodel.eval()
    return segmodel

def ensure_upp_segmenter_downloaded(directory):
    '''
    Makes sure the upp-resnet50-upernet segmenter files are present in
    <directory>/upp-resnet50-upernet, downloading whichever of the
    decoder checkpoint, encoder checkpoint and labels.json are missing.

    Each file is first downloaded under a temporary '.part' name and only
    renamed to its final name once the transfer has finished.  Without
    that, an interrupted download would leave a truncated file behind
    which later calls would mistake for a complete one and never replace.
    '''
    baseurl = 'http://netdissect.csail.mit.edu/data/segmodel'
    dirname = 'upp-resnet50-upernet'
    files = ['decoder_epoch_40.pth', 'encoder_epoch_40.pth', 'labels.json']
    download_dir = os.path.join(directory, dirname)
    os.makedirs(download_dir, exist_ok=True)
    for fn in files:
        target = os.path.join(download_dir, fn)
        if os.path.isfile(target):
            continue # Skip files already downloaded
        url = '%s/%s/%s' % (baseurl, dirname, fn)
        print('Downloading %s' % url)
        partial = target + '.part'
        try:
            urlretrieve(url, partial)
            # Rename within the same directory so the final name only
            # ever refers to a fully downloaded file.
            os.replace(partial, target)
        finally:
            # After a failed transfer, do not leave the fragment around.
            if os.path.exists(partial):
                os.remove(partial)
    assert os.path.isfile(os.path.join(download_dir, 'labels.json'))

def test_main():
    '''
    Smoke test for the unified segmenter.

    Segments one test image on the GPU and, for every nonzero label that
    occurs in the segmentation, checks that predict_single_class returns
    a mask agreeing with the segmentation, then prints the mean
    prediction inside and outside of that mask.
    '''
    from PIL import Image
    image = Image.open('script/testdata/test_church_242.jpg')
    pixels = torch.from_numpy(numpy.asarray(image)).permute(2, 0, 1).float()
    # Rescale from [0, 255] to [-1, 1] and add a batch dimension.
    tensor_im = (pixels / 255 * 2 - 1)[None, :, :, :].cuda()

    segmenter = UnifiedParsingSegmenter()
    seg = segmenter.segment_batch(tensor_im)
    counts = torch.bincount(seg.view(-1))
    labels, _ = segmenter.get_label_and_category_names()

    for label in counts.nonzero()[:,0]:
        index = label.item()
        if not index:
            continue  # label 0 is not checked
        # What is the prediction for this class?
        pred, mask = segmenter.predict_single_class(tensor_im, index)
        n_pixels = counts[label].item()
        # The mask must select exactly the pixels carrying this label
        # in the segmentation.
        assert mask.sum().item() == n_pixels
        assert len(((seg == label).max(1)[0] - mask).nonzero()) == 0
        inside_pred = pred[mask].mean().item()
        outside_pred = pred[~mask].mean().item()
        print('%s (%s, #%d): %d pixels, pred %.2g inside %.2g outside' %
            (labels[index] + (index, n_pixels, inside_pred, outside_pred)))

# Run the segmenter smoke test when this file is executed as a script.
if __name__ == '__main__':
    test_main()


================================================
FILE: seeing/upsegmodel/__init__.py
================================================
from .models import ModelBuilder, SegmentationModule


================================================
FILE: seeing/upsegmodel/models.py
================================================
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from . import resnet, resnext
try:
    from lib.nn import SynchronizedBatchNorm2d
except ImportError:
    from torch.nn import BatchNorm2d as SynchronizedBatchNorm2d


class SegmentationModuleBase(nn.Module):
    """Base module providing the accuracy and part-loss helpers."""

    def __init__(self):
        super(SegmentationModuleBase, self).__init__()

    @staticmethod
    def pixel_acc(pred, label, ignore_index=-1):
        """Fraction of labelled positions whose argmax over dim 1 is correct.

        Positions where ``label == ignore_index`` are left out of both the
        numerator and the denominator; a small epsilon keeps the division
        defined when nothing is labelled.
        """
        predicted = torch.max(pred, dim=1)[1]
        counted = (label != ignore_index).long()
        hits = (predicted == label).long()
        n_correct = torch.sum(counted * hits)
        n_counted = torch.sum(counted)
        return n_correct.float() / (n_counted.float() + 1e-10)

    @staticmethod
    def part_pixel_acc(pred_part, gt_seg_part, gt_seg_object, object_label, valid):
        """Count correct part predictions inside one object class.

        Only positions whose ground-truth object equals ``object_label``
        are considered.  Counts are taken per sample and weighted by
        ``valid`` before being summed over the batch.

        Returns ``(correct_count, pixel_count)``.
        """
        in_object = (gt_seg_object == object_label)
        predicted = torch.max(pred_part, dim=1)[1]
        hits = in_object * (predicted == gt_seg_part)
        hits_per_sample = hits.view(hits.size(0), -1).sum(dim=1)
        pixels_per_sample = in_object.view(in_object.size(0), -1).sum(dim=1)
        n_correct = torch.sum(hits_per_sample * valid)
        n_pixels = torch.sum(pixels_per_sample * valid)
        return n_correct, n_pixels

    @staticmethod
    def part_loss(pred_part, gt_seg_part, gt_seg_object, object_label, valid):
        """NLL loss of the part prediction, restricted to one object class.

        The loss is evaluated per position, zeroed outside the object,
        summed per sample, weighted by ``valid`` and divided by the number
        of object pixels in the valid samples (at least 1).
        """
        in_object = (gt_seg_object == object_label)
        # Outside the object the target is forced to 0; those positions
        # are multiplied by zero below, so the value only has to be a
        # legal class index.
        targets = gt_seg_part * in_object.long()
        per_pixel = F.nll_loss(pred_part, targets, reduction='none')
        per_pixel = per_pixel * in_object.float()
        per_sample = per_pixel.view(per_pixel.size(0), -1).sum(dim=1)
        object_pixels = in_object.view(in_object.shape[0], -1).sum(dim=1)
        denominator = torch.clamp((object_pixels * valid).sum(), 1).float()
        return (per_sample * valid.float()).sum() / denominator


class SegmentationModule(SegmentationModuleBase):
    """Encoder/decoder pair with multi-task losses and accuracy metrics.

    ``labeldata`` is the label description read from json; its 'object',
    'part' and 'object_part' entries are used to map every object that has
    parts to the list of its part indices.  That mapping is also attached
    to the decoder, which uses it to split the part prediction per object.
    """

    def __init__(self, net_enc, net_dec, labeldata, loss_scale=None):
        super(SegmentationModule, self).__init__()
        self.encoder = net_enc
        self.decoder = net_dec

        # Criteria for the heads that use a plain NLL loss.  Index 0 is
        # background for object/material, -1 is unlabelled for scene.
        self.crit_dict = nn.ModuleDict()
        self.crit_dict["object"] = nn.NLLLoss(ignore_index=0)
        self.crit_dict["material"] = nn.NLLLoss(ignore_index=0)
        self.crit_dict["scene"] = nn.NLLLoss(ignore_index=-1)

        # Weight of every task in the total loss.
        self.loss_scale = (
            {"object": 1, "part": 0.5, "scene": 0.25, "material": 1}
            if loss_scale is None else loss_scale)

        # Label data - read from json
        self.labeldata = labeldata
        object_ids = {name: idx for idx, name in enumerate(labeldata['object'])}
        part_ids = {name: idx for idx, name in enumerate(labeldata['part'])}
        parts_by_object = {}
        for object_name, part_names in labeldata['object_part'].items():
            parts_by_object[object_ids[object_name]] = [
                part_ids[part_name] for part_name in part_names]
        self.object_part = parts_by_object
        self.object_with_part = sorted(parts_by_object)
        self.decoder.object_part = self.object_part
        self.decoder.object_with_part = self.object_with_part

    def forward(self, feed_dict, *, seg_size=None):
        """Run inference (``seg_size`` given) or a training step (omitted).

        Inference returns the decoder predictions with every head enabled.

        Training expects ``feed_dict['source_idx']`` to be 0 (object, part
        and scene heads are trained) or 1 (only the material head is
        trained) and raises ValueError otherwise.  It returns
        ``{'metric': ..., 'loss': ...}``, where the loss dict additionally
        contains the weighted sum under 'total'.
        """
        if seg_size is not None:  # inference
            output_switch = {"object": True, "part": True, "scene": True, "material": True}
            features = self.encoder(feed_dict['img'], return_feature_maps=True)
            return self.decoder(features, output_switch=output_switch, seg_size=seg_size)

        # training
        if feed_dict['source_idx'] == 0:
            material_only = False
        elif feed_dict['source_idx'] == 1:
            material_only = True
        else:
            raise ValueError
        output_switch = {"object": not material_only, "part": not material_only,
                         "scene": not material_only, "material": material_only}

        pred = self.decoder(
            self.encoder(feed_dict['img'], return_feature_maps=True),
            output_switch=output_switch
        )

        # loss (heads that were switched off come back as None)
        loss_dict = {}
        if pred['object'] is not None:
            loss_dict['object'] = self.crit_dict['object'](
                pred['object'], feed_dict['seg_object'])
        if pred['part'] is not None:
            # One loss term per object that has parts.
            loss_dict['part'] = sum(
                self.part_loss(
                    pred['part'][idx_part], feed_dict['seg_part'],
                    feed_dict['seg_object'], object_label,
                    feed_dict['valid_part'][:, idx_part])
                for idx_part, object_label in enumerate(self.object_with_part))
        if pred['scene'] is not None:
            loss_dict['scene'] = self.crit_dict['scene'](
                pred['scene'], feed_dict['scene_label'])
        if pred['material'] is not None:
            loss_dict['material'] = self.crit_dict['material'](
                pred['material'], feed_dict['seg_material'])
        loss_dict['total'] = sum(
            value * self.loss_scale[task] for task, value in loss_dict.items())

        # metric
        metric_dict = {}
        if pred['object'] is not None:
            metric_dict['object'] = self.pixel_acc(
                pred['object'], feed_dict['seg_object'], ignore_index=0)
        if pred['material'] is not None:
            metric_dict['material'] = self.pixel_acc(
                pred['material'], feed_dict['seg_material'], ignore_index=0)
        if pred['part'] is not None:
            part_counts = [
                self.part_pixel_acc(
                    pred['part'][idx_part], feed_dict['seg_part'],
                    feed_dict['seg_object'], object_label,
                    feed_dict['valid_part'][:, idx_part])
                for idx_part, object_label in enumerate(self.object_with_part)]
            acc_sum = sum(acc for acc, _ in part_counts)
            pixel_sum = sum(pixel for _, pixel in part_counts)
            metric_dict['part'] = acc_sum.float() / (pixel_sum.float() + 1e-10)
        if pred['scene'] is not None:
            metric_dict['scene'] = self.pixel_acc(
                pred['scene'], feed_dict['scene_label'], ignore_index=-1)

        return {'metric': metric_dict, 'loss': loss_dict}


def conv3x3(in_planes, out_planes, stride=1, has_bias=False):
    """Return a 3x3 ``nn.Conv2d`` with padding 1.

    ``stride`` is passed through to the convolution and ``has_bias``
    controls whether it has a bias term.
    """
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=has_bias)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)


def conv3x3_bn_relu(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 convolution followed by batch norm and ReLU."""
    layers = [
        conv3x3(in_planes, out_planes, stride),
        SynchronizedBatchNorm2d(out_planes),
        nn.ReLU(inplace=True),
    ]
    return nn.Sequential(*layers)


class ModelBuilder:
    """Factory for the encoder and decoder of the segmentation model."""

    def __init__(self):
        pass

    @staticmethod
    def weights_init(m):
        """Custom weight initialization, meant to be used with ``apply``.

        Modules whose class name contains 'Conv' get Kaiming-normal
        weights; modules whose class name contains 'BatchNorm' get weight
        1 and bias 1e-4.  Every other module is left untouched.
        """
        kind = type(m).__name__
        if 'Conv' in kind:
            nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu')
        elif 'BatchNorm' in kind:
            m.weight.data.fill_(1.)
            m.bias.data.fill_(1e-4)

    def build_encoder(self, arch='resnet50_dilated8', fc_dim=512, weights=''):
        """Build the encoder for ``arch`` and optionally load ``weights``.

        Supported values of ``arch`` are 'resnet50', 'resnet101' and
        'resnext101'; anything else, including the default value, raises
        an Exception.  When ``weights`` is empty the backbone is created
        with ``pretrained=True``; otherwise the state dict stored at that
        path is loaded non-strictly.  ``fc_dim`` is not used here.
        """
        use_pretrained = (len(weights) == 0)
        if arch in ('resnet50', 'resnet101'):
            backbone = resnet.__dict__[arch](pretrained=use_pretrained)
        elif arch == 'resnext101':
            # the Resnet wrapper below works for resnext as well
            backbone = resnext.__dict__['resnext101'](pretrained=use_pretrained)
        else:
            raise Exception('Architecture undefined!')
        net_encoder = Resnet(backbone)

        if not use_pretrained:
            # Map every storage to the CPU while loading.
            state = torch.load(weights, map_location=lambda storage, loc: storage)
            net_encoder.load_state_dict(state, strict=False)
        return net_encoder

    def build_decoder(self, nr_classes,
                      arch='ppm_bilinear_deepsup', fc_dim=512,
                      weights='', use_softmax=False):
        """Build the UPerNet decoder for ``arch`` and optionally load ``weights``.

        'upernet_lite' uses an FPN width of 256 and 'upernet' one of 512;
        anything else, including the default value, raises an Exception.
        The decoder is initialized with ``weights_init`` and then, when
        ``weights`` is non-empty, the state dict stored at that path is
        loaded non-strictly.
        """
        if arch == 'upernet_lite':
            fpn_dim = 256
        elif arch == 'upernet':
            fpn_dim = 512
        else:
            raise Exception('Architecture undefined!')
        net_decoder = UPerNet(
            nr_classes=nr_classes,
            fc_dim=fc_dim,
            use_softmax=use_softmax,
            fpn_dim=fpn_dim)

        net_decoder.apply(self.weights_init)
        if len(weights) > 0:
            # Map every storage to the CPU while loading.
            state = torch.load(weights, map_location=lambda storage, loc: storage)
            net_decoder.load_state_dict(state, strict=False)
        return net_decoder


class Resnet(nn.Module):
    """Encoder built from the layers of an existing resnet-style network.

    The wrapped network must expose a three-convolution stem
    (conv1..conv3 with their bn/relu), ``maxpool`` and ``layer1`` to
    ``layer4``.
    """

    def __init__(self, orig_resnet):
        super(Resnet, self).__init__()

        # take pretrained resnet, except AvgPool and FC
        reused = (
            'conv1', 'bn1', 'relu1',
            'conv2', 'bn2', 'relu2',
            'conv3', 'bn3', 'relu3',
            'maxpool',
            'layer1', 'layer2', 'layer3', 'layer4',
        )
        for name in reused:
            setattr(self, name, getattr(orig_resnet, name))

    def forward(self, x, return_feature_maps=False):
        """Run the network on ``x``.

        Returns the outputs of layer1..layer4 as a list when
        ``return_feature_maps`` is true, otherwise a one-element list
        holding only the output of layer4.
        """
        # stem
        x = self.relu1(self.bn1(self.conv1(x)))
        x = self.relu2(self.bn2(self.conv2(x)))
        x = self.relu3(self.bn3(self.conv3(x)))
        x = self.maxpool(x)

        # residual stages, remembering the output of each one
        conv_out = []
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
            conv_out.append(x)

        return conv_out if return_feature_maps else [x]


# upernet
class UPerNet(nn.Module):
    """UPerNet decoder with scene, object, part and material heads.

    A pyramid pooling module (PPM) runs on the deepest encoder feature
    map and a feature pyramid (FPN) merges its output top-down with the
    shallower maps.  The scene head reads the PPM output, the material
    head reads the finest FPN level, and the object and part heads read
    the fusion of all FPN levels.

    ``forward`` reads ``self.object_part`` and ``self.object_with_part``.
    They are not set in this class; SegmentationModule assigns them to
    the decoder it wraps.

    Args:
        nr_classes: dict giving the number of classes under the keys
            'scene', 'object', 'part' and 'material'.
        fc_dim: number of channels of the deepest encoder feature map.
        use_softmax: when true, ``forward`` post-processes for inference
            (resize to ``seg_size`` and softmax); otherwise it returns
            log-softmax outputs for training.
        pool_scales: pooled grid size of every PPM branch.
        fpn_inplanes: channel counts of the encoder feature maps, in the
            order they are passed to ``forward``.
        fpn_dim: number of channels used inside the FPN and the heads.
    """

    def __init__(self, nr_classes, fc_dim=4096,
                 use_softmax=False, pool_scales=(1, 2, 3, 6),
                 fpn_inplanes=(256,512,1024,2048), fpn_dim=256):
        # Lazy import so that compilation isn't needed if not being used.
        from .prroi_pool import PrRoIPool2D
        super(UPerNet, self).__init__()
        self.use_softmax = use_softmax

        # NOTE: submodule names and registration order are kept exactly as
        # they are, since they determine the state dict keys.

        # PPM Module
        self.ppm_pooling = []
        self.ppm_conv = []

        for scale in pool_scales:
            # we use the feature map size instead of input image size, so down_scale = 1.0
            self.ppm_pooling.append(PrRoIPool2D(scale, scale, 1.))
            self.ppm_conv.append(nn.Sequential(
                nn.Conv2d(fc_dim, 512, kernel_size=1, bias=False),
                SynchronizedBatchNorm2d(512),
                nn.ReLU(inplace=True)
            ))
        self.ppm_pooling = nn.ModuleList(self.ppm_pooling)
        self.ppm_conv = nn.ModuleList(self.ppm_conv)
        self.ppm_last_conv = conv3x3_bn_relu(fc_dim + len(pool_scales)*512, fpn_dim, 1)

        # FPN Module
        self.fpn_in = []
        for fpn_inplane in fpn_inplanes[:-1]: # skip the top layer
            self.fpn_in.append(nn.Sequential(
                nn.Conv2d(fpn_inplane, fpn_dim, kernel_size=1, bias=False),
                SynchronizedBatchNorm2d(fpn_dim),
                nn.ReLU(inplace=True)
            ))
        self.fpn_in = nn.ModuleList(self.fpn_in)

        self.fpn_out = []
        for i in range(len(fpn_inplanes) - 1): # skip the top layer
            self.fpn_out.append(nn.Sequential(
                conv3x3_bn_relu(fpn_dim, fpn_dim, 1),
            ))
        self.fpn_out = nn.ModuleList(self.fpn_out)

        self.conv_fusion = conv3x3_bn_relu(len(fpn_inplanes) * fpn_dim, fpn_dim, 1)

        # background included. if ignore in loss, output channel 0 will not be trained.
        self.nr_scene_class, self.nr_object_class, self.nr_part_class, self.nr_material_class = \
            nr_classes['scene'], nr_classes['object'], nr_classes['part'], nr_classes['material']

        # input: PPM out, input_dim: fpn_dim
        self.scene_head = nn.Sequential(
            conv3x3_bn_relu(fpn_dim, fpn_dim, 1),
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(fpn_dim, self.nr_scene_class, kernel_size=1, bias=True)
        )

        # input: Fusion out, input_dim: fpn_dim
        self.object_head = nn.Sequential(
            conv3x3_bn_relu(fpn_dim, fpn_dim, 1),
            nn.Conv2d(fpn_dim, self.nr_object_class, kernel_size=1, bias=True)
        )

        # input: Fusion out, input_dim: fpn_dim
        self.part_head = nn.Sequential(
            conv3x3_bn_relu(fpn_dim, fpn_dim, 1),
            nn.Conv2d(fpn_dim, self.nr_part_class, kernel_size=1, bias=True)
        )

        # input: FPN_2 (P2), input_dim: fpn_dim
        self.material_head = nn.Sequential(
            conv3x3_bn_relu(fpn_dim, fpn_dim, 1),
            nn.Conv2d(fpn_dim, self.nr_material_class, kernel_size=1, bias=True)
        )

    def forward(self, conv_out, output_switch=None, seg_size=None):
        """Decode encoder feature maps into per-task predictions.

        Args:
            conv_out: list of encoder feature maps; the last entry is
                used for the PPM and the others feed the FPN laterals.
            output_switch: dict with boolean entries 'object', 'part',
                'scene' and 'material' selecting the heads to evaluate.
                ``None`` evaluates all four heads.
            seg_size: size the object, material and part predictions are
                resized to when ``use_softmax`` is set.

        Returns:
            dict with one entry per key of ``output_switch``.  Heads that
            were switched off stay ``None``.  'part' is a list with one
            tensor per object that has parts.  Outputs are softmax
            probabilities when ``use_softmax`` is set and log-softmax
            values otherwise.
        """
        if output_switch is None:
            # Without a switch there is nothing to iterate over below, so
            # default to evaluating every head.
            output_switch = {"object": True, "part": True, "scene": True, "material": True}

        output_dict = {k: None for k in output_switch.keys()}

        conv5 = conv_out[-1]
        input_size = conv5.size()
        roi = [] # fake rois, just used for pooling
        for i in range(input_size[0]): # batch size
            roi.append(torch.Tensor([i, 0, 0, input_size[3], input_size[2]]).view(1, -1)) # b, x0, y0, x1, y1
        roi = torch.cat(roi, dim=0).type_as(conv5)
        ppm_out = [conv5]
        for pool_scale, pool_conv in zip(self.ppm_pooling, self.ppm_conv):
            # Pool the whole feature map to a small grid, resize it back
            # to the feature map size, then reduce the channels.
            ppm_out.append(pool_conv(F.interpolate(
                pool_scale(conv5, roi.detach()),
                (input_size[2], input_size[3]),
                mode='bilinear', align_corners=False)))
        ppm_out = torch.cat(ppm_out, 1)
        f = self.ppm_last_conv(ppm_out)

        if output_switch['scene']: # scene
            output_dict['scene'] = self.scene_head(f)

        if output_switch['object'] or output_switch['part'] or output_switch['material']:
            fpn_feature_list = [f]
            for i in reversed(range(len(conv_out) - 1)):
                conv_x = conv_out[i]
                conv_x = self.fpn_in[i](conv_x) # lateral branch

                f = F.interpolate(
                    f, size=conv_x.size()[2:], mode='bilinear', align_corners=False) # top-down branch
                f = conv_x + f

                fpn_feature_list.append(self.fpn_out[i](f))
            fpn_feature_list.reverse() # [P2 - P5]

            # material
            if output_switch['material']:
                output_dict['material'] = self.material_head(fpn_feature_list[0])

            if output_switch['object'] or output_switch['part']:
                output_size = fpn_feature_list[0].size()[2:]
                fusion_list = [fpn_feature_list[0]]
                for i in range(1, len(fpn_feature_list)):
                    fusion_list.append(F.interpolate(
                        fpn_feature_list[i],
                        output_size,
                        mode='bilinear', align_corners=False))
                fusion_out = torch.cat(fusion_list, 1)
                x = self.conv_fusion(fusion_out)

                if output_switch['object']: # object
                    output_dict['object'] = self.object_head(x)
                if output_switch['part']:
                    output_dict['part'] = self.part_head(x)

        if self.use_softmax:  # is True during inference
            # Heads that were switched off are None and are skipped.

            # inference scene
            if output_dict['scene'] is not None:
                x = output_dict['scene']
                x = x.squeeze(3).squeeze(2)
                x = F.softmax(x, dim=1)
                output_dict['scene'] = x

            # inference object, material
            for k in ['object', 'material']:
                if output_dict[k] is None:
                    continue
                x = output_dict[k]
                x = F.interpolate(x, size=seg_size, mode='bilinear', align_corners=False)
                x = F.softmax(x, dim=1)
                output_dict[k] = x

            # inference part
            if output_dict['part'] is not None:
                # Resize all part channels once; the per-object slices
                # taken below already have the requested size.
                x = F.interpolate(
                    output_dict['part'], size=seg_size,
                    mode='bilinear', align_corners=False)
                part_pred_list, head = [], 0
                for idx_part, object_label in enumerate(self.object_with_part):
                    # Each object owns a contiguous run of part channels
                    # and gets its own softmax over that run.
                    n_part = len(self.object_part[object_label])
                    _x = F.softmax(x[:, head: head + n_part], dim=1)
                    part_pred_list.append(_x)
                    head += n_part
                output_dict['part'] = part_pred_list

        else:   # Training
            # object, scene, material
            for k in ['object', 'scene', 'material']:
                if output_dict[k] is None:
                    continue
                x = output_dict[k]
                x = F.log_softmax(x, dim=1)
                if k == "scene":  # for scene
                    x = x.squeeze(3).squeeze(2)
                output_dict[k] = x
            if output_dict['part'] is not None:
                part_pred_list, head = [], 0
                for idx_part, object_label in enumerate(self.object_with_part):
                    n_part = len(self.object_part[object_label])
                    x = output_dict['part'][:, head: head + n_part]
                    x = F.log_softmax(x, dim=1)
                    part_pred_list.append(x)
                    head += n_part
                output_dict['part'] = part_pred_list

        return output_dict


================================================
FILE: seeing/upsegmodel/prroi_pool/.gitignore
================================================
*.o
/_prroi_pooling


================================================
FILE: seeing/upsegmodel/prroi_pool/README.md
================================================
# PreciseRoIPooling
This repo implements the **Precise RoI Pooling** (PrRoI Pooling), proposed in the paper **Acquisition of Localization Confidence for Accurate Object Detection** published at ECCV 2018 (Oral Presentation).

**Acquisition of Localization Confidence for Accurate Object Detection**

_Borui Jiang*, Ruixuan Luo*, Jiayuan Mao*, Tete Xiao, Yuning Jiang_ (* indicates equal contribution.)

https://arxiv.org/abs/1807.11590

## Brief

In short, Precise RoI Pooling is an integration-based (bilinear interpolation) average pooling method for RoI Pooling. It avoids any quantization and has a continuous gradient on bounding box coordinates. It is:

- different from the original RoI Pooling proposed in [Fast R-CNN](https://arxiv.org/abs/1504.08083). PrRoI Pooling uses average pooling instead of max pooling for each bin and has a continuous gradient on bounding box coordinates. That is, one can take the derivatives of some loss function w.r.t the coordinates of each RoI and optimize the RoI coordinates.
- different from the RoI Align proposed in [Mask R-CNN](https://arxiv.org/abs/1703.06870). PrRoI Pooling uses a full integration-based average pooling instead of sampling a constant number of points. This makes the gradient w.r.t. the coordinates continuous.

For clarity, the following figure illustrates RoI Pooling, RoI Align and PrRoI Pooling. More details, including the gradient computation, can be found in our paper.

<center><img src="./_assets/prroi_visualization.png" width="80%"></center>

## Implementation

PrRoI Pooling was originally implemented by [Tete Xiao](http://tetexiao.com/) based on MegBrain, an (internal) deep learning framework built by Megvii Inc. It was later adapted into open-source deep learning frameworks. Currently, we only support PyTorch. Unfortunately, we don't have any specific plan for the adaptation into other frameworks such as TensorFlow, but any contributions (pull requests) will be more than welcome.

## Usage (PyTorch 1.0)

In the directory `pytorch/`, we provide a PyTorch-based implementation of PrRoI Pooling. It requires PyTorch 1.0+ and only supports CUDA (CPU mode is not implemented).
Since we use PyTorch JIT for cxx/cuda code compilation, to use the module in your code, simply do:

```
from prroi_pool import PrRoIPool2D

avg_pool = PrRoIPool2D(window_height, window_width, spatial_scale)
roi_features = avg_pool(features, rois)

# for those who want to use the "functional"

from prroi_pool.functional import prroi_pool2d
roi_features = prroi_pool2d(features, rois, window_height, window_width, spatial_scale)
```


## Usage (PyTorch 0.4)

**!!! Please first check out the branch pytorch0.4.**

In the directory `pytorch/`, we provide a PyTorch-based implementation of PrRoI Pooling. It requires PyTorch 0.4 and only supports CUDA (CPU mode is not implemented).
To use the PrRoI Pooling module, first go to `pytorch/prroi_pool` and execute `./travis.sh` to compile the essential components (you may need `nvcc` for this step). To use the module in your code, simply do:

```
from prroi_pool import PrRoIPool2D

avg_pool = PrRoIPool2D(window_height, window_width, spatial_scale)
roi_features = avg_pool(features, rois)

# for those who want to use the "functional"

from prroi_pool.functional import prroi_pool2d
roi_features = prroi_pool2d(features, rois, window_height, window_width, spatial_scale)
```

Here,

- RoI is an `m * 5` float tensor of format `(batch_index, x0, y0, x1, y1)`, following the convention in the original Caffe implementation of RoI Pooling, although in some frameworks the batch indices are provided by an integer tensor.
- `spatial_scale` is multiplied to the RoIs. For example, if your feature maps are down-sampled by a factor of 16 (w.r.t. the input image), you should use a spatial scale of `1/16`.
- The RoI coordinates follow the [L, R) convention. That is, `(0, 0, 4, 4)` denotes a box of size `4x4`.


================================================
FILE: seeing/upsegmodel/prroi_pool/__init__.py
================================================
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
# File   : __init__.py
# Author : Jiayuan Mao, Tete Xiao
# Email  : maojiayuan@gmail.com, jasonhsiao97@gmail.com
# Date   : 07/13/2018
# 
# This file is part of PreciseRoIPooling.
# Distributed under terms of the MIT license.
# Copyright (c) 2017 Megvii Technology Limited.

from .prroi_pool import *


================================================
FILE: seeing/upsegmodel/prroi_pool/build.py
================================================
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
# File   : build.py
# Author : Jiayuan Mao, Tete Xiao
# Email  : maojiayuan@gmail.com, jasonhsiao97@gmail.com
# Date   : 07/13/2018
# 
# This file is part of PreciseRoIPooling.
# Distributed under terms of the MIT license.
# Copyright (c) 2017 Megvii Technology Limited.

import os
import torch

from torch.utils.ffi import create_extension

# Precise RoI Pooling has no CPU implementation, so the build is refused
# when CUDA is not available.
# TODO(Jiayuan Mao @ 07/13): remove this restriction after we support the cpu implementation.
if not torch.cuda.is_available():
    raise NotImplementedError('Precise RoI Pooling only supports GPU (cuda) implememtations.')

with_cuda = True
headers = ['src/prroi_pooling_gpu.h']
sources = ['src/prroi_pooling_gpu.c']
defines = [('WITH_CUDA', None)]

# The CUDA object file is passed by absolute path, resolved against the
# directory that contains this script.
this_file = os.path.dirname(os.path.realpath(__file__))
extra_objects_cuda = [
    os.path.join(this_file, fname)
    for fname in ['src/prroi_pooling_gpu_impl.cu.o']
]
extra_objects = list(extra_objects_cuda)

ffi = create_extension(
    '_prroi_pooling',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda,
    extra_objects=extra_objects
)

# Only build when run as a script; importing the module just defines `ffi`.
if __name__ == '__main__':
    ffi.build()


================================================
FILE: seeing/upsegmodel/prroi_pool/functional.py
================================================
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
# File   : functional.py
# Author : Jiayuan Mao, Tete Xiao
# Email  : maojiayuan@gmail.com, jasonhsiao97@gmail.com
# Date   : 07/13/2018
#
# This file is part of PreciseRoIPooling.
# Distributed under terms of the MIT license.
# Copyright (c) 2017 Megvii Technology Limited.

import torch
import torch.autograd as ag

# Load the compiled extension when this module is imported, using the
# extension loader from torch.utils.cpp_extension.  The C++ and CUDA
# sources are looked up in the 'src' directory next to this file.
try:
    from os.path import join as pjoin, dirname
    from torch.utils.cpp_extension import load as load_extension
    root_dir = pjoin(dirname(__file__), 'src')
    _prroi_pooling = load_extension(
        '_prroi_pooling',
        [pjoin(root_dir, 'prroi_pooling_gpu.c'), pjoin(root_dir, 'prroi_pooling_gpu_impl.cu')],
        verbose=False
    )
except ImportError:
    # Only ImportError is translated into this message; any other
    # exception raised inside the try block propagates unchanged.
    raise ImportError('Can not compile Precise RoI Pooling library.')

__all__ = ['prroi_pool2d']


class PrRoIPool2DFunction(ag.Function):
    """Autograd function wrapping the compiled Precise RoI Pooling kernels."""

    @staticmethod
    def forward(ctx, features, rois, pooled_height, pooled_width, spatial_scale):
        """Pool ``features`` over ``rois``.

        Both tensors must be float tensors; the pooling parameters are
        converted to ``int``/``int``/``float``.  Only CUDA tensors are
        supported; anything else raises NotImplementedError.
        """
        features_type, rois_type = features.type(), rois.type()
        assert 'FloatTensor' in features_type and 'FloatTensor' in rois_type, \
                'Precise RoI Pooling only takes float input, got {} for features and {} for rois.'.format(features_type, rois_type)

        params = (int(pooled_height), int(pooled_width), float(spatial_scale))
        features = features.contiguous()
        rois = rois.contiguous()

        if not features.is_cuda:
            raise NotImplementedError('Precise RoI Pooling only supports GPU (cuda) implememtations.')

        output = _prroi_pooling.prroi_pooling_forward_cuda(features, rois, *params)
        # Keep what backward needs; everything here is contiguous.
        ctx.params = params
        ctx.save_for_backward(features, rois, output)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Return gradients for ``features`` and ``rois``.

        A gradient is only computed for an input that requires it and is
        ``None`` otherwise; the three pooling parameters never receive a
        gradient.
        """
        features, rois, output = ctx.saved_tensors
        grad_input = None
        grad_coor = None

        if features.requires_grad or rois.requires_grad:
            grad_output = grad_output.contiguous()
        if features.requires_grad:
            grad_input = _prroi_pooling.prroi_pooling_backward_cuda(
                features, rois, output, grad_output, *ctx.params)
        if rois.requires_grad:
            grad_coor = _prroi_pooling.prroi_pooling_coor_backward_cuda(
                features, rois, output, grad_output, *ctx.params)

        return grad_input, grad_coor, None, None, None


# Functional entry point: invokes PrRoIPool2DFunction through its `apply`.
prroi_pool2d = PrRoIPool2DFunction.apply


================================================
FILE: seeing/upsegmodel/prroi_pool/prroi_pool.py
================================================
#! /usr/bin/env python3
# -*- coding: utf-8 -*-
# File   : prroi_pool.py
# Author : Jiayuan Mao, Tete Xiao
# Email  : maojiayuan@gmail.com, jasonhsiao97@gmail.com
# Date   : 07/13/2018
# 
# This file is part of PreciseRoIPooling.
# Distributed under terms of the MIT license.
# Copyright (c) 2017 Megvii Technology Limited.

import torch.nn as nn

from .functional import prroi_pool2d

__all__ = ['PrRoIPool2D']


class PrRoIPool2D(nn.Module):
    """Module wrapper around ``prroi_pool2d`` with fixed pooling parameters."""

    def __init__(self, pooled_height, pooled_width, spatial_scale):
        super().__init__()

        # Normalize the configuration to plain Python numbers up front.
        self.pooled_height, self.pooled_width = int(pooled_height), int(pooled_width)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """Pool ``features`` over ``rois`` with the stored parameters."""
        pooling_args = (self.pooled_height, self.pooled_width, self.spatial_scale)
        return prroi_pool2d(features, rois, *pooling_args)


================================================
FILE: seeing/upsegmodel/prroi_pool/src/prroi_pooling_gpu.c
================================================
/*
 * File   : prroi_pooling_gpu.c
 * Author : Jiayuan Mao, Tete Xiao
 * Email  : maojiayuan@gmail.com, jasonhsiao97@gmail.com
 * Date   : 07/13/2018
 *
 * Distributed under terms of the MIT license.
 * Copyright (c) 2017 Megvii Technology Limited.
 */

#include <math.h>
#include <torch/extension.h>

#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>

#include <THC/THC.h>

#include "prroi_pooling_gpu_impl.cuh"


// Forward pass of precise RoI pooling.
//
// Reads the channel/height/width extents from dimensions 1..3 of `features` and the RoI
// count from dimension 0 of `rois`, allocates a zero-filled
// (nr_rois, nr_channels, pooled_height, pooled_width) output with the options of
// `features`, and launches the CUDA implementation on the current stream.
// Both inputs are accessed as raw float buffers.
// NOTE(review): presumably callers pass contiguous CUDA float tensors -- nothing here checks it.
at::Tensor prroi_pooling_forward_cuda(const at::Tensor &features, const at::Tensor &rois, int pooled_height, int pooled_width, float spatial_scale) {
    const int nr_rois = rois.size(0);
    const int nr_channels = features.size(1);
    const int height = features.size(2);
    const int width = features.size(3);

    auto output = at::zeros({nr_rois, nr_channels, pooled_height, pooled_width}, features.options());

    // Only launch the kernel when there is at least one output element to fill.
    if (output.numel() != 0) {
        const int top_count = nr_rois * nr_channels * pooled_height * pooled_width;
        cudaStream_t stream = at::cuda::getCurrentCUDAStream();

        PrRoIPoolingForwardGpu(
            stream,
            features.data<float>(), rois.data<float>(), output.data<float>(),
            nr_channels, height, width,
            pooled_height, pooled_width, spatial_scale,
            top_count
        );
    }

    // Surface any pending CUDA error, whether or not a kernel was launched.
    THCudaCheck(cudaGetLastError());
    return output;
}

// Backward pass with respect to the feature map.
//
// Returns a tensor shaped like `features` that receives the gradient propagated from
// `output_diff`. When `output` has no elements the zero-filled gradient is returned
// without launching a kernel.
// NOTE(review): presumably all tensors are contiguous CUDA float tensors -- not checked here.
at::Tensor prroi_pooling_backward_cuda(
    const at::Tensor &features, const at::Tensor &rois, const at::Tensor &output, const at::Tensor &output_diff,
    int pooled_height, int pooled_width, float spatial_scale) {

    auto features_diff = at::zeros_like(features);

    if (output.numel() != 0) {
        const int nr_rois = rois.size(0);
        const int batch_size = features.size(0);
        const int nr_channels = features.size(1);
        const int height = features.size(2);
        const int width = features.size(3);

        // Element counts of the pooled output and of the feature gradient buffer.
        const int top_count = nr_rois * nr_channels * pooled_height * pooled_width;
        const int bottom_count = batch_size * nr_channels * height * width;

        cudaStream_t stream = at::cuda::getCurrentCUDAStream();
        PrRoIPoolingBackwardGpu(
            stream,
            features.data<float>(), rois.data<float>(), output.data<float>(), output_diff.data<float>(),
            features_diff.data<float>(),
            nr_channels, height, width,
            pooled_height, pooled_width, spatial_scale,
            top_count, bottom_count
        );
    }

    THCudaCheck(cudaGetLastError());
    return features_diff;
}

// Backward pass with respect to the RoI coordinates.
//
// Returns a tensor shaped like `rois` holding the coordinate gradients. The gradient
// buffer is sized as five floats per RoI. When `output` has no elements the zero-filled
// gradient is returned without launching a kernel.
// NOTE(review): presumably all tensors are contiguous CUDA float tensors -- not checked here.
at::Tensor prroi_pooling_coor_backward_cuda(
    const at::Tensor &features, const at::Tensor &rois, const at::Tensor &output, const at::Tensor &output_diff,
    int pooled_height, int pooled_width, float spatial_scale) {

    auto coor_diff = at::zeros_like(rois);

    if (output.numel() != 0) {
        const int nr_rois = rois.size(0);
        const int nr_channels = features.size(1);
        const int height = features.size(2);
        const int width = features.size(3);

        const int top_count = nr_rois * nr_channels * pooled_height * pooled_width;
        const int bottom_count = nr_rois * 5;

        cudaStream_t stream = at::cuda::getCurrentCUDAStream();
        PrRoIPoolingCoorBackwardGpu(
            stream,
            features.data<float>(), rois.data<float>(), output.data<float>(), output_diff.data<float>(),
            coor_diff.data<float>(),
            nr_channels, height, width,
            pooled_height, pooled_width, spatial_scale,
            top_count, bottom_count
        );
    }

    THCudaCheck(cudaGetLastError());
    return coor_diff;
}

// Python bindings: expose the three entry points defined above under their C++ names.
// The third argument of each m.def call is the docstring attached to the bound function.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    m.def("prroi_pooling_forward_cuda", &prroi_pooling_forward_cuda, "PRRoIPooling_forward");
    m.def("prroi_pooling_backward_cuda", &prroi_pooling_backward_cuda, "PRRoIPooling_backward");
    m.def("prroi_pooling_coor_backward_cuda", &prroi_pooling_coor_backward_cuda, "PRRoIPooling_backward_coor");
}


================================================
FILE: seeing/upsegmodel/prroi_pool/src/prroi_pooling_gpu.h
================================================
/*
 * File   : prroi_pooling_gpu.h
 * Author : Jiayuan Mao, Tete Xiao
 * Email  : maojiayuan@gmail.com, jasonhsiao97@gmail.com 
 * Date   : 07/13/2018
 * 
 * Distributed under terms of the MIT license.
 * Copyright (c) 2017 Megvii Technology Limited.
 */

/*
 * NOTE(review): these prototypes take raw THCudaTensor pointers (with the result
 * buffers passed in by the caller) and return int. prroi_pooling_gpu.c defines
 * functions with the same names that take `const at::Tensor &` arguments and return
 * `at::Tensor`, and it does not include this header -- confirm whether this header
 * is still used before relying on it.
 */

/* Forward pass: pools `features` over `rois` into `output`. */
int prroi_pooling_forward_cuda(THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, int pooled_height, int pooled_width, float spatial_scale);

/* Backward pass w.r.t. the features: writes the gradient into `features_diff`. */
int prroi_pooling_backward_cuda(
    THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, THCudaTensor *output_diff, THCudaTensor *features_diff,
    int pooled_height, int pooled_width, float spatial_scale
);

/* Backward pass w.r.t. the RoI coordinates: writes the gradient into `coor_diff`. */
int prroi_pooling_coor_backward_cuda(
    THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, THCudaTensor *output_diff, THCudaTensor *coor_diff,
    int pooled_height, int pooled_width, float spatial_scale
);


================================================
FILE: seeing/upsegmodel/prroi_pool/src/prroi_pooling_gpu_impl.cu
================================================
/*
 * File   : prroi_pooling_gpu_impl.cu
 * Author : Tete Xiao, Jiayuan Mao
 * Email  : jasonhsiao97@gmail.com
 *
 * Distributed under terms of the MIT license.
 * Copyright (c) 2017 Megvii Technology Limited.
 */

#include "prroi_pooling_gpu_impl.cuh"

#include <cstdio>
#include <cfloat>

// Grid-stride loop header: `i` starts at the global thread index and advances by the
// total number of launched threads until it reaches `n`. Because this expands to a
// plain `for`, a `return` in the body ends the thread, not just the current iteration.
#define CUDA_KERNEL_LOOP(i, n) \
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
        i < (n); \
        i += blockDim.x * gridDim.x)

// Queries the last CUDA error; on failure prints its description to stderr and
// terminates the whole process with exit(-1).
#define CUDA_POST_KERNEL_CHECK \
    do { \
        cudaError_t err = cudaGetLastError(); \
        if (cudaSuccess != err) { \
            fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err)); \
            exit(-1); \
        } \
    } while(0)

// Threads per block used by every kernel launch in this file.
#define CUDA_NUM_THREADS 512

namespace {

// Number of CUDA_NUM_THREADS-sized blocks needed to cover N elements
// (integer ceiling division).
static int CUDA_NUM_BLOCKS(const int N) {
  const int rounded_up = N + CUDA_NUM_THREADS - 1;
  return rounded_up / CUDA_NUM_THREADS;
}

// Reads data[h, w] from a row-major height x width plane.
// Coordinates outside the plane read as 0.
__device__ static float PrRoIPoolingGetData(F_DEVPTR_IN data, const int h, const int w, const int height, const int width)
{
    const bool inside = (h >= 0) && (w >= 0) && (h < height) && (w < width);
    if (!inside)
        return 0.0f;
    return data[h * width + w];
}

// Bilinear weight for a sample at offset (dh, dw) from a grid point:
// (1 - |dh|) * (1 - |dw|).
__device__ static float PrRoIPoolingGetCoeff(float dh, float dw){
    const float weight_h = 1.0f - fabsf(dh);
    const float weight_w = 1.0f - fabsf(dw);
    return weight_h * weight_w;
}

// Closed-form integral over [s, t] of the linear interpolant c1 * (1 - x) + c2 * x,
// i.e. the line that takes value c1 at x = 0 and c2 at x = 1.
// The unsuffixed 0.5 literals are doubles, so the expression is evaluated in double
// precision and narrowed to float on return.
__device__ static float PrRoIPoolingSingleCoorIntegral(float s, float t, float c1, float c2) {
    return 0.5 * (t * t - s * s) * c2 + (t - 0.5 * t * t - s + 0.5 * s * s) * c1;
}

// Bilinear interpolation of a height x width plane at the real-valued position (h, w).
// The four surrounding integer grid points are visited one by one; each contributes its
// value (0 when it lies outside the plane, see PrRoIPoolingGetData) times the weight
// returned by PrRoIPoolingGetCoeff for its offset from (h, w).
__device__ static float PrRoIPoolingInterpolation(F_DEVPTR_IN data, const float h, const float w, const int height, const int width){
    float retVal = 0.0f;
    // neighbour (floor(h), floor(w))
    int h1 = floorf(h);
    int w1 = floorf(w);
    retVal += PrRoIPoolingGetData(data, h1, w1, height, width) * PrRoIPoolingGetCoeff(h - float(h1), w - float(w1));
    // neighbour (floor(h) + 1, floor(w))
    h1 = floorf(h)+1;
    w1 = floorf(w);
    retVal += PrRoIPoolingGetData(data, h1, w1, height, width) * PrRoIPoolingGetCoeff(h - float(h1), w - float(w1));
    // neighbour (floor(h), floor(w) + 1)
    h1 = floorf(h);
    w1 = floorf(w)+1;
    retVal += PrRoIPoolingGetData(data, h1, w1, height, width) * PrRoIPoolingGetCoeff(h - float(h1), w - float(w1));
    // neighbour (floor(h) + 1, floor(w) + 1)
    h1 = floorf(h)+1;
    w1 = floorf(w)+1;
    retVal += PrRoIPoolingGetData(data, h1, w1, height, width) * PrRoIPoolingGetCoeff(h - float(h1), w - float(w1));
    return retVal;
}

// Weighted sum of the four corner values of the grid cell [s_h, e_h] x [s_w, e_w],
// restricted to the sub-rectangle [y0, y1] x [x0, x1].
//
// For each corner, `tmp` is a product of two one-dimensional factors of the form
//     lim - 0.5 * lim^2 - a + 0.5 * a^2,
// which is the integral of (1 - u) for u from a to lim, where u is the distance to that
// corner along one axis. The corner value is read with PrRoIPoolingGetData, so corners
// outside the h0 x w0 plane contribute 0.
//
// NOTE: alpha/beta/lim_* are deliberately carried over between corners: the second
// corner reuses the vertical terms of the first, and the fourth reuses those of the third.
__device__ static float PrRoIPoolingMatCalculation(F_DEVPTR_IN this_data, const int s_h, const int s_w, const int e_h, const int e_w,
        const float y0, const float x0, const float y1, const float x1, const int h0, const int w0)
{
    float alpha, beta, lim_alpha, lim_beta, tmp;
    float sum_out = 0;

    // corner (s_h, s_w): distances measured from the start row / start column
    alpha = x0 - float(s_w);
    beta = y0 - float(s_h);
    lim_alpha = x1 - float(s_w);
    lim_beta = y1 - float(s_h);
    tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + 0.5f * alpha * alpha)
        * (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta);
    sum_out += PrRoIPoolingGetData(this_data, s_h, s_w, h0, w0) * tmp;

    // corner (s_h, e_w): horizontal distances now measured from the end column
    alpha = float(e_w) - x1;
    lim_alpha = float(e_w) - x0;
    tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + 0.5f * alpha * alpha)
        * (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta);
    sum_out += PrRoIPoolingGetData(this_data, s_h, e_w, h0, w0) * tmp;

    // corner (e_h, s_w): vertical distances measured from the end row
    alpha = x0 - float(s_w);
    beta = float(e_h) - y1;
    lim_alpha = x1 - float(s_w);
    lim_beta = float(e_h) - y0;
    tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + 0.5f * alpha * alpha)
        * (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta);
    sum_out += PrRoIPoolingGetData(this_data, e_h, s_w, h0, w0) * tmp;

    // corner (e_h, e_w)
    alpha = float(e_w) - x1;
    lim_alpha = float(e_w) - x0;
    tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + 0.5f * alpha * alpha)
        * (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta);
    sum_out += PrRoIPoolingGetData(this_data, e_h, e_w, h0, w0) * tmp;

    return sum_out;
}

// Atomically adds top_diff * coeff to diff[h, w] of a row-major height x width plane.
// Positions outside the plane are ignored.
__device__ static void PrRoIPoolingDistributeDiff(F_DEVPTR_OUT diff, const float top_diff, const int h, const int w, const int height, const int width, const float coeff)
{
    if ((h < 0) || (w < 0) || (h >= height) || (w >= width))
        return;

    float *target = diff + h * width + w;
    atomicAdd(target, top_diff * coeff);
}

// Gradient counterpart of PrRoIPoolingMatCalculation: for the grid cell
// [s_h, e_h] x [s_w, e_w] restricted to [y0, y1] x [x0, x1], computes the same
// per-corner weight `tmp` and atomically accumulates top_diff * tmp into each of the
// four corners of `diff` (corners outside the h0 x w0 plane are skipped by
// PrRoIPoolingDistributeDiff).
//
// NOTE: alpha/beta/lim_* are deliberately carried over between corners: the second
// corner reuses the vertical terms of the first, and the fourth reuses those of the third.
__device__ static void PrRoIPoolingMatDistributeDiff(F_DEVPTR_OUT diff, const float top_diff, const int s_h, const int s_w, const int e_h, const int e_w,
        const float y0, const float x0, const float y1, const float x1, const int h0, const int w0)
{
    float alpha, beta, lim_alpha, lim_beta, tmp;

    // corner (s_h, s_w)
    alpha = x0 - float(s_w);
    beta = y0 - float(s_h);
    lim_alpha = x1 - float(s_w);
    lim_beta = y1 - float(s_h);
    tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + 0.5f * alpha * alpha)
        * (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta);
    PrRoIPoolingDistributeDiff(diff, top_diff, s_h, s_w, h0, w0, tmp);

    // corner (s_h, e_w)
    alpha = float(e_w) - x1;
    lim_alpha = float(e_w) - x0;
    tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + 0.5f * alpha * alpha)
        * (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta);
    PrRoIPoolingDistributeDiff(diff, top_diff, s_h, e_w, h0, w0, tmp);

    // corner (e_h, s_w)
    alpha = x0 - float(s_w);
    beta = float(e_h) - y1;
    lim_alpha = x1 - float(s_w);
    lim_beta = float(e_h) - y0;
    tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + 0.5f * alpha * alpha)
        * (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta);
    PrRoIPoolingDistributeDiff(diff, top_diff, e_h, s_w, h0, w0, tmp);

    // corner (e_h, e_w)
    alpha = float(e_w) - x1;
    lim_alpha = float(e_w) - x0;
    tmp = (lim_alpha - 0.5f * lim_alpha * lim_alpha - alpha + 0.5f * alpha * alpha)
        * (lim_beta - 0.5f * lim_beta * lim_beta - beta + 0.5f * beta * beta);
    PrRoIPoolingDistributeDiff(diff, top_diff, e_h, e_w, h0, w0, tmp);
}

// Forward kernel: one loop iteration per pooled output element.
//
//   nthreads      total number of output elements (n_rois * channels * pooled_h * pooled_w)
//   bottom_data   input feature planes, indexed as [(batch * channels + c) * height * width]
//   bottom_rois   5 floats per RoI: (batch index, x1, y1, x2, y2)
//   top_data      output, one float per element, indexed by the flat element index
//   spatial_scale factor applied to the RoI coordinates before pooling
//
// Each output is the integral of the bilinearly interpolated feature plane over the
// element's bin, divided by the bin area. Bins with zero area produce 0.
//
// The per-RoI pointer is a loop-local variable and empty bins use `continue`, so the
// kernel stays correct when a thread handles more than one element of the grid-stride
// loop (the kernel parameter is never advanced, and no remaining element is skipped).
__global__ void PrRoIPoolingForward(
        const int nthreads,
        F_DEVPTR_IN bottom_data,
        F_DEVPTR_IN bottom_rois,
        F_DEVPTR_OUT top_data,
        const int channels,
        const int height,
        const int width,
        const int pooled_height,
        const int pooled_width,
        const float spatial_scale) {

  CUDA_KERNEL_LOOP(index, nthreads) {
    // (n, c, ph, pw) is an element in the pooled output
    int pw = index % pooled_width;
    int ph = (index / pooled_width) % pooled_height;
    int c = (index / pooled_width / pooled_height) % channels;
    int n = index / pooled_width / pooled_height / channels;

    // RoI record of this element; computed from the base pointer on every iteration.
    const float *this_roi = bottom_rois + n * 5;
    int roi_batch_ind = this_roi[0];

    float roi_start_w = this_roi[1] * spatial_scale;
    float roi_start_h = this_roi[2] * spatial_scale;
    float roi_end_w = this_roi[3] * spatial_scale;
    float roi_end_h = this_roi[4] * spatial_scale;

    // Degenerate (inverted) RoIs are clamped to zero extent.
    float roi_width = max(roi_end_w - roi_start_w, ((float)0.0));
    float roi_height = max(roi_end_h - roi_start_h, ((float)0.0));
    float bin_size_h = roi_height / static_cast<float>(pooled_height);
    float bin_size_w = roi_width / static_cast<float>(pooled_width);

    const float *this_data = bottom_data + (roi_batch_ind * channels + c) * height * width;
    float *this_out = top_data + index;

    float win_start_w = roi_start_w + bin_size_w * pw;
    float win_start_h = roi_start_h + bin_size_h * ph;
    float win_end_w = win_start_w + bin_size_w;
    float win_end_h = win_start_h + bin_size_h;

    float win_size = max(float(0.0), bin_size_w * bin_size_h);
    if (win_size == 0) {
        *this_out = 0;
        // Move on to this thread's next element instead of terminating the thread.
        continue;
    }

    float sum_out = 0;

    int s_w, s_h, e_w, e_h;

    // Integer grid cells overlapped by the bin.
    s_w = floorf(win_start_w);
    e_w = ceilf(win_end_w);
    s_h = floorf(win_start_h);
    e_h = ceilf(win_end_h);

    // Accumulate the contribution of every unit grid cell, clipped to the bin.
    for (int w_iter = s_w; w_iter < e_w; ++w_iter)
        for (int h_iter = s_h; h_iter < e_h; ++h_iter)
            sum_out += PrRoIPoolingMatCalculation(this_data, h_iter, w_iter, h_iter + 1, w_iter + 1,
                max(win_start_h, float(h_iter)), max(win_start_w, float(w_iter)),
                min(win_end_h, float(h_iter) + 1.0), min(win_end_w, float(w_iter + 1.0)),
                height, width);
    *this_out = sum_out / win_size;
  }
}

// Backward kernel w.r.t. the features: one loop iteration per pooled output element.
//
//   nthreads      total number of pooled output elements
//   bottom_rois   5 floats per RoI: (batch index, x1, y1, x2, y2)
//   top_diff      gradient of the pooled output, indexed by the flat element index
//   bottom_diff   feature gradient, indexed as [(batch * channels + c) * height * width];
//                 updated with atomicAdd, so it must be zero-initialised by the caller
//
// Each element's gradient is divided by its bin area (0 for empty bins) and scattered to
// the feature positions with the weights used by the forward pass.
//
// The per-RoI pointer is a loop-local variable so the kernel stays correct when a thread
// handles more than one element of the grid-stride loop (the kernel parameter is never
// advanced).
__global__ void PrRoIPoolingBackward(
        const int nthreads,
        F_DEVPTR_IN bottom_rois,
        F_DEVPTR_IN top_diff,
        F_DEVPTR_OUT bottom_diff,
        const int channels,
        const int height,
        const int width,
        const int pooled_height,
        const int pooled_width,
        const float spatial_scale) {

  CUDA_KERNEL_LOOP(index, nthreads) {
    // (n, c, ph, pw) is an element in the pooled output
    int pw = index % pooled_width;
    int ph = (index / pooled_width) % pooled_height;
    int c = (index / pooled_width / pooled_height) % channels;
    int n = index / pooled_width / pooled_height / channels;

    // RoI record of this element; computed from the base pointer on every iteration.
    const float *this_roi = bottom_rois + n * 5;

    int roi_batch_ind = this_roi[0];
    float roi_start_w = this_roi[1] * spatial_scale;
    float roi_start_h = this_roi[2] * spatial_scale;
    float roi_end_w = this_roi[3] * spatial_scale;
    float roi_end_h = this_roi[4] * spatial_scale;

    float roi_width = max(roi_end_w - roi_start_w, (float)0);
    float roi_height = max(roi_end_h - roi_start_h, (float)0);
    float bin_size_h = roi_height / static_cast<float>(pooled_height);
    float bin_size_w = roi_width / static_cast<float>(pooled_width);

    const float *this_out_grad = top_diff + index;
    float *this_data_grad = bottom_diff + (roi_batch_ind * channels + c) * height * width;

    float win_start_w = roi_start_w + bin_size_w * pw;
    float win_start_h = roi_start_h + bin_size_h * ph;
    float win_end_w = win_start_w + bin_size_w;
    float win_end_h = win_start_h + bin_size_h;

    float win_size = max(float(0.0), bin_size_w * bin_size_h);

    // Gradient per unit area; empty bins propagate nothing.
    float sum_out = win_size == float(0) ? float(0) : *this_out_grad / win_size;

    int s_w, s_h, e_w, e_h;

    // Integer grid cells overlapped by the bin.
    s_w = floorf(win_start_w);
    e_w = ceilf(win_end_w);
    s_h = floorf(win_start_h);
    e_h = ceilf(win_end_h);

    for (int w_iter = s_w; w_iter < e_w; ++w_iter)
        for (int h_iter = s_h; h_iter < e_h; ++h_iter)
            PrRoIPoolingMatDistributeDiff(this_data_grad, sum_out, h_iter, w_iter, h_iter + 1, w_iter + 1,
                max(win_start_h, float(h_iter)), max(win_start_w, float(w_iter)),
                min(win_end_h, float(h_iter) + 1.0), min(win_end_w, float(w_iter + 1.0)),
                height, width);

  }
}

// Backward kernel w.r.t. the RoI coordinates: one loop iteration per pooled output element.
//
//   nthreads      total number of pooled output elements
//   bottom_data   input feature planes, indexed as [(batch * channels + c) * height * width]
//   bottom_rois   5 floats per RoI: (batch index, x1, y1, x2, y2)
//   top_data      pooled output of the forward pass, indexed by the flat element index
//   top_diff      gradient of the pooled output, same indexing
//   bottom_diff   coordinate gradient, 5 floats per RoI; slots 1..4 are updated with
//                 atomicAdd, so the buffer must be zero-initialised by the caller
//
// Elements whose area-normalised gradient is zero (including empty bins) contribute nothing.
//
// The per-RoI pointer is a loop-local variable and skipped elements use `continue`, so
// the kernel stays correct when a thread handles more than one element of the
// grid-stride loop (the kernel parameter is never advanced, and no remaining element is
// skipped).
__global__ void PrRoIPoolingCoorBackward(
        const int nthreads,
        F_DEVPTR_IN bottom_data,
        F_DEVPTR_IN bottom_rois,
        F_DEVPTR_IN top_data,
        F_DEVPTR_IN top_diff,
        F_DEVPTR_OUT bottom_diff,
        const int channels,
        const int height,
        const int width,
        const int pooled_height,
        const int pooled_width,
        const float spatial_scale) {

  CUDA_KERNEL_LOOP(index, nthreads) {
    // (n, c, ph, pw) is an element in the pooled output
    int pw = index % pooled_width;
    int ph = (index / pooled_width) % pooled_height;
    int c = (index / pooled_width / pooled_height) % channels;
    int n = index / pooled_width / pooled_height / channels;

    // RoI record of this element; computed from the base pointer on every iteration.
    const float *this_roi = bottom_rois + n * 5;

    int roi_batch_ind = this_roi[0];
    float roi_start_w = this_roi[1] * spatial_scale;
    float roi_start_h = this_roi[2] * spatial_scale;
    float roi_end_w = this_roi[3] * spatial_scale;
    float roi_end_h = this_roi[4] * spatial_scale;

    float roi_width = max(roi_end_w - roi_start_w, (float)0);
    float roi_height = max(roi_end_h - roi_start_h, (float)0);
    float bin_size_h = roi_height / static_cast<float>(pooled_height);
    float bin_size_w = roi_width / static_cast<float>(pooled_width);

    const float *this_out_grad = top_diff + index;
    const float *this_bottom_data = bottom_data + (roi_batch_ind * channels + c) * height * width;
    const float *this_top_data = top_data + index;
    float *this_data_grad = bottom_diff + n * 5;

    float win_start_w = roi_start_w + bin_size_w * pw;
    float win_start_h = roi_start_h + bin_size_h * ph;
    float win_end_w = win_start_w + bin_size_w;
    float win_end_h = win_start_h + bin_size_h;

    float win_size = max(float(0.0), bin_size_w * bin_size_h);

    float sum_out = win_size == float(0) ? float(0) : *this_out_grad / win_size;

    // WARNING: to be discussed
    // Nothing to propagate for this element; move on to this thread's next element
    // instead of terminating the thread.
    if (sum_out == 0)
        continue;

    int s_w, s_h, e_w, e_h;

    // Integer grid cells overlapped by the bin.
    s_w = floorf(win_start_w);
    e_w = ceilf(win_end_w);
    s_h = floorf(win_start_h);
    e_h = ceilf(win_end_h);

    // Line integrals of the interpolated features along the four edges of the bin:
    // the left/right edges (x fixed at win_start_w / win_end_w) and the
    // top/bottom edges (y fixed at win_start_h / win_end_h).
    float g_x1_y = 0, g_x2_y = 0, g_x_y1 = 0, g_x_y2 = 0;
    for (int h_iter = s_h; h_iter < e_h; ++h_iter) {
        g_x1_y += PrRoIPoolingSingleCoorIntegral(max(win_start_h, float(h_iter)) - h_iter,
                min(win_end_h, float(h_iter + 1)) - h_iter,
                PrRoIPoolingInterpolation(this_bottom_data, h_iter, win_start_w, height, width),
                PrRoIPoolingInterpolation(this_bottom_data, h_iter + 1, win_start_w, height, width));

        g_x2_y += PrRoIPoolingSingleCoorIntegral(max(win_start_h, float(h_iter)) - h_iter,
                min(win_end_h, float(h_iter + 1)) - h_iter,
                PrRoIPoolingInterpolation(this_bottom_data, h_iter, win_end_w, height, width),
                PrRoIPoolingInterpolation(this_bottom_data, h_iter + 1, win_end_w, height, width));
    }

    for (int w_iter = s_w; w_iter < e_w; ++w_iter) {
        g_x_y1 += PrRoIPoolingSingleCoorIntegral(max(win_start_w, float(w_iter)) - w_iter,
                min(win_end_w, float(w_iter + 1)) - w_iter,
                PrRoIPoolingInterpolation(this_bottom_data, win_start_h, w_iter, height, width),
                PrRoIPoolingInterpolation(this_bottom_data, win_start_h, w_iter + 1, height, width));

        g_x_y2 += PrRoIPoolingSingleCoorIntegral(max(win_start_w, float(w_iter)) - w_iter,
                min(win_end_w, float(w_iter + 1)) - w_iter,
                PrRoIPoolingInterpolation(this_bottom_data, win_end_h, w_iter, height, width),
                PrRoIPoolingInterpolation(this_bottom_data, win_end_h, w_iter + 1, height, width));
    }

    // Derivatives of the pooled value w.r.t. the four bin edges.
    float partial_x1 = -g_x1_y + (win_end_h - win_start_h) * (*this_top_data);
    float partial_y1 = -g_x_y1 + (win_end_w - win_start_w) * (*this_top_data);
    float partial_x2 = g_x2_y - (win_end_h - win_start_h) * (*this_top_data);
    float partial_y2 = g_x_y2 - (win_end_w - win_start_w) * (*this_top_data);

    partial_x1 = partial_x1 / win_size * spatial_scale;
    partial_x2 = partial_x2 / win_size * spatial_scale;
    partial_y1 = partial_y1 / win_size * spatial_scale;
    partial_y2 = partial_y2 / win_size * spatial_scale;

    // (b, x1, y1, x2, y2)

    // The batch-index slot receives no gradient.
    this_data_grad[0] = 0;
    // Bin edges are linear in the RoI corners, so each edge derivative is split between
    // the two corners according to the bin's position (pw, ph) inside the RoI.
    atomicAdd(this_data_grad + 1, (partial_x1 * (1.0 - float(pw) / pooled_width) + partial_x2 * (1.0 - float(pw + 1) / pooled_width))
            * (*this_out_grad));
    atomicAdd(this_data_grad + 2, (partial_y1 * (1.0 - float(ph) / pooled_height) + partial_y2 * (1.0 - float(ph + 1) / pooled_height))
            * (*this_out_grad));
    atomicAdd(this_data_grad + 3, (partial_x2 * float(pw + 1) / pooled_width + partial_x1 * float(pw) / pooled_width)
            * (*this_out_grad));
    atomicAdd(this_data_grad + 4, (partial_y2 * float(ph + 1) / pooled_height + partial_y1 * float(ph) / pooled_height)
            * (*this_out_grad));
  }
}

} /* !anonymous namespace */

#ifdef __cplusplus
extern "C" {
#endif

// Host launcher for the forward kernel.
// Enqueues PrRoIPoolingForward on `stream` with enough CUDA_NUM_THREADS-sized blocks to
// cover `top_count` output elements, then runs CUDA_POST_KERNEL_CHECK.
void PrRoIPoolingForwardGpu(
    cudaStream_t stream,
    F_DEVPTR_IN bottom_data,
    F_DEVPTR_IN bottom_rois,
    F_DEVPTR_OUT top_data,
    const int channels_, const int height_, const int width_,
    const int pooled_height_, const int pooled_width_,
    const float spatial_scale_,
    const int top_count) {

    const int num_blocks = CUDA_NUM_BLOCKS(top_count);

    PrRoIPoolingForward<<<num_blocks, CUDA_NUM_THREADS, 0, stream>>>(
        top_count,
        bottom_data, bottom_rois, top_data,
        channels_, height_, width_,
        pooled_height_, pooled_width_,
        spatial_scale_);

    CUDA_POST_KERNEL_CHECK;
}

// Host launcher for the feature-gradient kernel.
// Zeroes the first `bottom_count` floats of `bottom_diff` on `stream`, enqueues
// PrRoIPoolingBackward over `top_count` elements, then runs CUDA_POST_KERNEL_CHECK.
// `bottom_data` and `top_data` are accepted but not forwarded to the kernel.
void PrRoIPoolingBackwardGpu(
    cudaStream_t stream,
    F_DEVPTR_IN bottom_data,
    F_DEVPTR_IN bottom_rois,
    F_DEVPTR_IN top_data,
    F_DEVPTR_IN top_diff,
    F_DEVPTR_OUT bottom_diff,
    const int channels_, const int height_, const int width_,
    const int pooled_height_, const int pooled_width_,
    const float spatial_scale_,
    const int top_count, const int bottom_count) {

    // The kernel accumulates with atomicAdd, so start from an all-zero buffer.
    cudaMemsetAsync(bottom_diff, 0, sizeof(float) * bottom_count, stream);

    const int num_blocks = CUDA_NUM_BLOCKS(top_count);
    PrRoIPoolingBackward<<<num_blocks, CUDA_NUM_THREADS, 0, stream>>>(
        top_count,
        bottom_rois, top_diff, bottom_diff,
        channels_, height_, width_,
        pooled_height_, pooled_width_,
        spatial_scale_);

    CUDA_POST_KERNEL_CHECK;
}

// Host launcher for the coordinate-gradient kernel.
// Zeroes the first `bottom_count` floats of `bottom_diff` on `stream`, enqueues
// PrRoIPoolingCoorBackward over `top_count` elements, then runs CUDA_POST_KERNEL_CHECK.
void PrRoIPoolingCoorBackwardGpu(
    cudaStream_t stream,
    F_DEVPTR_IN bottom_data,
    F_DEVPTR_IN bottom_rois,
    F_DEVPTR_IN top_data,
    F_DEVPTR_IN top_diff,
    F_DEVPTR_OUT bottom_diff,
    const int channels_, const int height_, const int width_,
    const int pooled_height_, const int pooled_width_,
    const float spatial_scale_,
    const int top_count, const int bottom_count) {

    // The kernel accumulates with atomicAdd, so start from an all-zero buffer.
    cudaMemsetAsync(bottom_diff, 0, sizeof(float) * bottom_count, stream);

    const int num_blocks = CUDA_NUM_BLOCKS(top_count);
    PrRoIPoolingCoorBackward<<<num_blocks, CUDA_NUM_THREADS, 0, stream>>>(
        top_count,
        bottom_data, bottom_rois, top_data, top_diff, bottom_diff,
        channels_, height_, width_,
        pooled_height_, pooled_width_,
        spatial_scale_);

    CUDA_POST_KERNEL_CHECK;
}

} /* !extern "C" */


================================================
FILE: seeing/upsegmodel/prroi_pool/src/prroi_pooling_gpu_impl.cuh
================================================
/*
 * File   : prroi_pooling_gpu_impl.cuh
 * Author : Tete Xiao, Jiayuan Mao
 * Email  : jasonhsiao97@gmail.com
 *
 * Distributed under terms of the MIT license.
 * Copyright (c) 2017 Megvii Technology Limited.
 */

#ifndef PRROI_POOLING_GPU_IMPL_CUH
#define PRROI_POOLING_GPU_IMPL_CUH

#ifdef __cplusplus
extern "C" {
#endif

/* Pointer aliases used by the launchers below: read-only input and writable output buffers. */
#define F_DEVPTR_IN const float *
#define F_DEVPTR_OUT float *

/*
 * Launches the forward pooling kernel on `stream`.
 * `top_count` is the number of elements of `top_data` to compute.
 */
void PrRoIPoolingForwardGpu(
    cudaStream_t stream,
    F_DEVPTR_IN bottom_data,
    F_DEVPTR_IN bottom_rois,
    F_DEVPTR_OUT top_data,
    const int channels_, const int height_, const int width_,
    const int pooled_height_, const int pooled_width_,
    const float spatial_scale_,
    const int top_count);

/*
 * Launches the backward kernel w.r.t. the features on `stream`.
 * `top_count` is the number of pooled output elements, `bottom_count` the number of
 * floats in `bottom_diff` (the feature gradient buffer).
 */
void PrRoIPoolingBackwardGpu(
    cudaStream_t stream,
    F_DEVPTR_IN bottom_data,
    F_DEVPTR_IN bottom_rois,
    F_DEVPTR_IN top_data,
    F_DEVPTR_IN top_diff,
    F_DEVPTR_OUT bottom_diff,
    const int channels_, const int height_, const int width_,
    const int pooled_height_, const int pooled_width_,
    const float spatial_scale_,
    const int top_count, const int bottom_count);

/*
 * Launches the backward kernel w.r.t. the RoI coordinates on `stream`.
 * `top_count` is the number of pooled output elements, `bottom_count` the number of
 * floats in `bottom_diff` (the coordinate gradient buffer).
 */
void PrRoIPoolingCoorBackwardGpu(
    cudaStream_t stream,
    F_DEVPTR_IN bottom_data,
    F_DEVPTR_IN bottom_rois,
    F_DEVPTR_IN top_data,
    F_DEVPTR_IN top_diff,
    F_DEVPTR_OUT bottom_diff,
    const int channels_, const int height_, const int width_,
    const int pooled_height_, const int pooled_width_,
    const float spatial_scale_,
    const int top_count, const int bottom_count);

#ifdef __cplusplus
} /* !extern "C" */
#endif

#endif /* !PRROI_POOLING_GPU_IMPL_CUH */


================================================
FILE: seeing/upsegmodel/prroi_pool/test_prroi_pooling2d.py
================================================
# -*- coding: utf-8 -*-
# File   : test_prroi_pooling2d.py
# Author : Jiayuan Mao
# Email  : maojiayuan@gmail.com
# Date   : 18/02/2018
#
# This file is part of Jacinle.

import unittest

import torch
import torch.nn as nn
import torch.nn.functional as F

from jactorch.utils.unittest import TorchTestCase

from prroi_pool import PrRoIPool2D


class TestPrRoIPool2D(TorchTestCase):
    """GPU tests for the ``PrRoIPool2D`` module."""

    def test_forward(self):
        """Compare pooled values against a 2x2, stride-1 average pooling.

        Both RoIs are 14x14 in input coordinates; with ``spatial_scale=0.5``
        and a 7x7 output every bin covers a 1x1 window of the feature map.
        """
        features = torch.rand((4, 16, 24, 32)).cuda()
        rois = torch.tensor([
            [0, 0, 0, 14, 14],
            [1, 14, 14, 28, 28],
        ]).float().cuda()

        layer = PrRoIPool2D(7, 7, spatial_scale=0.5)
        pooled = layer(features, rois)

        reference = F.avg_pool2d(features, kernel_size=2, stride=1)
        expected = torch.stack(
            (reference[0, :, :7, :7], reference[1, :, 7:14, 7:14]),
            dim=0,
        )
        self.assertTensorClose(pooled, expected)

    def test_backward_shapeonly(self):
        """Check that gradients reach both inputs with matching shapes."""
        layer = PrRoIPool2D(2, 2, spatial_scale=0.5)

        features = torch.rand((4, 2, 24, 32)).cuda()
        rois = torch.tensor([
            [0, 0, 0, 4, 4],
            [1, 14, 14, 18, 18],
        ]).float().cuda()
        rois.requires_grad = True
        features.requires_grad = True

        layer(features, rois).sum().backward()

        for tensor in (features, rois):
            self.assertTupleEqual(tensor.size(), tensor.grad.size())


# Run the test cases when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()


================================================
FILE: seeing/upsegmodel/resnet.py
================================================
import os
import sys
import torch
import torch.nn as nn
import math
try:
    from lib.nn import SynchronizedBatchNorm2d
except ImportError:
    from torch.nn import BatchNorm2d as SynchronizedBatchNorm2d

try:
    from urllib import urlretrieve
except ImportError:
    from urllib.request import urlretrieve


# Public names of this module: the ResNet class and its two factory functions.
__all__ = ['ResNet', 'resnet50', 'resnet101']


# Download locations of the pretrained checkpoints, keyed by architecture name.
model_urls = {
    'resnet50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet50-imagenet.pth',
    'resnet101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnet101-imagenet.pth'
}


def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with padding 1 and the given stride."""
    conv_options = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_options)


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions.

    Args:
        inplanes: number of input channels.
        planes: number of channels produced by both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input to form the
            shortcut; when ``None`` the input itself is used.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = SynchronizedBatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = SynchronizedBatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # The shortcut is evaluated after the main path.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)


class Bottleneck(nn.Module):
    """Residual block made of 1x1, 3x3 and 1x1 convolutions.

    The last convolution produces ``planes * 4`` channels.

    Args:
        inplanes: number of input channels.
        planes: number of channels of the two inner convolutions.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input to form the
            shortcut; when ``None`` the input itself is used.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        out_planes = planes * 4

        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = SynchronizedBatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = SynchronizedBatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = SynchronizedBatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # The shortcut is evaluated after the main path.
        shortcut = x if self.downsample is None else self.downsample(x)

        out += shortcut
        return self.relu(out)


class ResNet(nn.Module):
    """ResNet with a stem of three 3x3 convolutions followed by four stages.

    Args:
        block: residual block class; it must expose an ``expansion``
            attribute and accept ``(inplanes, planes, stride, downsample)``.
        layers: sequence with the number of blocks in each of the four stages.
        num_classes: output size of the final linear layer.
    """

    def __init__(self, block, layers, num_classes=1000):
        # Channel count fed to the next stage; _make_layer reads and updates it.
        # The stem below ends with 128 channels.
        self.inplanes = 128
        super(ResNet, self).__init__()

        # Stem: three conv/bn/relu triples, then max pooling.
        self.conv1 = conv3x3(3, 64, stride=2)
        self.bn1 = SynchronizedBatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(64, 64)
        self.bn2 = SynchronizedBatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv3 = conv3x3(64, 128)
        self.bn3 = SynchronizedBatchNorm2d(128)
        self.relu3 = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages; every stage after the first uses stride 2.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # Classification head.
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Re-initialise convolution weights from N(0, sqrt(2 / n)) with
        # n = kernel area * output channels, and reset batch-norm affine terms.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size[0], module.kernel_size[1]
                n = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(module, SynchronizedBatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks and update ``self.inplanes``.

        Only the first block receives ``stride`` and, when the stride is not 1
        or the channel count changes, a 1x1 conv + batch-norm projection
        for its shortcut.
        """
        out_planes = planes * block.expansion

        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                SynchronizedBatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_planes
        for _ in range(1, blocks):
            stage.append(block(self.inplanes, planes))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Run stem, stages, average pooling and the linear classifier."""
        stem = (
            (self.conv1, self.bn1, self.relu1),
            (self.conv2, self.bn2, self.relu2),
            (self.conv3, self.bn3, self.relu3),
        )
        for conv, bn, relu in stem:
            x = relu(bn(conv(x)))
        x = self.maxpool(x)

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        x = self.avgpool(x)
        flat = x.view(x.size(0), -1)
        return self.fc(flat)

'''
def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on Places
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
    if pretrained:
        model.load_state_dict(load_url(model_urls['resnet18']))
    return model


def resnet34(pretrained=False, **kwargs):
    """Constructs a ResNet-34 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on Places
    """
    model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
    if pretrained:
        model.load_state_dict(load_url(model_urls['resnet34']))
    return model
'''

def resnet50(pretrained=False, **kwargs):
    """Build a ResNet of Bottleneck blocks with stage depths [3, 4, 6, 3].

    Args:
        pretrained (bool): If True, fetch the checkpoint listed under
            ``model_urls['resnet50']`` and load it non-strictly.
        **kwargs: forwarded to the ``ResNet`` constructor.
    """
    net = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
    if not pretrained:
        return net

    state = load_url(model_urls['resnet50'])
    net.load_state_dict(state, strict=False)
    return net


def resnet101(pretrained=False, **kwargs):
    """Build a ResNet-101: ``Bottleneck`` blocks with stage depths 3-4-23-3.

    Args:
        pretrained (bool): If True, load the checkpoint registered under
            ``model_urls['resnet101']``. Loading is non-strict, so keys that
            do not match the model are ignored rather than raising.
        **kwargs: Forwarded unchanged to the ``ResNet`` constructor.
    """
    stage_depths = [3, 4, 23, 3]
    net = ResNet(Bottleneck, stage_depths, **kwargs)
    if not pretrained:
        return net
    state = load_url(model_urls['resnet101'])
    net.load_state_dict(state, strict=False)
    return net

# def resnet152(pretrained=False, **kwargs):
#     """Constructs a ResNet-152 model.
#
#     Args:
#         pretrained (bool): If True, returns a model pre-trained on Places
#     """
#     model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
#     if pretrained:
#         model.load_state_dict(load_url(model_urls['resnet152']))
#     return model

def load_url(url, model_dir='./pretrained', map_location=None):
    """Download a checkpoint once and load it with ``torch.load``.

    The file is cached in ``model_dir`` under the last path component of
    ``url``; later calls with the same URL reuse the cached copy.

    Args:
        url: Location of the checkpoint file.
        model_dir: Cache directory; created (with parents) when missing.
        map_location: Passed straight through to ``torch.load``.

    Returns:
        Whatever ``torch.load`` returns for the cached file.
    """
    # exist_ok avoids the check-then-create race between parallel jobs.
    os.makedirs(model_dir, exist_ok=True)
    filename = url.split('/')[-1]
    cached_file = os.path.join(model_dir, filename)
    if not os.path.exists(cached_file):
        sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
        # Download next to the final path and rename only on success, so an
        # interrupted transfer can never be mistaken for a valid cache entry.
        partial_file = cached_file + '.part'
        try:
            urlretrieve(url, partial_file)
            os.replace(partial_file, cached_file)
        finally:
            if os.path.exists(partial_file):
                os.remove(partial_file)
    # NOTE(security): torch.load unpickles the file; only use trusted URLs.
    return torch.load(cached_file, map_location=map_location)


================================================
FILE: seeing/upsegmodel/resnext.py
================================================
import os
import sys
import torch
import torch.nn as nn
import math
try:
    from lib.nn import SynchronizedBatchNorm2d
except ImportError:
    from torch.nn import BatchNorm2d as SynchronizedBatchNorm2d

try:
    from urllib import urlretrieve
except ImportError:
    from urllib.request import urlretrieve


__all__ = ['ResNeXt', 'resnext101'] # support resnext 101


# Checkpoint URLs keyed by architecture name. `resnext101` looks up its entry
# here when called with pretrained=True; the resnext50 entry is disabled.
model_urls = {
    #'resnext50': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext50-imagenet.pth',
    'resnext101': 'http://sceneparsing.csail.mit.edu/model/pretrained_resnet/resnext101-imagenet.pth'
}


def conv3x3(in_planes, out_planes, stride=1):
    """Return a bias-free 3x3 ``nn.Conv2d`` with one pixel of padding."""
    conv = nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
    return conv


class GroupBottleneck(nn.Module):
    """Bottleneck residual block whose 3x3 convolution is grouped.

    Layout: 1x1 conv -> grouped 3x3 conv (carries the stride) -> 1x1 conv
    that widens to ``planes * expansion`` channels, each followed by batch
    norm. The (optionally downsampled) input is added before the last ReLU.

    Args:
        inplanes: Number of input channels.
        planes: Width of the two inner convolutions.
        stride: Stride of the 3x3 convolution.
        groups: Group count of the 3x3 convolution.
        downsample: Optional module applied to the input so that it matches
            the main branch's output shape; ``None`` means identity shortcut.
    """

    # Output channels are planes * expansion; ResNeXt._make_layer reads this
    # attribute to size the shortcut and the next stage's input.
    expansion = 2

    def __init__(self, inplanes, planes, stride=1, groups=1, downsample=None):
        super(GroupBottleneck, self).__init__()
        # Derive the output width from `expansion` instead of repeating the
        # literal, so the block stays consistent if a subclass overrides it.
        out_planes = planes * self.expansion
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = SynchronizedBatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = SynchronizedBatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = SynchronizedBatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Apply the three conv/bn stages and add the shortcut branch."""
        residual = x

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNeXt(nn.Module):
    """ResNeXt classifier: 3-conv stem, four residual stages, linear head.

    Args:
        block: Residual block class. It must expose an ``expansion``
            attribute and accept
            ``(inplanes, planes, stride, groups, downsample)``.
        layers: Number of blocks in each of the four stages.
        groups: Group count forwarded to every block.
        num_classes: Output size of the final linear layer.
    """

    def __init__(self, block, layers, groups=32, num_classes=1000):
        super(ResNeXt, self).__init__()
        # Channel count entering the next stage; _make_layer advances it.
        self.inplanes = 128

        # Stem: three 3x3 convolutions (the first strided), then max-pool.
        self.conv1 = conv3x3(3, 64, stride=2)
        self.bn1 = SynchronizedBatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(64, 64)
        self.bn2 = SynchronizedBatchNorm2d(64)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv3 = conv3x3(64, 128)
        self.bn3 = SynchronizedBatchNorm2d(128)
        self.relu3 = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # (planes, stride) for layer1 .. layer4, registered in that order.
        stage_specs = ((128, 1), (256, 2), (512, 2), (1024, 2))
        for idx, (planes, stride) in enumerate(stage_specs):
            stage = self._make_layer(block, planes, layers[idx],
                                     stride=stride, groups=groups)
            setattr(self, 'layer%d' % (idx + 1), stage)

        # Fixed 7x7 pooling window applied to the last stage's output.
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.fc = nn.Linear(1024 * block.expansion, num_classes)

        # Convolutions: zero-mean normal with std sqrt(2 / fan), where the
        # fan is kernel area * out_channels // groups. Norm layers: weight 1,
        # bias 0.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                k_h, k_w = module.kernel_size
                fan = k_h * k_w * module.out_channels // module.groups
                module.weight.data.normal_(0, math.sqrt(2. / fan))
            elif isinstance(module, SynchronizedBatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1, groups=1):
        """Stack ``blocks`` instances of ``block`` into one stage.

        Only the first block receives ``stride`` and, when the stride or the
        channel count changes, a 1x1 conv + norm shortcut. ``self.inplanes``
        is updated to the stage's output width.
        """
        out_planes = planes * block.expansion
        shortcut = None
        if not (stride == 1 and self.inplanes == out_planes):
            shortcut = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=1, stride=stride, bias=False),
                SynchronizedBatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, stride, groups, shortcut)]
        self.inplanes = out_planes
        stage.extend(block(self.inplanes, planes, groups=groups)
                     for _ in range(1, blocks))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Stem -> layer1..layer4 -> average pool -> flatten -> ``fc``."""
        stem = (
            (self.conv1, self.bn1, self.relu1),
            (self.conv2, self.bn2, self.relu2),
            (self.conv3, self.bn3, self.relu3),
        )
        for conv, norm, act in stem:
            x = act(norm(conv(x)))
        x = self.maxpool(x)

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        pooled = self.avgpool(x)
        flat = pooled.view(pooled.size(0), -1)
        return self.fc(flat)


'''
def resnext50(pretrained=False, **kwargs):
    """Constructs a ResNet-50 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on Places
    """
    model = ResNeXt(GroupBottleneck, [3, 4, 6, 3], **kwargs)
    if pretrained:
        model.load_state_dict(load_url(model_urls['resnext50']), strict=False)
    return model
'''


def resnext101(pretrained=False, **kwargs):
    """Build a ResNeXt-101: ``GroupBottleneck`` blocks, depths 3-4-23-3.

    Args:
        pretrained (bool): If True, load the checkpoint registered under
            ``model_urls['resnext101']`` (the ``resnext101-imagenet.pth``
            file). Loading is non-strict, so keys that do not match the
            model are ignored rather than raising.
        **kwargs: Forwarded unchanged to the ``ResNeXt`` constructor.
    """
    stage_depths = [3, 4, 23, 3]
    net = ResNeXt(GroupBottleneck, stage_depths, **kwargs)
    if not pretrained:
        return net
    state = load_url(model_urls['resnext101'])
    net.load_state_dict(state, strict=False)
    return net


# def resnext152(pretrained=False, **kwargs):
#     """Constructs a ResNeXt-152 model.
#
#     Args:
#         pretrained (bool): If True, returns a model pre-trained on Places
#     """
#     model = ResNeXt(GroupBottleneck, [3, 8, 36, 3], **kwargs)
#     if pretrained:
#         model.load_state_dict(load_url(model_urls['resnext152']))
#     return model


def load_url(url, model_dir='./pretrained', map_location=None):
    """Download a checkpoint once and load it with ``torch.load``.

    The file is cached in ``model_dir`` under the last path component of
    ``url``; later calls with the same URL reuse the cached copy.

    Args:
        url: Location of the checkpoint file.
        model_dir: Cache directory; created (with parents) when missing.
        map_location: Passed straight through to ``torch.load``.

    Returns:
        Whatever ``torch.load`` returns for the cached file.
    """
    # exist_ok avoids the check-then-create race between parallel jobs.
    os.makedirs(model_dir, exist_ok=True)
    filename = url.split('/')[-1]
    cached_file = os.path.join(model_dir, filename)
    if not os.path.exists(cached_file):
        sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
        # Download next to the final path and rename only on success, so an
        # interrupted transfer can never be mistaken for a valid cache entry.
        partial_file = cached_file + '.part'
        try:
            urlretrieve(url, partial_file)
            os.replace(partial_file, cached_file)
        finally:
            if os.path.exists(partial_file):
                os.remove(partial_file)
    # NOTE(security): torch.load unpickles the file, and the URL table in
    # this module uses plain HTTP; only load checkpoints you trust.
    return torch.load(cached_file, map_location=map_location)


================================================
FILE: seeing/yz_dataset.py
================================================
import torch, numpy


class YZDataset():
    """Seeded sampler of (z, y) pairs: a normal latent and a class label.

    Each seed initialises its own ``numpy.random.RandomState``, so the pair
    produced for a given seed does not depend on the other seeds in the call.

    Args:
        zdim: Length of each latent vector.
        nlabels: Number of classes labels are drawn from.
        distribution: Per-class probabilities; must have ``nlabels`` entries.
        device: Device the returned tensors are moved to.
    """

    def __init__(self, zdim=256, nlabels=1, distribution=[1.], device='cpu'):
        self.zdim = zdim
        self.nlabels = nlabels
        self.device = device
        # Stored by reference and never mutated by this class.
        self.distribution = distribution
        assert (len(distribution) == nlabels)

    def __call__(self, seeds):
        """Return ``(z, y)`` for an iterable of integer seeds.

        Returns:
            z: float32 tensor of shape ``(len(seeds), zdim)``.
            y: int64 tensor of shape ``(len(seeds),)``.
            Both are empty (first dimension 0) when ``seeds`` is empty.
        """
        zs, ys = [], []
        for seed in seeds:
            rng = numpy.random.RandomState(seed)
            # Draw order (z first, then y) fixes the values per seed.
            zs.append(rng.standard_normal(self.zdim))
            ys.append(rng.choice(self.nlabels, 1, replace=False,
                                 p=self.distribution))
        if not zs:
            # torch.cat / numpy.stack reject empty lists; return empty
            # tensors with the usual dtypes and trailing shape instead.
            return (torch.empty((0, self.zdim), dtype=torch.float32,
                                device=self.device),
                    torch.empty((0,), dtype=torch.long, device=self.device))
        # Assemble on the host and transfer once rather than once per seed.
        z = torch.from_numpy(numpy.stack(zs)).float().to(self.device)
        y = torch.from_numpy(numpy.concatenate(ys)).long().to(self.device)
        return z, y


if __name__ == '__main__':
    # Smoke test: different seeds give different z, equal seeds give equal
    # (z, y). Differences are compared in absolute value; a signed mean can
    # cancel out or go negative and make either check meaningless.
    sampler = YZDataset()
    a, d = sampler([10, 11])
    b, e = sampler([12, 13])
    assert ((a - b).abs().mean() > 1e-3)
    c, f = sampler([10, 11])
    assert ((a - c).abs().mean() < 1e-3)
    assert torch.equal(d, f)


================================================
FILE: seeing/zdataset.py
================================================
import os, torch, numpy
from torch.utils.data import TensorDataset

def z_dataset_for_model(model, size=100, seed=1):
    """Wrap the z sample drawn for `model` in a one-tensor TensorDataset."""
    z = z_sample_for_model(model, size, seed)
    return TensorDataset(z)

def z_sample_for_model(model, size=100, seed=1):
    '''
    Draw `size` standard-normal inputs shaped for `model`.

    If the model has an `input_shape` attribute, every dimension after the
    first is used as the per-sample shape. Otherwise the first Conv2d,
    ConvTranspose2d or Linear module decides: a convolution yields a
    (size, in_channels, 1, 1) tensor, a linear layer (size, in_features).

    Raises IndexError if neither source of shape information is available.
    '''
    # If the model is marked with an input shape, use it.
    if hasattr(model, 'input_shape'):
        item_shape = tuple(model.input_shape[1:])
        # Sample enough values for the whole per-sample shape, so shapes
        # with non-singleton trailing dimensions can be viewed correctly.
        depth = 1
        for dim in item_shape:
            depth *= dim
        return standard_z_sample(size, depth, seed=seed).view(
                (size,) + item_shape)
    # Examine first conv in model to determine input feature size.
    input_layers = (torch.nn.Conv2d, torch.nn.ConvTranspose2d,
            torch.nn.Linear)
    first_layer = next(
            (c for c in model.modules() if isinstance(c, input_layers)),
            None)
    if first_layer is None:
        raise IndexError(
                'model has no Conv2d, ConvTranspose2d or Linear layer '
                'to infer the z shape from')
    # 2d input if first layer is linear, 4d input if convolutional.
    if isinstance(first_layer, torch.nn.Linear):
        return standard_z_sample(size, first_layer.in_features, seed=seed)
    return standard_z_sample(
            size, first_layer.in_channels, seed=seed)[:,:,None,None]

def standard_z_sample(size, depth, seed=1, device=None):
    '''
    Return a (size, depth) float tensor of standard-normal values.

    The values come from one sequential draw of a RandomState seeded with
    `seed`, so a given seed always yields the same leading rows whatever
    `size` is. The tensor is moved to `device` when one is given.
    '''
    # A private RandomState keeps the draw deterministic without touching
    # numpy's global random state.
    rng = numpy.random.RandomState(seed)
    flat = rng.standard_normal(size * depth)
    z = torch.from_numpy(flat.reshape(size, depth)).float()
    return z if device is None else z.to(device)


================================================
FILE: train.py
================================================
import argparse
import os
import copy
import pprint
from os import path

import torch
import numpy as np
from torch import nn

from gan_training import utils
from gan_training.train import Trainer, update_average
from gan_training.logger import Logger
from gan_training.checkpoints import CheckpointIO
from gan_training.inputs import get_dataset
from gan_training.distributions import get_ydist, get_zdist
from gan_training.eval import Evaluator
from gan_training.config import (load_config, get_clusterer, build_models, build_optimizers)
from seeing.pidfile import exit_if_job_done, mark_job_done

# Enable cuDNN's benchmark mode for the whole process.
torch.backends.cudnn.benchmark = True

# Arguments
# NOTE: parsing happens at import time, so importing this module consumes
# sys.argv; `args`, `config` and `out_dir` are module globals read by main().
parser = argparse.ArgumentParser(
    description='Train a GAN with different regularization strategies.')
parser.add_argument('config', type=str, help='Path to config file.')
parser.add_argument('--outdir', type=str, help='used to override outdir (useful for multiple runs)')
parser.add_argument('--nepochs', type=int, default=250, help='number of epochs to run before terminating')
parser.add_argument('--model_it', type=int, default=-1, help='which model iteration to load from, -1 loads the most recent model')
parser.add_argument('--devices', nargs='+', type=str, default=['0'], help='devices to use')

args = parser.parse_args()
# The second argument is the default config passed alongside the chosen one.
config = load_config(args.config, 'configs/default.yaml')
# --outdir, when given, takes precedence over the configured output folder.
out_dir = config['training']['out_dir'] if args.outdir is None else args.outdir


def main():
    """Run GAN training as described by the module-level ``config``/``args``.

    Builds the data loader, models, optimizers, clusterer, evaluator and
    trainer, restores the requested (or most recent) checkpoint, then trains
    in whole epochs until ``args.nepochs * len(train_loader)`` iterations
    have been reached. Along the way it logs losses, reclusters, writes
    sample images, computes the inception score and saves checkpoints at the
    intervals given in ``config['training']``.

    Raises:
        SystemExit: with status 1 when the discriminator contains a Sigmoid
            layer, or when model averaging is enabled and either model
            contains a batch-norm layer.
    """
    pp = pprint.PrettyPrinter(indent=1)
    pp.pprint({
        'data': config['data'],
        'generator': config['generator'],
        'discriminator': config['discriminator'],
        'clusterer': config['clusterer'],
        'training': config['training']
    })
    is_cuda = torch.cuda.is_available()

    # Short hands
    batch_size = config['training']['batch_size']
    log_every = config['training']['log_every']
    inception_every = config['training']['inception_every']
    backup_every = config['training']['backup_every']
    sample_nlabels = config['training']['sample_nlabels']
    nlabels = config['data']['nlabels']
    sample_nlabels = min(nlabels, sample_nlabels)

    checkpoint_dir = path.join(out_dir, 'chkpts')

    # Create missing directories
    if not path.exists(out_dir):
        os.makedirs(out_dir)
    if not path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    # Logger
    checkpoint_io = CheckpointIO(checkpoint_dir=checkpoint_dir)

    # nn.DataParallel requires the wrapped module to live on device_ids[0],
    # so the master device follows --devices instead of being fixed to 0.
    devices = [int(x) for x in args.devices]
    device = torch.device("cuda:%d" % devices[0] if is_cuda else "cpu")

    train_dataset, _ = get_dataset(
        name=config['data']['type'],
        data_dir=config['data']['train_dir'],
        size=config['data']['img_size'],
        deterministic=config['data']['deterministic'])

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        num_workers=config['training']['nworkers'],
        shuffle=True,
        pin_memory=True,
        sampler=None,
        drop_last=True)

    # Create models
    generator, discriminator = build_models(config)

    # Put models on gpu if needed
    generator = generator.to(device)
    discriminator = discriminator.to(device)

    for module in discriminator.modules():
        if isinstance(module, nn.Sigmoid):
            print('Found sigmoid layer in discriminator; not compatible with BCE with logits')
            # Configuration error: leave with a non-zero status.
            raise SystemExit(1)

    g_optimizer, d_optimizer = build_optimizers(generator, discriminator, config)

    generator = nn.DataParallel(generator, device_ids=devices)
    discriminator = nn.DataParallel(discriminator, device_ids=devices)

    # Register modules to checkpoint
    checkpoint_io.register_modules(generator=generator,
                                   discriminator=discriminator,
                                   g_optimizer=g_optimizer,
                                   d_optimizer=d_optimizer)

    # Logger
    logger = Logger(log_dir=path.join(out_dir, 'logs'),
                    img_dir=path.join(out_dir, 'imgs'),
                    monitoring=config['training']['monitoring'],
                    monitoring_dir=path.join(out_dir, 'monitoring'))

    # Distributions
    ydist = get_ydist(nlabels, device=device)
    zdist = get_zdist(config['z_dist']['type'], config['z_dist']['dim'], device=device)

    # Fixed real images / latents reused for every sample grid, plus the
    # images handed to the clusterer.
    ntest = config['training']['ntest']
    x_test, y_test = utils.get_nsamples(train_loader, ntest)
    x_cluster, y_cluster = utils.get_nsamples(train_loader, config['clusterer']['nimgs'])
    x_test, y_test = x_test.to(device), y_test.to(device)
    z_test = zdist.sample((ntest, ))
    utils.save_images(x_test, path.join(out_dir, 'real.png'))
    logger.add_imgs(x_test, 'gt', 0)

    # Test generator
    if config['training']['take_model_average']:
        print('Taking model average')
        bad_modules = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)
        for model_name, model in (('generator', generator),
                                  ('discriminator', discriminator)):
            for module in model.modules():
                if isinstance(module, bad_modules):
                    # Name the model that actually contains the layer.
                    print('Batch norm in %s not compatible with exponential moving average' % model_name)
                    raise SystemExit(1)
        generator_test = copy.deepcopy(generator)
        checkpoint_io.register_modules(generator_test=generator_test)
    else:
        generator_test = generator

    clusterer = get_clusterer(config)(discriminator=discriminator,
                                      x_cluster=x_cluster,
                                      x_labels=y_cluster,
                                      gt_nlabels=config['data']['nlabels'],
                                      **config['clusterer']['kwargs'])

    # Load checkpoint if it exists
    it = utils.get_most_recent(checkpoint_dir, 'model') if args.model_it == -1 else args.model_it
    it, epoch_idx, loaded_clusterer = checkpoint_io.load_models(it=it, load_samples='supervised' != config['clusterer']['name'])

    if loaded_clusterer is None:
        print('Initializing new clusterer. The first clustering can be quite slow.')
        clusterer.recluster(discriminator=discriminator)
        checkpoint_io.save_clusterer(clusterer, it=0)
        np.savez(os.path.join(checkpoint_dir, 'cluster_samples.npz'), x=x_cluster)
    else:
        print('Using loaded clusterer')
        clusterer = loaded_clusterer

    # Evaluator
    evaluator = Evaluator(
        generator_test,
        zdist,
        ydist,
        train_loader=train_loader,
        clusterer=clusterer,
        batch_size=batch_size,
        device=device,
        inception_nsamples=config['training']['inception_nsamples'])

    # Trainer
    trainer = Trainer(generator,
                      discriminator,
                      g_optimizer,
                      d_optimizer,
                      gan_type=config['training']['gan_type'],
                      reg_type=config['training']['reg_type'],
                      reg_param=config['training']['reg_param'])

    # Training loop
    print('Start training...')
    # The budget is only checked between epochs, so the epoch in which the
    # limit is crossed still runs to completion.
    while it < args.nepochs * len(train_loader):
        epoch_idx += 1

        for x_real, y in train_loader:
            it += 1

            x_real, y = x_real.to(device), y.to(device)
            z = zdist.sample((batch_size, ))
            # Labels used for training come from the clusterer.
            y = clusterer.get_labels(x_real, y).to(device)

            # Discriminator updates
            dloss, reg = trainer.discriminator_trainstep(x_real, y, z)
            logger.add('losses', 'discriminator', dloss, it=it)
            logger.add('losses', 'regularizer', reg, it=it)

            # Generators updates
            gloss = trainer.generator_trainstep(y, z)
            logger.add('losses', 'generator', gloss, it=it)

            if config['training']['take_model_average']:
                update_average(generator_test, generator, beta=config['training']['model_average_beta'])

            # Print stats
            if it % log_every == 0:
                g_loss_last = logger.get_last('losses', 'generator')
                d_loss_last = logger.get_last('losses', 'discriminator')
                d_reg_last = logger.get_last('losses', 'regularizer')
                print('[epoch %0d, it %4d] g_loss = %.4f, d_loss = %.4f, reg=%.4f'
                      % (epoch_idx, it, g_loss_last, d_loss_last, d_reg_last))

            if it % config['training']['recluster_every'] == 0 and it > config['training']['burnin_time']:
                # print cluster distribution for online methods
                if it % 100 == 0 and config['training']['recluster_every'] <= 100:
                    print(f'[epoch {epoch_idx}, it {it}], distribution: {clusterer.get_label_distribution(x_real)}')
                clusterer.recluster(discriminator=discriminator, x_batch=x_real)

            # (i) Sample if necessary
            if it % config['training']['sample_every'] == 0:
                print('Creating samples...')
                # One grid conditioned on the clusterer's labels for x_test
                # (an earlier pass with y_test was discarded unused).
                x = evaluator.create_samples(z_test, clusterer.get_labels(x_test, y_test).to(device))
                logger.add_imgs(x, 'all', it)

                # One grid per label for the first sample_nlabels labels.
                for y_inst in range(sample_nlabels):
                    x = evaluator.create_samples(z_test, y_inst)
                    logger.add_imgs(x, '%04d' % y_inst, it)

            # (ii) Compute inception if necessary
            if it % inception_every == 0 and it > 0:
                print('PyTorch Inception score...')
                inception_mean, inception_std = evaluator.compute_inception_score()
                logger.add('metrics', 'pt_inception_mean', inception_mean, it=it)
                logger.add('metrics', 'pt_inception_stddev', inception_std, it=it)
                print(f'[epoch {epoch_idx}, it {it}] pt_inception_mean: {inception_mean}, pt_inception_stddev: {inception_std}')

            # (iii) Backup if necessary
            if it % backup_every == 0:
                print('Saving backup...')
                checkpoint_io.save('model_%08d.pt' % it, it=it)
                checkpoint_io.save_clusterer(clusterer, int(it))
                logger.save_stats('stats_%08d.p' % it)

                if it > 0:
                    checkpoint_io.save('model.pt', it=it)


if __name__ == '__main__':
    # Script entry point. The two seeing.pidfile helpers bracket the run;
    # presumably the first stops early when out_dir is already marked as
    # finished and the second records completion - confirm in seeing/pidfile.
    exit_if_job_done(out_dir)
    main()
    mark_job_done(out_dir)


================================================
FILE: utils/classifiers/__init__.py
================================================
from classifiers import stacked_mnist, cifar, places, imagenet

# Maps a dataset name to the Classifier class defined in the module of the
# same name.
classifier_dict = {
    'stacked_mnist': stacked_mnist.Classifier,
    'cifar': cifar.Classifier, 
    'places': places.Classifier,
    'imagenet': imagenet.Classifier
}

================================================
FILE: utils/classifiers/cifar.py
================================================
import sys
sys.path.append('utils/classifiers')

from pytorch_playground.cifar.model import cifar10

class Classifier():
    """CIFAR classifier wrapper around ``pytorch_playground``'s ``cifar10``.

    The model is built by ``cifar10()`` and moved to the default CUDA device.
    """

    def __init__(self):
        self.classifier = cifar10().cuda()
        # This wrapper is only used for prediction, so switch layers such as
        # batch norm and dropout (if the model has any) to inference mode,
        # matching what the ImageNet wrapper does for its model.
        self.classifier.eval()

    def get_predictions(self, x):
        """Return the arg-max class index for each image in ``x``.

        ``x`` must have three channels in dimension 1; the model output is
        reduced with ``argmax`` over dimension 1.
        """
        assert(x.size(1) == 3)
        return self.classifier(x).argmax(dim=1)


================================================
FILE: utils/classifiers/imagenet.py
================================================
import torch
import torchvision.models as models
from torchvision import transforms as trn
from torch.nn import functional as F
import os


class Classifier():
    """ImageNet classifier wrapper around torchvision's pretrained ResNet-50.

    The model runs on the default CUDA device in eval mode. Inputs are
    resized to 224x224, divided by 255 and normalised per channel with
    ``self.mean`` / ``self.std`` before being classified.
    """

    def __init__(self):
        import json

        self.model = models.resnet50(pretrained=True).cuda()
        self.model.eval()

        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]
        self.trn = trn.Normalize(self.mean, self.std)

        # Look for the label file next to this module, so loading it does
        # not depend on the process's working directory.
        index_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            'imagenet_class_index.json')
        with open(index_path) as f:
            self.class_idx = json.load(f)

    def transform(self, x):
        """Resize, rescale and normalise a batch, then move it to CUDA."""
        # The division implies pixel values on a 0-255 scale - TODO confirm
        # against callers.
        x = F.interpolate(x, size=(224, 224)) / 255.
        x = torch.stack([self.trn(xi) for xi in x]).cuda()
        return x

    def get_name(self, class_id):
        """Return the readable class name stored for ``class_id``."""
        # Keys of the index are strings; each value is [wordnet id, name].
        return self.class_idx[str(class_id)][1]

    def _logits(self, x):
        """Preprocess ``x`` and return the model's raw outputs."""
        # Call the module itself rather than .forward so hooks still run.
        return self.model(self.transform(x))

    def get_predictions_and_confidence(self, x):
        """Return ``(indices, values)``: arg-max class and its raw score.

        The "confidence" is the maximum model output per sample, not a
        probability.
        """
        values, ind = self._logits(x).max(dim=1)
        return ind, values

    def get_predictions(self, x):
        """Return the arg-max class index for each image in ``x``."""
        return self._logits(x).argmax(dim=1)


================================================
FILE: utils/classifiers/imagenet_class_index.json
================================================
{"0": ["n01440764", "tench"], "1": ["n01443537", "goldfish"], "2": ["n01484850", "great_white_shark"], "3": ["n01491361", "tiger_shark"], "4": ["n01494475", "hammerhead"], "5": ["n01496331", "electric_ray"], "6": ["n01498041", "stingray"], "7": ["n01514668", "cock"], "8": ["n01514859", "hen"], "9": ["n01518878", "ostrich"], "10": ["n01530575", "brambling"], "11": ["n01531178", "goldfinch"], "12": ["n01532829", "house_finch"], "13": ["n01534433", "junco"], "14": ["n01537544", "indigo_bunting"], "15": ["n01558993", "robin"], "16": ["n01560419", "bulbul"], "17": ["n01580077", "jay"], "18": ["n01582220", "magpie"], "19": ["n01592084", "chickadee"], "20": ["n01601694", "water_ouzel"], "21": ["n01608432", "kite"], "22": ["n01614925", "bald_eagle"], "23": ["n01616318", "vulture"], "24": ["n01622779", "great_grey_owl"], "25": ["n01629819", "European_fire_salamander"], "26": ["n01630670", "common_newt"], "27": ["n01631663", "eft"], "28": ["n01632458", "spotted_salamander"], "29": ["n01632777", "axolotl"], "30": ["n01641577", "bullfrog"], "31": ["n01644373", "tree_frog"], "32": ["n01644900", "tailed_frog"], "33": ["n01664065", "loggerhead"], "34": ["n01665541", "leatherback_turtle"], "35": ["n01667114", "mud_turtle"], "36": ["n01667778", "terrapin"], "37": ["n01669191", "box_turtle"], "38": ["n01675722", "banded_gecko"], "39": ["n01677366", "common_iguana"], "40": ["n01682714", "American_chameleon"], "41": ["n01685808", "whiptail"], "42": ["n01687978", "agama"], "43": ["n01688243", "frilled_lizard"], "44": ["n01689811", "alligator_lizard"], "45": ["n01692333", "Gila_monster"], "46": ["n01693334", "green_lizard"], "47": ["n01694178", "African_chameleon"], "48": ["n01695060", "Komodo_dragon"], "49": ["n01697457", "African_crocodile"], "50": ["n01698640", "American_alligator"], "51": ["n01704323", "triceratops"], "52": ["n01728572", "thunder_snake"], "53": ["n01728920", "ringneck_snake"], "54": ["n01729322", "hognose_snake"], "55": ["n01729977", "green_snake"], "56": 
["n01734418", "king_snake"], "57": ["n01735189", "garter_snake"], "58": ["n01737021", "water_snake"], "59": ["n01739381", "vine_snake"], "60": ["n01740131", "night_snake"], "61": ["n01742172", "boa_constrictor"], "62": ["n01744401", "rock_python"], "63": ["n01748264", "Indian_cobra"], "64": ["n01749939", "green_mamba"], "65": ["n01751748", "sea_snake"], "66": ["n01753488", "horned_viper"], "67": ["n01755581", "diamondback"], "68": ["n01756291", "sidewinder"], "69": ["n01768244", "trilobite"], "70": ["n01770081", "harvestman"], "71": ["n01770393", "scorpion"], "72": ["n01773157", "black_and_gold_garden_spider"], "73": ["n01773549", "barn_spider"], "74": ["n01773797", "garden_spider"], "75": ["n01774384", "black_widow"], "76": ["n01774750", "tarantula"], "77": ["n01775062", "wolf_spider"], "78": ["n01776313", "tick"], "79": ["n01784675", "centipede"], "80": ["n01795545", "black_grouse"], "81": ["n01796340", "ptarmigan"], "82": ["n01797886", "ruffed_grouse"], "83": ["n01798484", "prairie_chicken"], "84": ["n01806143", "peacock"], "85": ["n01806567", "quail"], "86": ["n01807496", "partridge"], "87": ["n01817953", "African_grey"], "88": ["n01818515", "macaw"], "89": ["n01819313", "sulphur-crested_cockatoo"], "90": ["n01820546", "lorikeet"], "91": ["n01824575", "coucal"], "92": ["n01828970", "bee_eater"], "93": ["n01829413", "hornbill"], "94": ["n01833805", "hummingbird"], "95": ["n01843065", "jacamar"], "96": ["n01843383", "toucan"], "97": ["n01847000", "drake"], "98": ["n01855032", "red-breasted_merganser"], "99": ["n01855672", "goose"], "100": ["n01860187", "black_swan"], "101": ["n01871265", "tusker"], "102": ["n01872401", "echidna"], "103": ["n01873310", "platypus"], "104": ["n01877812", "wallaby"], "105": ["n01882714", "koala"], "106": ["n01883070", "wombat"], "107": ["n01910747", "jellyfish"], "108": ["n01914609", "sea_anemone"], "109": ["n01917289", "brain_coral"], "110": ["n01924916", "flatworm"], "111": ["n01930112", "nematode"], "112": ["n01943899", "conch"], 
"113": ["n01944390", "snail"], "114": ["n01945685", "slug"], "115": ["n01950731", "sea_slug"], "116": ["n01955084", "chiton"], "117": ["n01968897", "chambered_nautilus"], "118": ["n01978287", "Dungeness_crab"], "119": ["n01978455", "rock_crab"], "120": ["n01980166", "fiddler_crab"], "121": ["n01981276", "king_crab"], "122": ["n01983481", "American_lobster"], "123": ["n01984695", "spiny_lobster"], "124": ["n01985128", "crayfish"], "125": ["n01986214", "hermit_crab"], "126": ["n01990800", "isopod"], "127": ["n02002556", "white_stork"], "128": ["n02002724", "black_stork"], "129": ["n02006656", "spoonbill"], "130": ["n02007558", "flamingo"], "131": ["n02009229", "little_blue_heron"], "132": ["n02009912", "American_egret"], "133": ["n02011460", "bittern"], "134": ["n02012849", "crane"], "135": ["n02013706", "limpkin"], "136": ["n02017213", "European_gallinule"], "137": ["n02018207", "American_coot"], "138": ["n02018795", "bustard"], "139": ["n02025239", "ruddy_turnstone"], "140": ["n02027492", "red-backed_sandpiper"], "141": ["n02028035", "redshank"], "142": ["n02033041", "dowitcher"], "143": ["n02037110", "oystercatcher"], "144": ["n02051845", "pelican"], "145": ["n02056570", "king_penguin"], "146": ["n02058221", "albatross"], "147": ["n02066245", "grey_whale"], "148": ["n02071294", "killer_whale"], "149": ["n02074367", "dugong"], "150": ["n02077923", "sea_lion"], "151": ["n02085620", "Chihuahua"], "152": ["n02085782", "Japanese_spaniel"], "153": ["n02085936", "Maltese_dog"], "154": ["n02086079", "Pekinese"], "155": ["n02086240", "Shih-Tzu"], "156": ["n02086646", "Blenheim_spaniel"], "157": ["n02086910", "papillon"], "158": ["n02087046", "toy_terrier"], "159": ["n02087394", "Rhodesian_ridgeback"], "160": ["n02088094", "Afghan_hound"], "161": ["n02088238", "basset"], "162": ["n02088364", "beagle"], "163": ["n02088466", "bloodhound"], "164": ["n02088632", "bluetick"], "165": ["n02089078", "black-and-tan_coonhound"], "166": ["n02089867", "Walker_hound"], "167": 
["n02089973", "English_foxhound"], "168": ["n02090379", "redbone"], "169": ["n02090622", "borzoi"], "170": ["n02090721", "Irish_wolfhound"], "171": ["n02091032", "Italian_greyhound"], "172": ["n02091134", "whippet"], "173": ["n02091244", "Ibizan_hound"], "174": ["n02091467", "Norwegian_elkhound"], "175": ["n02091635", "otterhound"], "176": ["n02091831", "Saluki"], "177": ["n02092002", "Scottish_deerhound"], "178": ["n02092339", "Weimaraner"], "179": ["n02093256", "Staffordshire_bullterrier"], "180": ["n02093428", "American_Staffordshire_terrier"], "181": ["n02093647", "Bedlington_terrier"], "182": ["n02093754", "Border_terrier"], "183": ["n02093859", "Kerry_blue_terrier"], "184": ["n02093991", "Irish_terrier"], "185": ["n02094114", "Norfolk_terrier"], "186": ["n02094258", "Norwich_terrier"], "187": ["n02094433", "Yorkshire_terrier"], "188": ["n02095314", "wire-haired_fox_terrier"], "189": ["n02095570", "Lakeland_terrier"], "190": ["n02095889", "Sealyham_terrier"], "191": ["n02096051", "Airedale"], "192": ["n02096177", "cairn"], "193": ["n02096294", "Australian_terrier"], "194": ["n02096437", "Dandie_Dinmont"], "195": ["n02096585", "Boston_bull"], "196": ["n02097047", "miniature_schnauzer"], "197": ["n02097130", "giant_schnauzer"], "198": ["n02097209", "standard_schnauzer"], "199": ["n02097298", "Scotch_terrier"], "200": ["n02097474", "Tibetan_terrier"], "201": ["n02097658", "silky_terrier"], "202": ["n02098105", "soft-coated_wheaten_terrier"], "203": ["n02098286", "West_Highland_white_terrier"], "204": ["n02098413", "Lhasa"], "205": ["n02099267", "flat-coated_retriever"], "206": ["n02099429", "curly-coated_retriever"], "207": ["n02099601", "golden_retriever"], "208": ["n02099712", "Labrador_retriever"], "209": ["n02099849", "Chesapeake_Bay_retriever"], "210": ["n02100236", "German_short-haired_pointer"], "211": ["n02100583", "vizsla"], "212": ["n02100735", "English_setter"], "213": ["n02100877", "Irish_setter"], "214": ["n02101006", "Gordon_setter"], "215": 
["n02101388", "Brittany_spaniel"], "216": ["n02101556", "clumber"], "217": ["n02102040", "English_springer"], "218": ["n02102177", "Welsh_springer_spaniel"], "219": ["n02102318", "cocker_spaniel"], "220": ["n02102480", "Sussex_spaniel"], "221": ["n02102973", "Irish_water_spaniel"], "222": ["n02104029", "kuvasz"], "223": ["n02104365", "schipperke"], "224": ["n02105056", "groenendael"], "225": ["n02105162", "malinois"], "226": ["n02105251", "briard"], "227": ["n02105412", "kelpie"], "228": ["n02105505", "komondor"], "229": ["n02105641", "Old_English_sheepdog"], "230": ["n02105855", "Shetland_sheepdog"], "231": ["n02106030", "collie"], "232": ["n02106166", "Border_collie"], "233": ["n02106382", "Bouvier_des_Flandres"], "234": ["n02106550", "Rottweiler"], "235": ["n02106662", "German_shepherd"], "236": ["n02107142", "Doberman"], "237": ["n02107312", "miniature_pinscher"], "238": ["n02107574", "Greater_Swiss_Mountain_dog"], "239": ["n02107683", "Bernese_mountain_dog"], "240": ["n02107908", "Appenzeller"], "241": ["n02108000", "EntleBucher"], "242": ["n02108089", "boxer"], "243": ["n02108422", "bull_mastiff"], "244": ["n02108551", "Tibetan_mastiff"], "245": ["n02108915", "French_bulldog"], "246": ["n02109047", "Great_Dane"], "247": ["n02109525", "Saint_Bernard"], "248": ["n02109961", "Eskimo_dog"], "249": ["n02110063", "malamute"], "250": ["n02110185", "Siberian_husky"], "251": ["n02110341", "dalmatian"], "252": ["n02110627", "affenpinscher"], "253": ["n02110806", "basenji"], "254": ["n02110958", "pug"], "255": ["n02111129", "Leonberg"], "256": ["n02111277", "Newfoundland"], "257": ["n02111500", "Great_Pyrenees"], "258": ["n02111889", "Samoyed"], "259": ["n02112018", "Pomeranian"], "260": ["n02112137", "chow"], "261": ["n02112350", "keeshond"], "262": ["n02112706", "Brabancon_griffon"], "263": ["n02113023", "Pembroke"], "264": ["n02113186", "Cardigan"], "265": ["n02113624", "toy_poodle"], "266": ["n02113712", "miniature_poodle"], "267": ["n02113799", "standard_poodle"], 
"268": ["n02113978", "Mexican_hairless"], "269": ["n02114367", "timber_wolf"], "270": ["n02114548", "white_wolf"], "271": ["n02114712", "red_wolf"], "272": ["n02114855", "coyote"], "273": ["n02115641", "dingo"], "274": ["n02115913", "dhole"], "275": ["n02116738", "African_hunting_dog"], "276": ["n02117135", "hyena"], "277": ["n02119022", "red_fox"], "278": ["n02119789", "kit_fox"], "279": ["n02120079", "Arctic_fox"], "280": ["n02120505", "grey_fox"], "281": ["n02123045", "tabby"], "282": ["n02123159", "tiger_cat"], "283": ["n02123394", "Persian_cat"], "284": ["n02123597", "Siamese_cat"], "285": ["n02124075", "Egyptian_cat"], "286": ["n02125311", "cougar"], "287": ["n02127052", "lynx"], "288": ["n02128385", "leopard"], "289": ["n02128757", "snow_leopard"], "290": ["n02128925", "jaguar"], "291": ["n02129165", "lion"], "292": ["n02129604", "tiger"], "293": ["n02130308", "cheetah"], "294": ["n02132136", "brown_bear"], "295": ["n02133161", "American_black_bear"], "296": ["n02134084", "ice_bear"], "297": ["n02134418", "sloth_bear"], "298": ["n02137549", "mongoose"], "299": ["n02138441", "meerkat"], "300": ["n02165105", "tiger_beetle"], "301": ["n02165456", "ladybug"], "302": ["n02167151", "ground_beetle"], "303": ["n02168699", "long-horned_beetle"], "304": ["n02169497", "leaf_beetle"], "305": ["n02172182", "dung_beetle"], "306": ["n02174001", "rhinoceros_beetle"], "307": ["n02177972", "weevil"], "308": ["n02190166", "fly"], "309": ["n02206856", "bee"], "310": ["n02219486", "ant"], "311": ["n02226429", "grasshopper"], "312": ["n02229544", "cricket"], "313": ["n02231487", "walking_stick"], "314": ["n02233338", "cockroach"], "315": ["n02236044", "mantis"], "316": ["n02256656", "cicada"], "317": ["n02259212", "leafhopper"], "318": ["n02264363", "lacewing"], "319": ["n02268443", "dragonfly"], "320": ["n02268853", "damselfly"], "321": ["n02276258", "admiral"], "322": ["n02277742", "ringlet"], "323": ["n02279972", "monarch"], "324": ["n02280649", "cabbage_butterfly"], "325": 
["n02281406", "sulphur_butterfly"], "326": ["n02281787", "lycaenid"], "327": ["n02317335", "starfish"], "328": ["n02319095", "sea_urchin"], "329": ["n02321529", "sea_cucumber"], "330": ["n02325366", "wood_rabbit"], "331": ["n02326432", "hare"], "332": ["n02328150", "Angora"], "333": ["n02342885", "hamster"], "334": ["n02346627", "porcupine"], "335": ["n02356798", "fox_squirrel"], "336": ["n02361337", "marmot"], "337": ["n02363005", "beaver"], "338": ["n02364673", "guinea_pig"], "339": ["n02389026", "sorrel"], "340": ["n02391049", "zebra"], "341": ["n02395406", "hog"], "342": ["n02396427", "wild_boar"], "343": ["n02397096", "warthog"], "344": ["n02398521", "hippopotamus"], "345": ["n02403003", "ox"], "346": ["n02408429", "water_buffalo"], "347": ["n02410509", "bison"], "348": ["n02412080", "ram"], "349": ["n02415577", "bighorn"], "350": ["n02417914", "ibex"], "351": ["n02422106", "hartebeest"], "352": ["n02422699", "impala"], "353": ["n02423022", "gazelle"], "354": ["n02437312", "Arabian_camel"], "355": ["n02437616", "llama"], "356": ["n02441942", "weasel"], "357": ["n02442845", "mink"], "358": ["n02443114", "polecat"], "359": ["n02443484", "black-footed_ferret"], "360": ["n02444819", "otter"], "361": ["n02445715", "skunk"], "362": ["n02447366", "badger"], "363": ["n02454379", "armadillo"], "364": ["n02457408", "three-toed_sloth"], "365": ["n02480495", "orangutan"], "366": ["n02480855", "gorilla"], "367": ["n02481823", "chimpanzee"], "368": ["n02483362", "gibbon"], "369": ["n02483708", "siamang"], "370": ["n02484975", "guenon"], "371": ["n02486261", "patas"], "372": ["n02486410", "baboon"], "373": ["n02487347", "macaque"], "374": ["n02488291", "langur"], "375": ["n02488702", "colobus"], "376": ["n02489166", "proboscis_monkey"], "377": ["n02490219", "marmoset"], "378": ["n02492035", "capuchin"], "379": ["n02492660", "howler_monkey"], "380": ["n02493509", "titi"], "381": ["n02493793", "spider_monkey"], "382": ["n02494079", "squirrel_monkey"], "383": ["n02497673", 
"Madagascar_cat"], "384": ["n02500267", "indri"], "385": ["n02504013", "Indian_elephant"], "386": ["n02504458", "African_elephant"], "387": ["n02509815", "lesser_panda"], "388": ["n02510455", "giant_panda"], "389": ["n02514041", "barracouta"], "390": ["n02526121", "eel"], "391": ["n02536864", "coho"], "392": ["n02606052", "rock_beauty"], "393": ["n02607072", "anemone_fish"], "394": ["n02640242", "sturgeon"], "395": ["n02641379", "gar"], "396": ["n02643566", "lionfish"], "397": ["n02655020", "puffer"], "398": ["n02666196", "abacus"], "399": ["n02667093", "abaya"], "400": ["n02669723", "academic_gown"], "401": ["n02672831", "accordion"], "402": ["n02676566", "acoustic_guitar"], "403": ["n02687172", "aircraft_carrier"], "404": ["n02690373", "airliner"], "405": ["n02692877", "airship"], "406": ["n02699494", "altar"], "407": ["n02701002", "ambulance"], "408": ["n02704792", "amphibian"], "409": ["n02708093", "analog_clock"], "410": ["n02727426", "apiary"], "411": ["n02730930", "apron"], "412": ["n02747177", "ashcan"], "413": ["n02749479", "assault_rifle"], "414": ["n02769748", "backpack"], "415": ["n02776631", "bakery"], "416": ["n02777292", "balance_beam"], "417": ["n02782093", "balloon"], "418": ["n02783161", "ballpoint"], "419": ["n02786058", "Band_Aid"], "420": ["n02787622", "banjo"], "421": ["n02788148", "bannister"], "422": ["n02790996", "barbell"], "423": ["n02791124", "barber_chair"], "424": ["n02791270", "barbershop"], "425": ["n02793495", "barn"], "426": ["n02794156", "barometer"], "427": ["n02795169", "barrel"], "428": ["n02797295", "barrow"], "429": ["n02799071", "baseball"], "430": ["n02802426", "basketball"], "431": ["n02804414", "bassinet"], "432": ["n02804610", "bassoon"], "433": ["n02807133", "bathing_cap"], "434": ["n02808304", "bath_towel"], "435": ["n02808440", "bathtub"], "436": ["n02814533", "beach_wagon"], "437": ["n02814860", "beacon"], "438": ["n02815834", "beaker"], "439": ["n02817516", "bearskin"], "440": ["n02823428", "beer_bottle"], "441": 
["n02823750", "beer_glass"], "442": ["n02825657", "bell_cote"], "443": ["n02834397", "bib"], "444": ["n02835271", "bicycle-built-for-two"], "445": ["n02837789", "bikini"], "446": ["n02840245", "binder"], "447": ["n02841315", "binoculars"], "448": ["n02843684", "birdhouse"], "449": ["n02859443", "boathouse"], "450": ["n02860847", "bobsled"], "451": ["n02865351", "bolo_tie"], "452": ["n02869837", "bonnet"], "453": ["n02870880", "bookcase"], "454": ["n02871525", "bookshop"], "455": ["n02877765", "bottlecap"], "456": ["n02879718", "bow"], "457": ["n02883205", "bow_tie"], "458": ["n02892201", "brass"], "459": ["n02892767", "brassiere"], "460": ["n02894605", "breakwater"], "461": ["n02895154", "breastplate"], "462": ["n02906734", "broom"], "463": ["n02909870", "bucket"], "464": ["n02910353", "buckle"], "465": ["n02916936", "bulletproof_vest"], "466": ["n02917067", "bullet_train"], "467": ["n02927161", "butcher_shop"], "468": ["n02930766", "cab"], "469": ["n02939185", "caldron"], "470": ["n02948072", "candle"], "471": ["n02950826", "cannon"], "472": ["n02951358", "canoe"], "473": ["n02951585", "can_opener"], "474": ["n02963159", "cardigan"], "475": ["n02965783", "car_mirror"], "476": ["n02966193", "carousel"], "477": ["n02966687", "carpenter's_kit"], "478": ["n02971356", "carton"], "479": ["n02974003", "car_wheel"], "480": ["n02977058", "cash_machine"], "481": ["n02978881", "cassette"], "482": ["n02979186", "cassette_player"], "483": ["n02980441", "castle"], "484": ["n02981792", "catamaran"], "485": ["n02988304", "CD_player"], "486": ["n02992211", "cello"], "487": ["n02992529", "cellular_telephone"], "488": ["n02999410", "chain"], "489": ["n03000134", "chainlink_fence"], "490": ["n03000247", "chain_mail"], "491": ["n03000684", "chain_saw"], "492": ["n03014705", "chest"], "493": ["n03016953", "chiffonier"], "494": ["n03017168", "chime"], "495": ["n03018349", "china_cabinet"], "496": ["n03026506", "Christmas_stocking"], "497": ["n03028079", "church"], "498": ["n03032252", 
"cinema"], "499": ["n03041632", "cleaver"], "500": ["n03042490", "cliff_dwelling"], "501": ["n03045698", "cloak"], "502": ["n03047690", "clog"], "503": ["n03062245", "cocktail_shaker"], "504": ["n03063599", "coffee_mug"], "505": ["n03063689", "coffeepot"], "506": ["n03065424", "coil"], "507": ["n03075370", "combination_lock"], "508": ["n03085013", "computer_keyboard"], "509": ["n03089624", "confectionery"], "510": ["n03095699", "container_ship"], "511": ["n03100240", "convertible"], "512": ["n03109150", "corkscrew"], "513": ["n03110669", "cornet"], "514": ["n03124043", "cowboy_boot"], "515": ["n03124170", "cowboy_hat"], "516": ["n03125729", "cradle"], "517": ["n03126707", "crane"], "518": ["n03127747", "crash_helmet"], "519": ["n03127925", "crate"], "520": ["n03131574", "crib"], "521": ["n03133878", "Crock_Pot"], "522": ["n03134739", "croquet_ball"], "523": ["n03141823", "crutch"], "524": ["n03146219", "cuirass"], "525": ["n03160309", "dam"], "526": ["n03179701", "desk"], "527": ["n03180011", "desktop_computer"], "528": ["n03187595", "dial_telephone"], "529": ["n03188531", "diaper"], "530": ["n03196217", "digital_clock"], "531": ["n03197337", "digital_watch"], "532": ["n03201208", "dining_table"], "533": ["n03207743", "dishrag"], "534": ["n03207941", "dishwasher"], "535": ["n03208938", "disk_brake"], "536": ["n03216828", "dock"], "537": ["n03218198", "dogsled"], "538": ["n03220513", "dome"], "539": ["n03223299", "doormat"], "540": ["n03240683", "drilling_platform"], "541": ["n03249569", "drum"], "542": ["n03250847", "drumstick"], "543": ["n03255030", "dumbbell"], "544": ["n03259280", "Dutch_oven"], "545": ["n03271574", "electric_fan"], "546": ["n03272010", "electric_guitar"], "547": ["n03272562", "electric_locomotive"], "548": ["n03290653", "entertainment_center"], "549": ["n03291819", "envelope"], "550": ["n03297495", "espresso_maker"], "551": ["n03314780", "face_powder"], "552": ["n03325584", "feather_boa"], "553": ["n03337140", "file"], "554": ["n03344393", 
"fireboat"], "555": ["n03345487", "fire_engine"], "556": ["n03347037", "fire_screen"], "557": ["n03355925", "flagpole"], "558": ["n03372029", "flute"], "559": ["n03376595", "folding_chair"], "560": ["n03379051", "football_helmet"], "561": ["n03384352", "forklift"], "562": ["n03388043", "fountain"], "563": ["n03388183", "fountain_pen"], "564": ["n03388549", "four-poster"], "565": ["n03393912", "freight_car"], "566": ["n03394916", "French_horn"], "567": ["n03400231", "frying_pan"], "568": ["n03404251", "fur_coat"], "569": ["n03417042", "garbage_truck"], "570": ["n03424325", "gasmask"], "571": ["n03425413", "gas_pump"], "572": ["n03443371", "goblet"], "573": ["n03444034", "go-kart"], "574": ["n03445777", "golf_ball"], "575": ["n03445924", "golfcart"], "576": ["n03447447", "gondola"], "577": ["n03447721", "gong"], "578": ["n03450230", "gown"], "579": ["n03452741", "grand_piano"], "580": ["n03457902", "greenhouse"], "581": ["n03459775", "grille"], "582": ["n03461385", "grocery_store"], "583": ["n03467068", "guillotine"], "584": ["n03476684", "hair_slide"], "585": ["n03476991", "hair_spray"], "586": ["n03478589", "half_track"], "587": ["n03481172", "hammer"], "588": ["n03482405", "hamper"], "589": ["n03483316", "hand_blower"], "590": ["n03485407", "hand-held_computer"], "591": ["n03485794", "handkerchief"], "592": ["n03492542", "hard_disc"], "593": ["n03494278", "harmonica"], "594": ["n03495258", "harp"], "595": ["n03496892", "harvester"], "596": ["n03498962", "hatchet"], "597": ["n03527444", "holster"], "598": ["n03529860", "home_theater"], "599": ["n03530642", "honeycomb"], "600": ["n03532672", "hook"], "601": ["n03534580", "hoopskirt"], "602": ["n03535780", "horizontal_bar"], "603": ["n03538406", "horse_cart"], "604": ["n03544143", "hourglass"], "605": ["n03584254", "iPod"], "606": ["n03584829", "iron"], "607": ["n03590841", "jack-o'-lantern"], "608": ["n03594734", "jean"], "609": ["n03594945", "jeep"], "610": ["n03595614", "jersey"], "611": ["n03598930", 
"jigsaw_puzzle"], "612": ["n03599486", "jinrikisha"], "613": ["n03602883", "joystick"], "614": ["n03617480", "kimono"], "615": ["n03623198", "knee_pad"], "616": ["n03627232", "knot"], "617": ["n03630383", "lab_coat"], "618": ["n03633091", "ladle"], "619": ["n03637318", "lampshade"], "620": ["n03642806", "laptop"], "621": ["n03649909", "lawn_mower"], "622": ["n03657121", "lens_cap"], "623": ["n03658185", "letter_opener"], "624": ["n03661043", "library"], "625": ["n03662601", "lifeboat"], "626": ["n03666591", "lighter"], "627": ["n03670208", "limousine"], "628": ["n03673027", "liner"], "629": ["n03676483", "lipstick"], "630": ["n03680355", "Loafer"], "631": ["n03690938", "lotion"], "632": ["n03691459", "loudspeaker"], "633": ["n03692522", "loupe"], "634": ["n03697007", "lumbermill"], "635": ["n03706229", "magnetic_compass"], "636": ["n03709823", "mailbag"], "637": ["n03710193", "mailbox"], "638": ["n03710637", "maillot"], "639": ["n03710721", "maillot"], "640": ["n03717622", "manhole_cover"], "641": ["n03720891", "maraca"], "642": ["n03721384", "marimba"], "643": ["n03724870", "mask"], "644": ["n03729826", "matchstick"], "645": ["n03733131", "maypole"], "646": ["n03733281", "maze"], "647": ["n03733805", "measuring_cup"], "648": ["n03742115", "medicine_chest"], "649": ["n03743016", "megalith"], "650": ["n03759954", "microphone"], "651": ["n03761084", "microwave"], "652": ["n03763968", "military_uniform"], "653": ["n03764736", "milk_can"], "654": ["n03769881", "minibus"], "655": ["n03770439", "miniskirt"], "656": ["n03770679", "minivan"], "657": ["n03773504", "missile"], "658": ["n03775071", "mitten"], "659": ["n03775546", "mixing_bowl"], "660": ["n03776460", "mobile_home"], "661": ["n03777568", "Model_T"], "662": ["n03777754", "modem"], "663": ["n03781244", "monastery"], "664": ["n03782006", "monitor"], "665": ["n03785016", "moped"], "666": ["n03786901", "mortar"], "667": ["n03787032", "mortarboard"], "668": ["n03788195", "mosque"], "669": ["n03788365", 
"mosquito_net"], "670": ["n03791053", "motor_scooter"], "671": ["n03792782", "mountain_bike"], "672": ["n03792972", "mountain_tent"], "673": ["n03793489", "mouse"], "674": ["n03794056", "mousetrap"], "675": ["n03796401", "moving_van"], "676": ["n03803284", "muzzle"], "677": ["n03804744", "nail"], "678": ["n03814639", "neck_brace"], "679": ["n03814906", "necklace"], "680": ["n03825788", "nipple"], "681": ["n03832673", "notebook"], "682": ["n03837869", "obelisk"], "683": ["n03838899", "oboe"], "684": ["n03840681", "ocarina"], "685": ["n03841143", "odometer"], "686": ["n03843555", "oil_filter"], "687": ["n03854065", "organ"], "688": ["n03857828", "oscilloscope"], "689": ["n03866082", "overskirt"], "690": ["n03868242", "oxcart"], "691": ["n03868863", "oxygen_mask"], "692": ["n03871628", "packet"], "693": ["n03873416", "paddle"], "694": ["n03874293", "paddlewheel"], "695": ["n03874599", "padlock"], "696": ["n03876231", "paintbrush"], "697": ["n03877472", "pajama"], "698": ["n03877845", "palace"], "699": ["n03884397", "panpipe"], "700": ["n03887697", "paper_towel"], "701": ["n03888257", "parachute"], "702": ["n03888605", "parallel_bars"], "703": ["n03891251", "park_bench"], "704": ["n03891332", "parking_meter"], "705": ["n03895866", "passenger_car"], "706": ["n03899768", "patio"], "707": ["n03902125", "pay-phone"], "708": ["n03903868", "pedestal"], "709": ["n03908618", "pencil_box"], "710": ["n03908714", "pencil_sharpener"], "711": ["n03916031", "perfume"], "712": ["n03920288", "Petri_dish"], "713": ["n03924679", "photocopier"], "714": ["n03929660", "pick"], "715": ["n03929855", "pickelhaube"], "716": ["n03930313", "picket_fence"], "717": ["n03930630", "pickup"], "718": ["n03933933", "pier"], "719": ["n03935335", "piggy_bank"], "720": ["n03937543", "pill_bottle"], "721": ["n03938244", "pillow"], "722": ["n03942813", "ping-pong_ball"], "723": ["n03944341", "pinwheel"], "724": ["n03947888", "pirate"], "725": ["n03950228", "pitcher"], "726": ["n03954731", "plane"], "727": 
["n03956157", "planetarium"], "728": ["n03958227", "plastic_bag"], "729": ["n03961711", "plate_rack"], "730": ["n03967562", "plow"], "731": ["n03970156", "plunger"], "732": ["n03976467", "Polaroid_camera"], "733": ["n03976657", "pole"], "734": ["n03977966", "police_van"], "735": ["n03980874", "poncho"], "736": ["n03982430", "pool_table"], "737": ["n03983396", "pop_bottle"], "738": ["n03991062", "pot"], "739": ["n03992509", "potter's_wheel"], "740": ["n03995372", "power_drill"], "741": ["n03998194", "prayer_rug"], "742": ["n04004767", "printer"], "743": ["n04005630", "prison"], "744": ["n04008634", "projectile"], "745": ["n04009552", "projector"], "746": ["n04019541", "puck"], "747": ["n04023962", "punching_bag"], "748": ["n04026417", "purse"], "749": ["n04033901", "quill"], "750": ["n04033995", "quilt"], "751": ["n04037443", "racer"], "752": ["n04039381", "racket"], "753": ["n04040759", "radiator"], "754": ["n04041544", "radio"], "755": ["n04044716", "radio_telescope"], "756": ["n04049303", "rain_barrel"], "757": ["n04065272", "recreational_vehicle"], "758": ["n04067472", "reel"], "759": ["n04069434", "reflex_camera"], "760": ["n04070727", "refrigerator"], "761": ["n04074963", "remote_control"], "762": ["n04081281", "restaurant"], "763": ["n04086273", "revolver"], "764": ["n04090263", "rifle"], "765": ["n04099969", "rocking_chair"], "766": ["n04111531", "rotisserie"], "767": ["n04116512", "rubber_eraser"], "768": ["n04118538", "rugby_ball"], "769": ["n04118776", "rule"], "770": ["n04120489", "running_shoe"], "771": ["n04125021", "safe"], "772": ["n04127249", "safety_pin"], "773": ["n04131690", "saltshaker"], "774": ["n04133789", "sandal"], "775": ["n04136333", "sarong"], "776": ["n04141076", "sax"], "777": ["n04141327", "scabbard"], "778": ["n04141975", "scale"], "779": ["n04146614", "school_bus"], "780": ["n04147183", "schooner"], "781": ["n04149813", "scoreboard"], "782": ["n04152593", "screen"], "783": ["n04153751", "screw"], "784": ["n04154565", "screwdriver"], 
"785": ["n04162706", "seat_belt"], "786": ["n04179913", "sewing_machine"], "787": ["n04192698", "shield"], "788": ["n04200800", "shoe_shop"], "789": ["n04201297", "shoji"], "790": ["n04204238", "shopping_basket"], "791": ["n04204347", "shopping_cart"], "792": ["n04208210", "shovel"], "793": ["n04209133", "shower_cap"], "794": ["n04209239", "shower_curtain"], "795": ["n04228054", "ski"], "796": ["n04229816", "ski_mask"], "797": ["n04235860", "sleeping_bag"], "798": ["n04238763", "slide_rule"], "799": ["n04239074", "sliding_door"], "800": ["n04243546", "slot"], "801": ["n04251144", "snorkel"], "802": ["n04252077", "snowmobile"], "803": ["n04252225", "snowplow"], "804": ["n04254120", "soap_dispenser"], "805": ["n04254680", "soccer_ball"], "806": ["n04254777", "sock"], "807": ["n04258138", "solar_dish"], "808": ["n04259630", "sombrero"], "809": ["n04263257", "soup_bowl"], "810": ["n04264628", "space_bar"], "811": ["n04265275", "space_heater"], "812": ["n04266014", "space_shuttle"], "813": ["n04270147", "spatula"], "814": ["n04273569", "speedboat"], "815": ["n04275548", "spider_web"], "816": ["n04277352", "spindle"], "817": ["n04285008", "sports_car"], "818": ["n04286575", "spotlight"], "819": ["n04296562", "stage"], "820": ["n04310018", "steam_locomotive"], "821": ["n04311004", "steel_arch_bridge"], "822": ["n04311174", "steel_drum"], "823": ["n04317175", "stethoscope"], "824": ["n04325704", "stole"], "825": ["n04326547", "stone_wall"], "826": ["n04328186", "stopwatch"], "827": ["n04330267", "stove"], "828": ["n04332243", "strainer"], "829": ["n04335435", "streetcar"], "830": ["n04336792", "stretcher"], "831": ["n04344873", "studio_couch"], "832": ["n04346328", "stupa"], "833": ["n04347754", "submarine"], "834": ["n04350905", "suit"], "835": ["n04355338", "sundial"], "836": ["n04355933", "sunglass"], "837": ["n04356056", "sunglasses"], "838": ["n04357314", "sunscreen"], "839": ["n04366367", "suspension_bridge"], "840": ["n04367480", "swab"], "841": ["n04370456", 
"sweatshirt"], "842": ["n04371430", "swimming_trunks"], "843": ["n04371774", "swing"], "844": ["n04372370", "switch"], "845": ["n04376876", "syringe"], "846": ["n04380533", "table_lamp"], "847": ["n04389033", "tank"], "848": ["n04392985", "tape_player"], "849": ["n04398044", "teapot"], "850": ["n04399382", "teddy"], "851": ["n04404412", "television"], "852": ["n04409515", "tennis_ball"], "853": ["n04417672", "thatch"], "854": ["n04418357", "theater_curtain"], "855": ["n04423845", "thimble"], "856": ["n04428191", "thresher"], "857": ["n04429376", "throne"], "858": ["n04435653", "tile_roof"], "859": ["n04442312", "toaster"], "860": ["n04443257", "tobacco_shop"], "861": ["n04447861", "toilet_seat"], "862": ["n04456115", "torch"], "863": ["n04458633", "totem_pole"], "864": ["n04461696", "tow_truck"], "865": ["n04462240", "toyshop"], "866": ["n04465501", "tractor"], "867": ["n04467665", "trailer_truck"], "868": ["n04476259", "tray"], "869": ["n04479046", "trench_coat"], "870": ["n04482393", "tricycle"], "871": ["n04483307", "trimaran"], "872": ["n04485082", "tripod"], "873": ["n04486054", "triumphal_arch"], "874": ["n04487081", "trolleybus"], "875": ["n04487394", "trombone"], "876": ["n04493381", "tub"], "877": ["n04501370", "turnstile"], "878": ["n04505470", "typewriter_keyboard"], "879": ["n04507155", "umbrella"], "880": ["n04509417", "unicycle"], "881": ["n04515003", "upright"], "882": ["n04517823", "vacuum"], "883": ["n04522168", "vase"], "884": ["n04523525", "vault"], "885": ["n04525038", "velvet"], "886": ["n04525305", "vending_machine"], "887": ["n04532106", "vestment"], "888": ["n04532670", "viaduct"], "889": ["n04536866", "violin"], "890": ["n04540053", "volleyball"], "891": ["n04542943", "waffle_iron"], "892": ["n04548280", "wall_clock"], "893": ["n04548362", "wallet"], "894": ["n04550184", "wardrobe"], "895": ["n04552348", "warplane"], "896": ["n04553703", "washbasin"], "897": ["n04554684", "washer"], "898": ["n04557648", "water_bottle"], "899": ["n04560804", 
"water_jug"], "900": ["n04562935", "water_tower"], "901": ["n04579145", "whiskey_jug"], "902": ["n04579432", "whistle"], "903": ["n04584207", "wig"], "904": ["n04589890", "window_screen"], "905": ["n04590129", "window_shade"], "906": ["n04591157", "Windsor_tie"], "907": ["n04591713", "wine_bottle"], "908": ["n04592741", "wing"], "909": ["n04596742", "wok"], "910": ["n04597913", "wooden_spoon"], "911": ["n04599235", "wool"], "912": ["n04604644", "worm_fence"], "913": ["n04606251", "wreck"], "914": ["n04612504", "yawl"], "915": ["n04613696", "yurt"], "916": ["n06359193", "web_site"], "917": ["n06596364", "comic_book"], "918": ["n06785654", "crossword_puzzle"], "919": ["n06794110", "street_sign"], "920": ["n06874185", "traffic_light"], "921": ["n07248320", "book_jacket"], "922": ["n07565083", "menu"], "923": ["n07579787", "plate"], "924": ["n07583066", "guacamole"], "925": ["n07584110", "consomme"], "926": ["n07590611", "hot_pot"], "927": ["n07613480", "trifle"], "928": ["n07614500", "ice_cream"], "929": ["n07615774", "ice_lolly"], "930": ["n07684084", "French_loaf"], "931": ["n07693725", "bagel"], "932": ["n07695742", "pretzel"], "933": ["n07697313", "cheeseburger"], "934": ["n07697537", "hotdog"], "935": ["n07711569", "mashed_potato"], "936": ["n07714571", "head_cabbage"], "937": ["n07714990", "broccoli"], "938": ["n07715103", "cauliflower"], "939": ["n07716358", "zucchini"], "940": ["n07716906", "spaghetti_squash"], "941": ["n07717410", "acorn_squash"], "942": ["n07717556", "butternut_squash"], "943": ["n07718472", "cucumber"], "944": ["n07718747", "artichoke"], "945": ["n07720875", "bell_pepper"], "946": ["n07730033", "cardoon"], "947": ["n07734744", "mushroom"], "948": ["n07742313", "Granny_Smith"], "949": ["n07745940", "strawberry"], "950": ["n07747607", "orange"], "951": ["n07749582", "lemon"], "952": ["n07753113", "fig"], "953": ["n07753275", "pineapple"], "954": ["n07753592", "banana"], "955": ["n07754684", "jackfruit"], "956": ["n07760859", "custard_apple"], 
"957": ["n07768694", "pomegranate"], "958": ["n07802026", "hay"], "959": ["n07831146", "carbonara"], "960": ["n07836838", "chocolate_sauce"], "961": ["n07860988", "dough"], "962": ["n07871810", "meat_loaf"], "963": ["n07873807", "pizza"], "964": ["n07875152", "potpie"], "965": ["n07880968", "burrito"], "966": ["n07892512", "red_wine"], "967": ["n07920052", "espresso"], "968": ["n07930864", "cup"], "969": ["n07932039", "eggnog"], "970": ["n09193705", "alp"], "971": ["n09229709", "bubble"], "972": ["n09246464", "cliff"], "973": ["n09256479", "coral_reef"], "974": ["n09288635", "geyser"], "975": ["n09332890", "lakeside"], "976": ["n09399592", "promontory"], "977": ["n09421951", "sandbar"], "978": ["n09428293", "seashore"], "979": ["n09468604", "valley"], "980": ["n09472597", "volcano"], "981": ["n09835506", "ballplayer"], "982": ["n10148035", "groom"], "983": ["n10565667", "scuba_diver"], "984": ["n11879895", "rapeseed"], "985": ["n11939491", "daisy"], "986": ["n12057211", "yellow_lady's_slipper"], "987": ["n12144580", "corn"], "988": ["n12267677", "acorn"], "989": ["n12620546", "hip"], "990": ["n12768682", "buckeye"], "991": ["n12985857", "coral_fungus"], "992": ["n12998815", "agaric"], "993": ["n13037406", "gyromitra"], "994": ["n13040303", "stinkhorn"], "995": ["n13044778", "earthstar"], "996": ["n13052670", "hen-of-the-woods"], "997": ["n13054560", "bolete"], "998": ["n13133613", "ear"], "999": ["n15075141", "toilet_tissue"]}

================================================
FILE: utils/classifiers/places.py
================================================
import torch
import torchvision.models as models
from torchvision import transforms as trn
from torch.nn import functional as F
import os


class Classifier():
    """ResNet-50 scene classifier backed by the Places365 pre-trained weights.

    Constructing an instance makes sure the checkpoint
    (``resnet50_places365.pth.tar``) and the category list
    (``categories_places365.txt``) exist in the current working directory,
    downloading them with ``wget`` when they are missing, then loads the
    model onto the GPU in eval mode.

    Attributes:
        model: the torchvision ResNet-50 with 365 outputs, on CUDA, eval mode.
        mean, std: per-channel normalisation constants fed to ``Normalize``.
        trn: the ``Normalize`` transform built from ``mean`` / ``std``.
        classes: list of category names, indexed by class id.
    """

    def __init__(self):
        # the architecture to use
        arch = 'resnet50'

        # load the pre-trained weights, fetching them on first use
        model_file = '%s_places365.pth.tar' % arch
        weight_url = 'http://places2.csail.mit.edu/models_places365/' + model_file
        self._download_if_missing(model_file, weight_url)

        model = models.__dict__[arch](num_classes=365)
        # NOTE(review): torch.load unpickles the checkpoint, and the file is
        # fetched over plain HTTP -- only use this with a source you trust.
        checkpoint = torch.load(model_file,
                                map_location=lambda storage, loc: storage)
        # Remove every 'module.' occurrence from the parameter names so they
        # match the keys of the bare torchvision model.
        state_dict = {
            k.replace('module.', ''): v
            for k, v in checkpoint['state_dict'].items()
        }
        model.load_state_dict(state_dict)
        model.cuda()
        model.eval()
        self.model = model
        self.mean = [0.485, 0.456, 0.406]
        self.std = [0.229, 0.224, 0.225]
        self.trn = trn.Normalize(self.mean, self.std)

        file_name = 'categories_places365.txt'
        synset_url = 'https://raw.githubusercontent.com/csailvision/places365/master/categories_places365.txt'
        self._download_if_missing(file_name, synset_url)
        with open(file_name) as class_file:
            self.classes = [self._parse_category(line) for line in class_file]

    @staticmethod
    def _download_if_missing(file_name, url):
        """Fetch ``url`` with wget unless ``file_name`` already exists.

        The check is on existence, not write permission, so a read-only copy
        of the file is reused instead of being downloaded again.

        Raises:
            FileNotFoundError: if wget cannot be run, or if ``file_name`` is
                still absent after the download attempt.
        """
        if os.path.isfile(file_name):
            return
        # Function-scope import so this block does not depend on a new
        # file-level import.
        import subprocess
        try:
            # Argument list instead of a shell string. This relies on wget
            # saving under the URL's basename, which equals ``file_name``
            # for both call sites in __init__.
            subprocess.call(['wget', url])
        except OSError as err:
            raise FileNotFoundError(
                'wget is required to download {}: {}'.format(url, err))
        if not os.path.isfile(file_name):
            raise FileNotFoundError(
                'failed to download {} from {}'.format(file_name, url))

    @staticmethod
    def _parse_category(line):
        """Turn one line of the category file into a class name.

        Takes the first space-separated token, drops its first three
        characters and removes every '/' from what is left, e.g.
        ``'/a/apartment_building/outdoor 8'`` -> ``'apartment_buildingoutdoor'``.
        """
        class_name = line.strip().split(' ')[0][3:]
        return ''.join(class_name.split('/'))

    def get_name(self, id):
        """Return the category name stored at index ``id``."""
        return self.classes[id]

    def transform(self, x):
        """Resize, rescale and normalise a batch, then move it to the GPU.

        ``x`` is interpolated to 224x224, divided by 255 and normalised
        image by image with ``self.trn``.
        NOTE(review): presumably ``x`` is an (N, 3, H, W) batch with values in
        [0, 255] -- confirm against the callers.
        """
        x = F.interpolate(x, size=(224, 224)) / 255.
        x = torch.stack([self.trn(xi) for xi in x]).cuda()
        return x

    def _logits(self, x):
        """Run the preprocessed batch through the model and return its output."""
        # Call the module itself rather than .forward() so registered hooks run.
        return self.model(self.transform(x))

    def get_predictions_and_confidence(self, x):
        """Return ``(indices, values)`` of the per-sample maximum output.

        ``values`` are the raw maximum model outputs along dim 1; no softmax
        is applied.
        """
        values, ind = self._logits(x).max(dim=1)
        return ind, values

    def get_predictions(self, x):
        """Return the index of the maximum model output for each sample."""
        return self._logits(x).argmax(dim=1)

if __name__ == '__main__':
    # Smoke test: classify a random batch of two 3x128x128 tensors and print
    # the predicted class indices.
    random_batch = torch.randn((2, 3, 128, 128))
    classifier = Classifier()
    predictions = classifier.get_predictions(random_batch)
    print(predictions)


================================================
FILE: utils/classifiers/pytorch_playground/.gitignore
================================================
__pycache__
*.jpg
*.png
acc1_acc5.txt
log
pytorch_playground.egg-info
script/val224_compressed.pkl


================================================
FILE: utils/classifiers/pytorch_playground/LICENSE
================================================
MIT License

Copyright (c) 2017 Aaron Chen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: utils/classifiers/pytorch_playground/README.md
================================================
This is a playground for PyTorch beginners, which contains predefined models for popular datasets. Currently we support:
- mnist, svhn
- cifar10, cifar100
- stl10
- alexnet
- vgg16, vgg16_bn, vgg19, vgg19_bn
- resnet18, resnet34, resnet50, resnet101, resnet152
- squeezenet_v0, squeezenet_v1
- inception_v3

Here is an example for MNIST dataset. This will download the dataset and pre-trained model automatically.
```
import torch
from torch.autograd import Variable
from utee import selector
model_raw, ds_fetcher, is_imagenet = selector.select('mnist')
ds_val = ds_fetcher(batch_size=10, train=False, val=True)
for idx, (data, target) in enumerate(ds_val):
    data =  Variable(torch.FloatTensor(data)).cuda()
    output = model_raw(data)
```

Also, if you want to train the MLP model on MNIST, simply run `python mnist/train.py`.


# Install
```
python3 setup.py develop --user
```

# ImageNet dataset
We provide a precomputed ImageNet validation dataset of size 224x224x3. We first resize the shorter side of each image to 256, then crop a 224x224 patch from the center. Finally, we encode the cropped images as JPEG strings and dump them to a pickle file.
- `cd script`
- Download the `val224_compressed.pkl` ([Tsinghua](http://ml.cs.tsinghua.edu.cn/~chenxi/dataset/val224_compressed.pkl) /  [Google Drive](https://drive.google.com/file/d/1U8ir2fOR4Sir3FCj9b7FQRPSVsycTfVc/view?usp=sharing))
- `python convert.py` (needs 48G memory, thanks [@jnorwood](https://github.com/aaron-xichen/pytorch-playground/issues/18) )


# Quantization
We also provide a simple demo that quantizes these models to a specified bit-width with several methods, including the linear, minmax and non-linear methods.

`quantize --type cifar10 --quant_method linear --param_bits 8 --fwd_bits 8 --bn_bits 8 --ngpu 1`
   
## Top1 Accuracy
We evaluate the performance of popular datasets and models with the linear quantization method. The bit-width of the running mean and running variance in BN is 10 bits for all results (except for 32-float).


|Model|32-float  |12-bit  |10-bit |8-bit  |6-bit  |
|:----|:--------:|:------:|:-----:|:-----:|:-----:|
|[MNIST](http://ml.cs.tsinghua.edu.cn/~chenxi/pytorch-models/mnist-b07bb66b.pth)|98.42|98.43|98.44|98.44|98.32|
|[SVHN](http://ml.cs.tsinghua.edu.cn/~chenxi/pytorch-models/svhn-f564f3d8.pth)|96.03|96.03|96.04|96.02|95.46|
|[CIFAR10](http://ml.cs.tsinghua.edu.cn/~chenxi/pytorch-models/cifar10-d875770b.pth)|93.78|93.79|93.80|93.58|90.86|
|[CIFAR100](http://ml.cs.tsinghua.edu.cn/~chenxi/pytorch-models/cifar100-3a55a987.pth)|74.27|74.21|74.19|73.70|66.32|
|[STL10](http://ml.cs.tsinghua.edu.cn/~chenxi/pytorch-models/stl10-866321e9.pth)|77.59|77.65|77.70|77.59|73.40|
|[AlexNet](https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth)|55.70/78.42|55.66/78.41|55.54/78.39|54.17/77.29|18.19/36.25|
|[VGG16](https://download.pytorch.org/models/vgg16-397923af.pth)|70.44/89.43|70.45/89.43|70.44/89.33|69.99/89.17|53.33/76.32|
|[VGG19](https://download.pytorch.org/models/vgg19-dcbb9e9d.pth)|71.36/89.94|71.35/89.93|71.34/89.88|70.88/89.62|56.00/78.62|
|[ResNet18](https://download.pytorch.org/models/resnet18-5c106cde.pth)|68.63/88.31|68.62/88.33|68.49/88.25|66.80/87.20|19.14/36.49|
|[ResNet34](https://download.pytorch.org/models/resnet34-333f7ec4.pth)|72.50/90.86|72.46/90.82|72.45/90.85|71.47/90.00|32.25/55.71|
|[ResNet50](https://download.pytorch.org/models/resnet50-19c8e357.pth)|74.98/92.17|74.94/92.12|74.91/92.09|72.54/90.44|2.43/5.36|
|[ResNet101](https://download.pytorch.org/models/resnet101-5d3b4d8f.pth)|76.69/93.30|76.66/93.25|76.22/92.90|65.69/79.54|1.41/1.18|
|[ResNet152](https://download.pytorch.org/models/resnet152-b121ed2d.pth)|77.55/93.59|77.51/93.62|77.40/93.54|74.95/92.46|9.29/16.75|
|[SqueezeNetV0](https://download.pytorch.org/models/squeezenet1_0-a815701f.pth)|56.73/79.39|56.75/79.40|56.70/79.27|53.93/77.04|14.21/29.74|
|[SqueezeNetV1](https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth)|56.52/79.13|56.52/79.15|56.24/79.03|54.56/77.33|17.10/32.46|
|[InceptionV3](https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth)|76.41/92.78|76.43/92.71|76.44/92.73|73.67/91.34|1.50/4.82|

**Note: ImageNet 32-float models are directly from torchvision**


## Selected Arguments
Here we give an overview of selected arguments of `quantize.py`

|Flag                          |Default value|Description & Options|
|:-----------------------------|:-----------------------:|:--------------------------------|
|type|cifar10|mnist,svhn,cifar10,cifar100,stl10,alexnet,vgg16,vgg16_bn,vgg19,vgg19_bn,resnet18,resnet34,resnet50,resnet101,resnet152,squeezenet_v0,squeezenet_v1,inception_v3|
|quant_method|linear|quantization method:linear,minmax,log,tanh|
|param_bits|8|bit-width of weights and bias|
|fwd_bits|8|bit-width of activation|
|bn_bits|32|bit-width of running mean and running variance|
|overflow_rate|0.0|overflow rate threshold for linear quantization method|
|n_samples|20|number of samples to make statistics for activation|


================================================
FILE: utils/classifiers/pytorch_playground/cifar/__init__.py
================================================


================================================
FILE: utils/classifiers/pytorch_playground/cifar/dataset.py
================================================
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os

def get10(batch_size, data_root='/tmp/public_dataset/pytorch', train=True, val=True, **kwargs):
    """Build CIFAR-10 data loaders.

    Args:
        batch_size: batch size used by every loader that is built.
        data_root: parent directory; the data is stored in
            ``<data_root>/cifar10-data`` (``~`` is expanded) and downloaded
            there when a loader is requested.
        train: when true, build the shuffled training loader (pad by 4,
            random 32x32 crop, random horizontal flip, then normalisation).
        val: when true, build the unshuffled test-split loader.
        **kwargs: forwarded to ``DataLoader``. ``num_workers`` defaults to 1
            and ``input_size`` is discarded.

    Returns:
        The loader itself when exactly one of ``train``/``val`` is requested,
        otherwise a list of the requested loaders in ``[train, val]`` order
        (empty when neither is requested).
    """
    cifar_dir = os.path.expanduser(os.path.join(data_root, 'cifar10-data'))
    n_workers = kwargs.setdefault('num_workers', 1)
    kwargs.pop('input_size', None)
    print("Building CIFAR-10 data loader with {} workers".format(n_workers))

    loaders = []
    if train:
        augment = transforms.Compose([
            transforms.Pad(4),
            transforms.RandomCrop(32),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        train_set = datasets.CIFAR10(
            root=cifar_dir, train=True, download=True, transform=augment)
        loaders.append(torch.utils.data.DataLoader(
            train_set, batch_size=batch_size, shuffle=True, **kwargs))
    if val:
        plain = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        test_set = datasets.CIFAR10(
            root=cifar_dir, train=False, download=True, transform=plain)
        loaders.append(torch.utils.data.DataLoader(
            test_set, batch_size=batch_size, shuffle=False, **kwargs))

    if len(loaders) == 1:
        return loaders[0]
    return loaders

def get100(batch_size, data_root='/tmp/public_dataset/pytorch', train=True, val=True, **kwargs):
    """Build CIFAR-100 data loaders.

    Args:
        batch_size: batch size used by every loader that is built.
        data_root: parent directory; the data is stored in
            ``<data_root>/cifar100-data`` (``~`` is expanded) and downloaded
            there when a loader is requested.
        train: when true, build the shuffled training loader (pad by 4,
            random 32x32 crop, random horizontal flip, then normalisation).
        val: when true, build the unshuffled test-split loader.
        **kwargs: forwarded to ``DataLoader``. ``num_workers`` defaults to 1
            and ``input_size`` is discarded.

    Returns:
        The loader itself when exactly one of ``train``/``val`` is requested,
        otherwise a list of the requested loaders in ``[train, val]`` order
        (empty when neither is requested).
    """
    cifar_dir = os.path.expanduser(os.path.join(data_root, 'cifar100-data'))
    n_workers = kwargs.setdefault('num_workers', 1)
    kwargs.pop('input_size', None)
    print("Building CIFAR-100 data loader with {} workers".format(n_workers))

    loaders = []
    if train:
        augment = transforms.Compose([
            transforms.Pad(4),
            transforms.RandomCrop(32),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        train_set = datasets.CIFAR100(
            root=cifar_dir, train=True, download=True, transform=augment)
        loaders.append(torch.utils.data.DataLoader(
            train_set, batch_size=batch_size, shuffle=True, **kwargs))
    if val:
        plain = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        test_set = datasets.CIFAR100(
            root=cifar_dir, train=False, download=True, transform=plain)
        loaders.append(torch.utils.data.DataLoader(
            test_set, batch_size=batch_size, shuffle=False, **kwargs))

    if len(loaders) == 1:
        return loaders[0]
    return loaders


================================================
FILE: utils/classifiers/pytorch_playground/cifar/model.py
================================================
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
from IPython import embed
from collections import OrderedDict

from utee import misc
# Route this module's print() calls through the shared utee logger.
print = misc.logger.info

# Download location of each pretrained checkpoint, keyed by model name.
model_urls = {
    'cifar10': 'http://ml.cs.tsinghua.edu.cn/~chenxi/pytorch-models/cifar10-d875770b.pth',
}

class CIFAR(nn.Module):
    """Feature extractor followed by a single linear classification layer."""

    def __init__(self, features, n_channel, num_classes):
        """Store the feature stack and build the classifier head.

        Args:
            features: an ``nn.Sequential`` producing the features; any other
                type trips the assertion.
            n_channel: size of the flattened feature vector fed to the head.
            num_classes: number of output logits.
        """
        super().__init__()
        assert isinstance(features, nn.Sequential), type(features)
        self.features = features
        head = nn.Linear(n_channel, num_classes)
        self.classifier = nn.Sequential(head)
        # Log both sub-networks once at construction time.
        for part in (self.features, self.classifier):
            print(part)

    def forward(self, x):
        """Run the features, flatten everything but the batch axis, classify."""
        feats = self.features(x)
        flat = feats.view(feats.size(0), -1)
        return self.classifier(flat)

def make_layers(cfg, batch_norm=False):
    """Translate a layer specification into an ``nn.Sequential``.

    Args:
        cfg: iterable of entries. ``'M'`` adds a 2x2 max-pool with stride 2;
            an int ``c`` adds a 3x3 convolution with ``c`` output channels
            and padding 1; a tuple ``(c, p)`` does the same with padding
            ``p``. The first convolution takes 3 input channels.
        batch_norm: when true, each convolution is followed by a
            non-affine ``BatchNorm2d`` before the ReLU.

    Returns:
        The assembled ``nn.Sequential``.
    """
    modules = []
    prev_channels = 3
    for spec in cfg:
        if spec == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        if isinstance(spec, tuple):
            width, pad = spec[0], spec[1]
        else:
            width, pad = spec, 1
        modules.append(nn.Conv2d(prev_channels, width, kernel_size=3, padding=pad))
        if batch_norm:
            modules.append(nn.BatchNorm2d(width, affine=False))
        modules.append(nn.ReLU())
        prev_channels = width
    return nn.Sequential(*modules)

def cifar10(n_channel=128):
    """Build the CIFAR-10 network and load the published checkpoint.

    The feature stack is three stages of two 3x3 convolutions (widths
    ``n_channel``, ``2*n_channel``, ``4*n_channel``), each followed by a
    max-pool, then an unpadded ``8*n_channel`` convolution and a final
    max-pool. Batch norm is enabled and the head has 10 outputs.

    Args:
        n_channel: width of the first convolution stage.

    Returns:
        The ``CIFAR`` model with the weights from ``model_urls['cifar10']``
        loaded (fetched through ``model_zoo.load_url``).
    """
    cfg = []
    for width in (n_channel, 2 * n_channel, 4 * n_channel):
        cfg += [width, width, 'M']
    cfg += [(8 * n_channel, 0), 'M']

    feature_stack = make_layers(cfg, batch_norm=True)
    model = CIFAR(feature_stack, n_channel=8 * n_channel, num_classes=10)

    loaded = model_zoo.load_url(model_urls['cifar10'])
    # The checkpoint may hold either a whole module or a bare state dict.
    if isinstance(loaded, nn.Module):
        state_dict = loaded.state_dict()
    else:
        state_dict = loaded
    assert isinstance(state_dict, (dict, OrderedDict)), type(state_dict)
    model.load_state_dict(state_dict)
    print('loaded')
    return model


if __name__ == '__main__':
    # cifar10() only accepts the channel width and always loads the published
    # checkpoint; passing the former `pretrained=` keyword raised TypeError.
    model = cifar10(128)
    # Drop into an interactive IPython shell to inspect the model.
    embed()


================================================
FILE: utils/classifiers/pytorch_playground/cifar/train.py
================================================
import argparse
import os
import time

from utee import misc
import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

import dataset
import model

from IPython import embed

# Command-line options of the CIFAR training script. The "(default: ...)"
# hints in the help texts mirror the actual default values.
parser = argparse.ArgumentParser(description='PyTorch CIFAR-X Example')
parser.add_argument('--type', default='cifar10', help='cifar10|cifar100')
parser.add_argument('--channel', type=int, default=128, help='first conv channel (default: 128)')
parser.add_argument('--wd', type=float, default=0.00, help='weight decay')
parser.add_argument('--batch_size', type=int, default=200, help='input batch size for training (default: 200)')
parser.add_argument('--epochs', type=int, default=150, help='number of epochs to train (default: 150)')
parser.add_argument('--lr', type=float, default=0.001, help='learning rate (default: 1e-3)')
parser.add_argument('--gpu', default=None, help='index of gpus to use')
parser.add_argument('--ngpu', type=int, default=2, help='number of gpus to use')
parser.add_argument('--seed', type=int, default=117, help='random seed (default: 117)')
parser.add_argument('--log_interval', type=int, default=100,  help='how many batches to wait before logging training status')
parser.add_argument('--test_interval', type=int, default=5,  help='how many epochs to wait before another test')
parser.add_argument('--logdir', default='log/default', help='folder to save to the log')
parser.add_argument('--decreasing_lr', default='80,120', help='decreasing strategy')
args = parser.parse_args()
# The log directory is resolved relative to this script, not the working directory.
args.logdir = os.path.join(os.path.dirname(__file__), args.logdir)
misc.logger.init(args.logdir, 'train_log')
# From here on print() writes through the shared logger.
print = misc.logger.info

# select gpu
# auto_select_gpu returns the chosen GPU ids as a list of strings and exports
# them via CUDA_VISIBLE_DEVICES; ngpu is then the number actually selected.
args.gpu = misc.auto_select_gpu(utility_bound=0, num_gpu=args.ngpu, selected_gpus=args.gpu)
args.ngpu = len(args.gpu)

# logger
# Make sure the log directory exists, then dump every parsed flag.
misc.ensure_dir(args.logdir)
print("=================FLAGS==================")
for k, v in args.__dict__.items():
    print('{}: {}'.format(k, v))
print("========================================")

# seed
# Seed the CPU generator, and the CUDA generator when a GPU is available.
args.cuda = torch.cuda.is_available()
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# data loader and model
# Only the two CIFAR variants are supported; both getters return
# [train_loader, test_loader] when called with their default train/val flags.
assert args.type in ['cifar10', 'cifar100'], args.type
if args.type == 'cifar10':
    train_loader, test_loader = dataset.get10(batch_size=args.batch_size, num_workers=1)
    # Note: this rebinds the name `model` from the imported module to the network.
    model = model.cifar10(n_channel=args.channel)
else:
    train_loader, test_loader = dataset.get100(batch_size=args.batch_size, num_workers=1)
    # NOTE(review): the accompanying model.py appears to define only cifar10();
    # confirm that model.cifar100 exists before relying on this branch.
    model = model.cifar100(n_channel=args.channel)
# Replicate the network over the first `ngpu` visible devices.
model = torch.nn.DataParallel(model, device_ids= range(args.ngpu))
if args.cuda:
    model.cuda()

# optimizer
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd)
# Epoch indices at which the learning rate is multiplied by 0.1.
decreasing_lr = list(map(int, args.decreasing_lr.split(',')))
print('decreasing_lr: ' + str(decreasing_lr))
best_acc, old_file = 0, None
t_begin = time.time()
try:
    # ready to go
    for epoch in range(args.epochs):
        model.train()
        if epoch in decreasing_lr:
            optimizer.param_groups[0]['lr'] *= 0.1
        for batch_idx, (data, target) in enumerate(train_loader):
            # Keep a CPU copy of the labels for the accuracy computation.
            indx_target = target.clone()
            if args.cuda:
                data, target = data.cuda(), target.cuda()

            optimizer.zero_grad()
            output = model(data)
            loss = F.cross_entropy(output, target)
            loss.backward()
            optimizer.step()

            if batch_idx % args.log_interval == 0 and batch_idx > 0:
                pred = output.data.max(1)[1]  # get the index of the max log-probability
                # .item() turns 0-dim tensors into Python numbers; indexing a
                # 0-dim tensor with [0] is an error on current PyTorch.
                correct = pred.cpu().eq(indx_target).sum().item()
                acc = correct * 1.0 / len(data)
                print('Train Epoch: {} [{}/{}] Loss: {:.6f} Acc: {:.4f} lr: {:.2e}'.format(
                    epoch, batch_idx * len(data), len(train_loader.dataset),
                    loss.item(), acc, optimizer.param_groups[0]['lr']))

        elapse_time = time.time() - t_begin
        speed_epoch = elapse_time / (epoch + 1)
        speed_batch = speed_epoch / len(train_loader)
        eta = speed_epoch * args.epochs - elapse_time
        print("Elapsed {:.2f}s, {:.2f} s/epoch, {:.2f} s/batch, ets {:.2f}s".format(
            elapse_time, speed_epoch, speed_batch, eta))
        # Always keep the most recent weights.
        misc.model_snapshot(model, os.path.join(args.logdir, 'latest.pth'))

        if epoch % args.test_interval == 0:
            model.eval()
            test_loss = 0
            correct = 0
            # no_grad replaces the removed `volatile=True` flag: no autograd
            # graph is built while evaluating.
            with torch.no_grad():
                for data, target in test_loader:
                    indx_target = target.clone()
                    if args.cuda:
                        data, target = data.cuda(), target.cuda()
                    output = model(data)
                    test_loss += F.cross_entropy(output, target).item()
                    pred = output.data.max(1)[1]  # get the index of the max log-probability
                    correct += pred.cpu().eq(indx_target).sum().item()

            test_loss = test_loss / len(test_loader) # average over number of mini-batch
            acc = 100. * correct / len(test_loader.dataset)
            print('\tTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.format(
                test_loss, correct, len(test_loader.dataset), acc))
            if acc > best_acc:
                # Save the new best checkpoint and delete the previous best one.
                new_file = os.path.join(args.logdir, 'best-{}.pth'.format(epoch))
                misc.model_snapshot(model, new_file, old_file=old_file, verbose=True)
                best_acc = acc
                old_file = new_file
except Exception:
    # Best effort: report the failure but still print the summary below.
    import traceback
    traceback.print_exc()
finally:
    print("Total Elapse: {:.2f}, Best Result: {:.3f}%".format(time.time()-t_begin, best_acc))


================================================
FILE: utils/classifiers/pytorch_playground/quantize.py
================================================
import argparse
from utee import misc, quant, selector
import torch
import torch.backends.cudnn as cudnn
cudnn.benchmark =True
from collections import OrderedDict

def main():
    """Quantize a pretrained model and report its top-1/top-5 accuracy.

    Steps: parse the command line, pick GPUs, load the model and its dataset
    fetcher through ``selector.select``, quantize the parameters (when
    ``param_bits < 32``), insert activation quantizers and calibrate them on
    ``n_sample`` batches (when ``fwd_bits < 32``), evaluate on the validation
    set, and append the result line to ``acc1_acc5.txt`` in the working
    directory. CUDA is required.
    """
    parser = argparse.ArgumentParser(description='PyTorch quantization example')
    parser.add_argument('--type', default='cifar10', help='|'.join(selector.known_models))
    parser.add_argument('--quant_method', default='linear', help='linear|minmax|log|tanh')
    parser.add_argument('--batch_size', type=int, default=100, help='input batch size for evaluation (default: 100)')
    parser.add_argument('--gpu', default=None, help='index of gpus to use')
    parser.add_argument('--ngpu', type=int, default=8, help='number of gpus to use')
    parser.add_argument('--seed', type=int, default=117, help='random seed (default: 117)')
    parser.add_argument('--model_root', default='~/.torch/models/', help='folder to save the model')
    parser.add_argument('--data_root', default='/data/public_dataset/pytorch/', help='folder to save the dataset')
    parser.add_argument('--logdir', default='log/default', help='folder to save to the log')

    parser.add_argument('--input_size', type=int, default=224, help='input size of image')
    parser.add_argument('--n_sample', type=int, default=20, help='number of samples to infer the scaling factor')
    parser.add_argument('--param_bits', type=int, default=8, help='bit-width for parameters')
    parser.add_argument('--bn_bits', type=int, default=32, help='bit-width for running mean and std')
    parser.add_argument('--fwd_bits', type=int, default=8, help='bit-width for layer output')
    parser.add_argument('--overflow_rate', type=float, default=0.0, help='overflow rate')
    args = parser.parse_args()

    # GPU selection exports CUDA_VISIBLE_DEVICES, so it runs before any CUDA call.
    args.gpu = misc.auto_select_gpu(utility_bound=0, num_gpu=args.ngpu, selected_gpus=args.gpu)
    args.ngpu = len(args.gpu)
    misc.ensure_dir(args.logdir)
    args.model_root = misc.expand_user(args.model_root)
    args.data_root = misc.expand_user(args.data_root)
    # Inception models are always evaluated at 299x299.
    args.input_size = 299 if 'inception' in args.type else args.input_size
    assert args.quant_method in ['linear', 'minmax', 'log', 'tanh']
    print("=================FLAGS==================")
    for k, v in args.__dict__.items():
        print('{}: {}'.format(k, v))
    print("========================================")

    assert torch.cuda.is_available(), 'no cuda'
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # load model and dataset fetcher
    model_raw, ds_fetcher, is_imagenet = selector.select(args.type, model_root=args.model_root)
    # Non-ImageNet models are evaluated on a single GPU.
    args.ngpu = args.ngpu if is_imagenet else 1

    # quantize parameters
    if args.param_bits < 32:
        state_dict = model_raw.state_dict()
        state_dict_quant = OrderedDict()
        sf_dict = OrderedDict()
        for k, v in state_dict.items():
            # Batch-norm running statistics use their own bit-width and are
            # left untouched when that width is 32 or more.
            if 'running' in k:
                if args.bn_bits >=32:
                    print("Ignoring {}".format(k))
                    state_dict_quant[k] = v
                    continue
                else:
                    bits = args.bn_bits
            else:
                bits = args.param_bits

            if args.quant_method == 'linear':
                # One bit is reserved for the sign; the rest is split between
                # integer and fractional part according to the value range.
                sf = bits - 1. - quant.compute_integral_part(v, overflow_rate=args.overflow_rate)
                v_quant  = quant.linear_quantize(v, sf, bits=bits)
            elif args.quant_method == 'log':
                v_quant = quant.log_minmax_quantize(v, bits=bits)
            elif args.quant_method == 'minmax':
                v_quant = quant.min_max_quantize(v, bits=bits)
            else:
                v_quant = quant.tanh_quantize(v, bits=bits)
            state_dict_quant[k] = v_quant
            print(k, bits)
        model_raw.load_state_dict(state_dict_quant)

    # quantize forward activation
    if args.fwd_bits < 32:
        model_raw = quant.duplicate_model_with_quant(model_raw, bits=args.fwd_bits, overflow_rate=args.overflow_rate,
                                                     counter=args.n_sample, type=args.quant_method)
        print(model_raw)
        # Calibration pass: n_sample small batches run through the model
        # before the real evaluation.
        val_ds_tmp = ds_fetcher(10, data_root=args.data_root, train=False, input_size=args.input_size)
        misc.eval_model(model_raw, val_ds_tmp, ngpu=1, n_sample=args.n_sample, is_imagenet=is_imagenet)

    # eval model
    val_ds = ds_fetcher(args.batch_size, data_root=args.data_root, train=False, input_size=args.input_size)
    acc1, acc5 = misc.eval_model(model_raw, val_ds, ngpu=args.ngpu, is_imagenet=is_imagenet)

    # print sf
    print(model_raw)
    res_str = "type={}, quant_method={}, param_bits={}, bn_bits={}, fwd_bits={}, overflow_rate={}, acc1={:.4f}, acc5={:.4f}".format(
        args.type, args.quant_method, args.param_bits, args.bn_bits, args.fwd_bits, args.overflow_rate, acc1, acc5)
    print(res_str)
    with open('acc1_acc5.txt', 'a') as f:
        f.write(res_str + '\n')


if __name__ == '__main__':
    main()


================================================
FILE: utils/classifiers/pytorch_playground/requirements.txt
================================================
Pillow==6.1
torchvision==0.4.2
tqdm==4.41.1
opencv-python==4.1.2.30
joblib==0.14.1


================================================
FILE: utils/classifiers/pytorch_playground/roadmap_zh.md
================================================
# 定点化Roadmap
首先定点化的setting分好几种，主要如下所示 (w代表weight，a代表activation，g代表gradient)

截至2016年7月，近两年直接相关的论文共有13篇。

## float转化为定点版本，不允许fine-tune
- w定点，a浮点
    - Resiliency of Deep Neural Networks under Quantization [Wongyong Sung, Sungho Shin, 2016.01.07, ICLR2016] {5bit在CIFAR10上恢复正确率}
    - Fixed Point Quantization of Deep Convolutional Networks [Darryl D.Lin, Sachin S.Talathi, 2016.06.02] {每层定点化策略不同，解析解求出}
- w+a定点
    - Hardware-oriented approximation of convolutional neural networks [Philipp Gysel, Mohammad Motamedi, ICLR 2016 Workshop] {ImageNet上8bit-8bit掉0.9%，AlexNet}
    - Energy-Efficient ConvNets Through Approximate Computing [Bert Moons, KU leuven, 2016.03.22] {结合硬件的trick可以在ImageNet上4-10bit}
    - Going Deeper with Embedded FPGA Platform for Convolutional Neural Network [Jiantao Qiu, Jie Wang, FPGA2016]{ImageNet上8bit-8bit掉1%，AlexNet}

## float转化为定点版本，允许fine-tune
- fine-tune整个网络
    - w定点，a+g浮点
        - Resiliency of Deep Neural Networks under Quantization [Wongyong Sung, Sungho Shin, 2016.01.07, ICLR2016] {2bit即三值网络在CIFAR10上恢复正确率}
    - w+a定点，g浮点
        - Fixed Point Quantization of Deep Convolutional Networks [Darryl D.Lin, Sachin S.Talathi, 2016.06.02] {每层定点化策略不同，解析解求出，CIFAR10上fine-tune后4bit-4bit掉1.32%}
    - w+a+g定点
        - Overcoming Challenges in Fixed Point Training of Deep Convolutional Networks [Darryl D.Lin, Sachin S. Talathi, Qualcomm Research，2016.07.08] {无随机rounding，ImageNet上4bit-16bit-16bit掉7.2%，a和g再小就不收敛}
        - DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients [Shuchang Zhou, Zekun Ni, 2016.06.20] {1bit-2bit-4bit, 第一层和最后一层没有量化，ImageNet上掉5.2%}
- fine-tune最高几层
    - w+a+g定点
        - Overcoming Challenges in Fixed Point Training of Deep Convolutional Networks [Darryl D.Lin, Sachin S. Talathi, Qualcomm Research，2016.07.08] {无随机rounding，ImageNet上4bit-4bit-4bit掉23.3%}
- 分阶段地从低层到高层fine-tune网络
    - w+a+g定点
        - Overcoming Challenges in Fixed Point Training of Deep Convolutional Networks [Darryl D.Lin, Sachin S. Talathi, Qualcomm Research，2016.07.08] {无随机rounding，ImageNet上4bit-4bit-4bit Top5掉11.5%}

## 直接定点从头开始训练
- w定点，a+g浮点
    - 二值网络
        - BinaryConnect: Training Deep Neural Networks with binary weights during propagations [Matthieu Courbariaux, Yoshua Bengio, 2015.11.02, NIPS] {CIFAR10上8.27%, state-of-art}
        - XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks [Mohammad Rastegari, Washington University, 2016.03.16] {ImageNet上39.2%，掉2.8%, AlexNet}
    - 三值网络
        - Ternary Weight Networks [Fengfu Li, Bin Liu, UCAS, China, 2016.05.16] {ImageNet掉2.3%, ResNet-18B}
        - Trained Ternary Quantization [Chenzhuo Zhu, Song Han, Huizi Mao, William J. Dally, ICLR2017] {ResNet上效果更佳}
- w+a定点，g浮点
    - 二值网络
        - Binarized Neural Networks: Training Neural Networks with Weights and Activations Constrained to +1 or −1 [Matthieu Courbariaux, Yoshua Bengio, 2016.03.17] {CIFAR10上10.15%}
        - XNOR-Net: ImageNet Classification Using Binary Convolutional Neural Networks [Mohammad Rastegari, Washington University, 2016.03.16] {ImageNet上55.8%， 掉12.4%}
- w+a+g定点
    - Deep Learning with Limited Numerical Precision [ Suyog Gupta, Ankur Agrawal, IBM, 2015.02.09] {随机rounding技巧，CIFAR10上16bit+16bit+16bit复现正确率}
    - Training deep neural networks with low precision multiplications [Matthieu Courbariaux, Yoshua Bengio, ICLR 2015 Workshop] {CIFAR10上10bit+10bit+12bit复现正确率}
    - DoReFa-Net: Training Low Bitwidth Convolutional Neural Networks with Low Bitwidth Gradients [Shuchang Zhou, Zekun Ni, 2016.06.20] {1bit-2bit-4bit, 第一层和最后一层没有量化，ImageNet上掉8.8%}
    - Quantized Neural Networks: Training Neural Networks with Low Precision Weights and Activations [Itay Hubara, Matthieu Courbariaux, 2016.09.22]{1bit-2bit-6bit，ImageNet上超过DoReFa 0.33%}

================================================
FILE: utils/classifiers/pytorch_playground/setup.py
================================================
import os

from setuptools import setup, find_packages

# Locate requirements.txt next to this file so that installation does not
# depend on the current working directory.
_HERE = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(_HERE, "requirements.txt")) as requirements_file:
    # Strip line endings and skip blank or comment lines so that only real
    # requirement specifiers reach install_requires.
    REQUIREMENTS = [
        line.strip()
        for line in requirements_file
        if line.strip() and not line.strip().startswith("#")
    ]

setup(
    name="pytorch-playground",
    version="1.0.0",
    author='Aaron Chen',
    author_email='aaron.xichen@gmail.com',
    packages=find_packages(),
    entry_points = {
        'console_scripts': [
            'quantize=quantize:main',
        ]
    },
    install_requires=REQUIREMENTS,

)


================================================
FILE: utils/classifiers/pytorch_playground/utee/__init__.py
================================================


================================================
FILE: utils/classifiers/pytorch_playground/utee/misc.py
================================================
import cv2
import os
import shutil
import pickle as pkl
import time
import numpy as np
import hashlib

from IPython import embed

class Logger(object):
    """Lazily configured wrapper around the root ``logging`` logger."""

    def __init__(self):
        # Stays None until init() has configured the handlers.
        self._logger = None

    def init(self, logdir, name='log'):
        """Configure file and console logging once; later calls are no-ops.

        Args:
            logdir: directory for the log file; created when missing.
            name: file name inside ``logdir``; an existing file is deleted.
        """
        if self._logger is not None:
            return
        import logging
        if not os.path.exists(logdir):
            os.makedirs(logdir)
        log_path = os.path.join(logdir, name)
        if os.path.exists(log_path):
            os.remove(log_path)
        self._logger = logging.getLogger()
        self._logger.setLevel('INFO')
        # Every record goes both to the file and to the console stream.
        sinks = (logging.FileHandler(log_path), logging.StreamHandler())
        for sink in sinks:
            self._logger.addHandler(sink)

    def info(self, str_info):
        """Log ``str_info`` at INFO level.

        When init() was never called, logging falls back to /tmp/tmp.log.
        """
        self.init('/tmp', 'tmp.log')
        self._logger.info(str_info)
# Module-wide logger instance; other modules reach it as `misc.logger`.
logger = Logger()

# Inside this module, print() writes through the logger instead of stdout.
print = logger.info
def ensure_dir(path, erase=False):
    """Make sure the directory ``path`` exists.

    Args:
        path: directory to create (including missing parents).
        erase: when true, an existing directory is removed first so the
            result is empty.
    """
    if erase and os.path.exists(path):
        print("Removing old folder {}".format(path))
        shutil.rmtree(path)
    if os.path.exists(path):
        return
    print("Creating folder {}".format(path))
    os.makedirs(path)

def load_pickle(path):
    """Unpickle and return the object stored at ``path``, logging the time taken.

    NOTE: unpickling can execute arbitrary code; only load trusted files.
    """
    started = time.time()
    with open(path, 'rb') as handle:
        print("Loading pickle object from {}".format(path))
        obj = pkl.load(handle)
    elapsed = time.time() - started
    print("=> Done ({:.4f} s)".format(elapsed))
    return obj

def dump_pickle(obj, path):
    """Pickle ``obj`` to ``path`` using the highest available protocol."""
    with open(path, 'wb') as sink:
        print("Dumping pickle object to {}".format(path))
        writer = pkl.Pickler(sink, protocol=pkl.HIGHEST_PROTOCOL)
        writer.dump(obj)

def auto_select_gpu(mem_bound=500, utility_bound=0, gpus=(0, 1, 2, 3, 4, 5, 6, 7), num_gpu=1, selected_gpus=None):
    """Choose GPUs and export them through ``CUDA_VISIBLE_DEVICES``.

    Args:
        mem_bound: a GPU qualifies only if its mean used memory (MiB, as
            reported by ``nvidia-smi``) is at most this value.
        utility_bound: a GPU qualifies only if its mean utilisation (%) is
            at most this value.
        gpus: indices that may be selected.
        num_gpu: number of GPUs required.
        selected_gpus: comma-separated indices chosen by the caller; when
            given, ``nvidia-smi`` is not queried at all.

    Returns:
        The selected GPU indices as a list of strings.

    Raises:
        SystemExit: when fewer than ``num_gpu`` GPUs qualify.
    """
    import sys
    import os
    import subprocess
    import re
    import time
    import numpy as np
    # NOTE(review): the variable name below is misspelled ("DEVCIES"), so this
    # early exit practically never fires. It is kept unchanged to preserve
    # behaviour -- confirm the intent before correcting the spelling.
    if 'CUDA_VISIBLE_DEVCIES' in os.environ:
        sys.exit(0)
    if selected_gpus is None:
        # Raw strings avoid invalid-escape warnings; compiled once, not per sample.
        mem_pattern = re.compile(r'\d+MiB\s/')
        utility_pattern = re.compile(r'\d+%\s+Default')
        number_pattern = re.compile(r'\d+')
        mem_trace = []
        utility_trace = []
        for _ in range(5): # sample 5 times
            info = subprocess.check_output('nvidia-smi', shell=True).decode('utf-8')
            # Each match looks like "123MiB /"; drop the 5-character suffix.
            mem = [int(s[:-5]) for s in mem_pattern.findall(info)]
            utility = [int(number_pattern.findall(s)[0]) for s in utility_pattern.findall(info)]
            mem_trace.append(mem)
            utility_trace.append(utility)
            time.sleep(0.1)
        # Average the samples per GPU.
        mem = np.mean(mem_trace, axis=0)
        utility = np.mean(utility_trace, axis=0)
        assert(len(mem) == len(utility))
        nGPU = len(utility)
        ideal_gpus = [i for i in range(nGPU) if mem[i] <= mem_bound and utility[i] <= utility_bound and i in gpus]

        if len(ideal_gpus) < num_gpu:
            print("No sufficient resource, available: {}, require {} gpu".format(ideal_gpus, num_gpu))
            sys.exit(0)
        else:
            selected_gpus = list(map(str, ideal_gpus[:num_gpu]))
    else:
        selected_gpus = selected_gpus.split(',')

    print("Setting GPU: {}".format(selected_gpus))
    os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(selected_gpus)
    return selected_gpus

def expand_user(path):
    """Return ``path`` with ``~`` expanded, as a normalised absolute path."""
    home_resolved = os.path.expanduser(path)
    return os.path.abspath(home_resolved)

def model_snapshot(model, new_file, old_file=None, verbose=False):
    """Save the model's state dict (moved to CPU) to ``new_file``.

    Args:
        model: module to save; a ``DataParallel`` wrapper is unwrapped first.
        new_file: destination path (``~`` allowed).
        old_file: optional earlier snapshot that is deleted when it exists.
        verbose: log the removal and the save.
    """
    from collections import OrderedDict
    import torch
    net = model.module if isinstance(model, torch.nn.DataParallel) else model

    if old_file:
        stale_path = expand_user(old_file)
        if os.path.exists(stale_path):
            if verbose:
                print("Removing old model {}".format(stale_path))
            os.remove(stale_path)

    target_path = expand_user(new_file)
    if verbose:
        print("Saving model to {}".format(target_path))

    # Store CPU tensors only, in the model's own key order.
    cpu_state = OrderedDict(
        (key, tensor.cpu() if tensor.is_cuda else tensor)
        for key, tensor in net.state_dict().items()
    )
    torch.save(cpu_state, target_path)


def load_lmdb(lmdb_file, n_records=None):
    """Load ``(image, target)`` pairs from an LMDB file into memory.

    Keys are expected to be ASCII strings of three ``:``-separated fields,
    the middle one being the integer target; values are encoded images
    decoded with OpenCV in colour mode.

    Args:
        lmdb_file: path of the LMDB file (``~`` allowed).
        n_records: stop after this many records; ``None`` loads everything.

    Returns:
        A list of ``(image, target)`` tuples, or ``None`` when the file does
        not exist.
    """
    import lmdb
    import numpy as np
    lmdb_file = expand_user(lmdb_file)
    if not os.path.exists(lmdb_file):
        # The placeholder was missing before, so the path was never shown.
        print("Not found lmdb file {}".format(lmdb_file))
        return None

    data = []
    env = lmdb.open(lmdb_file, readonly=True, max_readers=512)
    try:
        with env.begin() as txn:
            cursor = txn.cursor()
            begin_st = time.time()
            print("Loading lmdb file {} into memory".format(lmdb_file))
            for key, value in cursor:
                _, target, _ = key.decode('ascii').split(':')
                target = int(target)
                # frombuffer replaces the deprecated binary mode of np.fromstring.
                img = cv2.imdecode(np.frombuffer(value, np.uint8), cv2.IMREAD_COLOR)
                data.append((img, target))
                if n_records is not None and len(data) >= n_records:
                    break
    finally:
        # Release the environment even when decoding a record fails.
        env.close()
    print("=> Done ({:.4f} s)".format(time.time() - begin_st))
    return data

def str2img(str_b):
    """Decode an encoded image given as bytes into a colour image array."""
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    return cv2.imdecode(np.frombuffer(str_b, np.uint8), cv2.IMREAD_COLOR)

def img2str(img):
    """Encode an image array as JPEG and return the encoded bytes."""
    # tobytes() is the supported replacement for the deprecated tostring().
    return cv2.imencode('.jpg', img)[1].tobytes()

def md5(s):
    """Return the hexadecimal MD5 digest of the bytes ``s``."""
    return hashlib.md5(s).hexdigest()

def eval_model(model, ds, n_sample=None, ngpu=1, is_imagenet=False):
    """Evaluate top-1 and top-5 accuracy of ``model`` on ``ds`` using CUDA.

    Args:
        model: network to evaluate; it is switched to eval mode and wrapped
            in ``DataParallel`` over the first ``ngpu`` devices.
        ds: iterable yielding ``(data, target)`` batches.
        n_sample: number of batches to evaluate; ``None`` uses ``len(ds)``.
        ngpu: number of GPUs handed to ``DataParallel``.
        is_imagenet: when true, inputs are divided by 255 and normalised per
            channel with the mean/std stored in the wrapper before the
            forward pass.

    Returns:
        ``(acc1, acc5)``: correct top-1 and top-5 predictions divided by the
        number of evaluated samples.
    """
    import tqdm
    import torch
    from torch import nn
    from torch.autograd import Variable

    class ModelWrapper(nn.Module):
        """Applies the input normalisation in front of the wrapped model."""

        def __init__(self, model):
            super(ModelWrapper, self).__init__()
            self.model = model
            self.mean = [0.485, 0.456, 0.406]
            self.std = [0.229, 0.224, 0.225]

        def forward(self, input):
            # Normalisation is done in place on the input batch.
            input.data.div_(255.)
            input.data[:, 0, :, :].sub_(self.mean[0]).div_(self.std[0])
            input.data[:, 1, :, :].sub_(self.mean[1]).div_(self.std[1])
            input.data[:, 2, :, :].sub_(self.mean[2]).div_(self.std[2])
            return self.model(input)

    correct1, correct5 = 0, 0
    n_passed = 0
    if is_imagenet:
        model = ModelWrapper(model)
    model = model.eval()
    model = torch.nn.DataParallel(model, device_ids=range(ngpu)).cuda()

    n_sample = len(ds) if n_sample is None else n_sample
    # Evaluation needs no gradients; skipping the autograd graph saves memory.
    with torch.no_grad():
        for idx, (data, target) in enumerate(tqdm.tqdm(ds, total=n_sample)):
            n_passed += len(data)
            data =  Variable(torch.FloatTensor(data)).cuda()
            indx_target = torch.LongTensor(target)
            output = model(data)
            bs = output.size(0)
            # Class indices sorted by descending score, one row per sample.
            idx_pred = output.data.sort(1, descending=True)[1]

            # Ground truth as a column, then repeated across five columns.
            idx_gt1 = indx_target.expand(1, bs).transpose_(0, 1)
            idx_gt5 = idx_gt1.expand(bs, 5)

            correct1 += idx_pred[:, :1].cpu().eq(idx_gt1).sum()
            correct5 += idx_pred[:, :5].cpu().eq(idx_gt5).sum()

            if idx >= n_sample - 1:
                break

    acc1 = correct1 * 1.0 / n_passed
    acc5 = correct5 * 1.0 / n_passed
    return acc1, acc5

def load_state_dict(model, model_urls, model_root):
    """Copy a downloaded checkpoint into ``model`` in place.

    The model's own keys are matched after removing every ``group<N>.``
    fragment from them.

    Args:
        model: module whose parameters and buffers are overwritten.
        model_urls: URL of the checkpoint, passed to ``model_zoo.load_url``.
        model_root: directory in which the download is cached.

    Raises:
        KeyError: when the checkpoint contains a key the model lacks.
    """
    from torch.utils import model_zoo
    from torch import nn
    import re
    from collections import OrderedDict
    own_state_old = model.state_dict()
    own_state = OrderedDict() # remove all 'group' string
    group_prefix = re.compile(r'group\d+\.')
    for k, v in own_state_old.items():
        own_state[group_prefix.sub('', k)] = v

    state_dict = model_zoo.load_url(model_urls, model_root)

    for name, param in state_dict.items():
        if name not in own_state:
            print(own_state.keys())
            raise KeyError('unexpected key "{}" in state_dict'
                           .format(name))
        if isinstance(param, nn.Parameter):
            # backwards compatibility for serialized parameters
            param = param.data
        own_state[name].copy_(param)

    # Kept as a final guard; the loop above already rejects unknown keys.
    no_use = set(state_dict.keys()) - set(own_state.keys())
    if len(no_use) > 0:
        raise KeyError('some keys are not used: "{}"'.format(no_use))


================================================
FILE: utils/classifiers/pytorch_playground/utee/quant.py
================================================
from torch.autograd import Variable
import torch
from torch import nn
from collections import OrderedDict
import math
from IPython import embed

def compute_integral_part(input, overflow_rate):
    """Return the number of integer bits needed for the magnitudes in ``input``.

    The magnitudes are sorted in descending order and the fraction
    ``overflow_rate`` of the largest ones is allowed to overflow; the result
    is ``ceil(log2(v + 1e-12))`` for the largest remaining magnitude ``v``.

    Args:
        input: tensor of values (any shape, contiguous or not).
        overflow_rate: fraction in ``[0, 1]`` of values allowed to overflow.

    Returns:
        The integer bit count (negative for magnitudes below 1).
    """
    # reshape (not view) so non-contiguous tensors are accepted as well.
    abs_value = input.abs().reshape(-1)
    sorted_value = abs_value.sort(dim=0, descending=True)[0]
    # Clamp so that overflow_rate == 1.0 picks the smallest magnitude instead
    # of indexing one past the end.
    split_idx = min(int(overflow_rate * len(sorted_value)), len(sorted_value) - 1)
    v = sorted_value[split_idx]
    if isinstance(v, Variable):
        v = float(v.data.cpu())
    sf = math.ceil(math.log2(v+1e-12))
    return sf

def linear_quantize(input, sf, bits):
    """Quantize ``input`` onto a signed fixed-point grid.

    Values are rounded (floor of ``x / step + 0.5``) to multiples of
    ``step = 2 ** -sf`` and clamped to the signed ``bits``-bit integer range
    before being scaled back.

    Args:
        input: tensor to quantize.
        sf: number of fractional bits.
        bits: total bit-width (at least 1). With ``bits == 1`` the result is
            ``sign(input) - 1``.

    Returns:
        The quantized tensor.
    """
    assert bits >= 1, bits
    if bits == 1:
        return torch.sign(input) - 1
    step = 2.0 ** (-sf)
    half_range = 2.0 ** (bits - 1)
    lowest, highest = -half_range, half_range - 1
    grid_index = (input / step + 0.5).floor()
    return grid_index.clamp(lowest, highest) * step

def log_minmax_quantize(input, bits):
    """Min-max quantize ``input`` in the log domain, keeping the sign.

    One bit holds the sign; ``log(|input| + 1e-20)`` is quantized with
    ``min_max_quantize`` using the remaining ``bits - 1`` bits and mapped
    back through ``exp``.

    Args:
        input: tensor to quantize.
        bits: total bit-width (at least 1). With ``bits == 1`` only the sign
            is kept.

    Returns:
        The quantized tensor.
    """
    assert bits >= 1, bits
    if bits == 1:
        # Return a tensor like every other path (previously a 3-tuple, which
        # callers that store or forward the result cannot handle).
        return torch.sign(input)

    s = torch.sign(input)
    input0 = torch.log(torch.abs(input) + 1e-20)
    v = min_max_quantize(input0, bits-1)
    v = torch.exp(v) * s
    return v

def log_linear_quantize(input, sf, bits):
    """Fixed-point quantize ``input`` in the log domain, keeping the sign.

    One bit holds the sign; ``log(|input| + 1e-20)`` is quantized with
    ``linear_quantize`` using ``sf`` fractional bits and the remaining
    ``bits - 1`` bits, then mapped back through ``exp``.

    Args:
        input: tensor to quantize.
        sf: number of fractional bits for the log-domain values.
        bits: total bit-width (at least 1). With ``bits == 1`` only the sign
            is kept.

    Returns:
        The quantized tensor.
    """
    assert bits >= 1, bits
    if bits == 1:
        # Return a tensor like every other path (previously a 3-tuple, which
        # callers that forward the result cannot handle).
        return torch.sign(input)

    s = torch.sign(input)
    input0 = torch.log(torch.abs(input) + 1e-20)
    v = linear_quantize(input0, sf, bits-1)
    v = torch.exp(v) * s
    return v

def min_max_quantize(input, bits):
    """Quantize ``input`` to ``2**bits`` evenly spaced levels between its min and max.

    Args:
        input: tensor to quantize (must be non-empty).
        bits: bit-width (at least 1). With ``bits == 1`` the result is
            ``sign(input) - 1``.

    Returns:
        The quantized tensor; a copy of ``input`` when all values are equal.
    """
    assert bits >= 1, bits
    if bits == 1:
        return torch.sign(input) - 1
    # float() works on the 0-dim results of min()/max(); indexing them with
    # .numpy()[0] raises IndexError on current PyTorch.
    min_val = float(input.min())
    max_val = float(input.max())
    span = max_val - min_val
    if span == 0:
        # Constant input: nothing to quantize, and rescaling would be 0/0.
        return input.clone()

    input_rescale = (input - min_val) / span

    n = math.pow(2.0, bits) - 1
    v = torch.floor(input_rescale * n + 0.5) / n

    v = v * span + min_val
    return v

def tanh_quantize(input, bits):
    """Quantize ``input`` uniformly in tanh space and map it back.

    The input is squashed with ``tanh``, rescaled to ``[0, 1]``, snapped to
    ``2**bits - 1`` steps, rescaled to ``[-1, 1]`` and passed through the
    inverse hyperbolic tangent.

    Args:
        input: tensor to quantize.
        bits: bit-width (at least 1). With ``bits == 1`` only the sign is
            returned.

    Returns:
        The quantized tensor.
    """
    assert bits >= 1, bits
    if bits == 1:
        return torch.sign(input)
    levels = math.pow(2.0, bits) - 1
    squashed = torch.tanh(input)            # [-1, 1]
    unit = (squashed + 1.0) / 2             # [0, 1]
    snapped = torch.floor(unit * levels + 0.5) / levels
    signed = 2 * snapped - 1                # [-1, 1]
    # arctanh written out explicitly
    return 0.5 * torch.log((1 + signed) / (1 - signed))


class LinearQuant(nn.Module):
    """Activation quantizer using fixed-point quantization.

    During the first ``counter`` forward calls the layer passes its input
    through unchanged and only tracks the smallest scaling factor seen;
    afterwards it quantizes with ``linear_quantize``.
    """

    def __init__(self, name, bits, sf=None, overflow_rate=0.0, counter=10):
        """
        Args:
            name: label stored on the layer.
            bits: bit-width of the quantized activations.
            sf: initial number of fractional bits, or ``None`` to calibrate.
            overflow_rate: passed to ``compute_integral_part`` while calibrating.
            counter: number of calibration forward passes.
        """
        super().__init__()
        self.name = name
        self.bits = bits
        self.sf = sf
        self.overflow_rate = overflow_rate
        self._counter = counter

    @property
    def counter(self):
        """Remaining number of calibration passes."""
        return self._counter

    def forward(self, input):
        if self._counter <= 0:
            # Calibration finished: quantize with the collected scaling factor.
            return linear_quantize(input, self.sf, self.bits)
        self._counter -= 1
        sf_new = self.bits - 1 - compute_integral_part(input, self.overflow_rate)
        # Keep the smallest scaling factor observed so far.
        self.sf = sf_new if self.sf is None else min(self.sf, sf_new)
        return input

    def __repr__(self):
        template = '{}(sf={}, bits={}, overflow_rate={:.3f}, counter={})'
        return template.format(
            self.__class__.__name__, self.sf, self.bits, self.overflow_rate, self.counter)

class LogQuant(nn.Module):
    """Log-domain activation quantizer with a warm-up phase.

    While the internal counter is positive, ``forward`` returns its input
    untouched and records the smallest ``sf`` candidate derived from
    ``log(|input|)``. Afterwards it returns
    ``log_linear_quantize(input, sf, bits)``.
    """

    def __init__(self, name, bits, sf=None, overflow_rate=0.0, counter=10):
        super(LogQuant, self).__init__()
        self.name = name
        self._counter = counter

        self.bits = bits
        self.sf = sf
        self.overflow_rate = overflow_rate

    @property
    def counter(self):
        """Number of warm-up forward passes still remaining."""
        return self._counter

    def forward(self, input):
        # Warm-up finished: quantize with the recorded scale factor.
        if self._counter <= 0:
            return log_linear_quantize(input, self.sf, self.bits)

        # Warm-up: measure the log-magnitude range of this batch.
        self._counter -= 1
        log_magnitude = torch.log(torch.abs(input))
        candidate = self.bits - 1 - compute_integral_part(log_magnitude, self.overflow_rate)
        self.sf = candidate if self.sf is None else min(self.sf, candidate)
        return input

    def __repr__(self):
        template = '{}(sf={}, bits={}, overflow_rate={:.3f}, counter={})'
        return template.format(
            type(self).__name__, self.sf, self.bits, self.overflow_rate, self.counter)

class NormalQuant(nn.Module):
    """Stateless quantization layer that applies ``quant_func(input, bits)``.

    Unlike ``LinearQuant``/``LogQuant`` there is no warm-up phase, so the
    ``counter`` property always reports 0.

    Args:
        name: label stored on the layer.
        bits: bit width forwarded to ``quant_func``.
        quant_func: callable taking ``(input, bits)`` and returning the
            quantized tensor.
    """

    def __init__(self, name, bits, quant_func):
        super(NormalQuant, self).__init__()
        self.name = name
        self.bits = bits
        self.quant_func = quant_func
        # The ``counter`` property reads this attribute; it was previously
        # never set, so accessing ``counter`` raised AttributeError.
        self._counter = 0

    @property
    def counter(self):
        """Remaining warm-up passes; always 0 for this layer."""
        return self._counter

    def forward(self, input):
        output = self.quant_func(input, self.bits)
        return output

    def __repr__(self):
        return '{}(bits={})'.format(self.__class__.__name__, self.bits)

def duplicate_model_with_quant(model, bits, overflow_rate=0.0, counter=10, type='linear'):
    """Insert a quantization layer after every quantizable layer of ``model``.

    ``nn.Sequential`` containers are rebuilt as new ``nn.Sequential`` objects
    in which each Conv2d / Linear / BatchNorm1d / BatchNorm2d / AvgPool2d
    child is followed by a quant layer chosen by ``type``. Any other module
    has its children replaced in place and is returned itself. The original
    model is assumed to contain at least one ``nn.Sequential``.
    """
    assert type in ['linear', 'minmax', 'log', 'tanh']

    if not isinstance(model, nn.Sequential):
        # Generic container or leaf: recurse into children and mutate in place.
        for child_name, child in model._modules.items():
            model._modules[child_name] = duplicate_model_with_quant(
                child, bits, overflow_rate, counter, type)
        return model

    quantizable = (nn.Conv2d, nn.Linear, nn.BatchNorm1d, nn.BatchNorm2d, nn.AvgPool2d)
    rebuilt = OrderedDict()
    for child_name, child in model._modules.items():
        if not isinstance(child, quantizable):
            rebuilt[child_name] = duplicate_model_with_quant(
                child, bits, overflow_rate, counter, type)
            continue

        rebuilt[child_name] = child
        quant_name = '{}_quant'.format(child_name)
        if type == 'linear':
            quant_layer = LinearQuant(quant_name, bits=bits, overflow_rate=overflow_rate, counter=counter)
        elif type == 'log':
            quant_layer = NormalQuant(quant_name, bits=bits, quant_func=log_minmax_quantize)
        elif type == 'minmax':
            quant_layer = NormalQuant(quant_name, bits=bits, quant_func=min_max_quantize)
        else:
            quant_layer = NormalQuant(quant_name, bits=bits, quant_func=tanh_quantize)
        rebuilt['{}_{}_quant'.format(child_name, type)] = quant_layer

    return nn.Sequential(rebuilt)


================================================
FILE: utils/classifiers/pytorch_playground/utee/selector.py
================================================
from utee import misc
import os
from imagenet import dataset
print = misc.logger.info
from IPython import embed

# Names accepted by select(). select() resolves each name to the same-named
# builder function defined in this module, so every entry here needs a
# matching function below. The trailing comments are the original author's
# notes on input resolution.
known_models = [
    'mnist', 'svhn', # 28x28
    'cifar10', 'cifar100', # 32x32
    'stl10', # 96x96
    'alexnet', # 224x224
    'vgg16', 'vgg16_bn', 'vgg19', 'vgg19_bn', # 224x224
    'resnet18', 'resnet34', 'resnet50', 'resnet101','resnet152', # 224x224
    'squeezenet_v0', 'squeezenet_v1', #224x224
    'inception_v3', # 299x299
]

def mnist(cuda=True, model_root=None):
    """Build the MNIST model from the ``mnist`` package.

    Returns ``(model, dataset.get, False)``; the model is moved to the GPU
    when ``cuda`` is truthy. ``model_root`` must be a path, since it is joined
    with ``'mnist.pth'``.
    """
    print("Building and initializing mnist parameters")
    from mnist import model, dataset
    weights_path = os.path.join(model_root, 'mnist.pth')
    net = model.mnist(pretrained=weights_path)
    if cuda:
        net = net.cuda()
    return net, dataset.get, False

def svhn(cuda=True, model_root=None):
    """Build the SVHN model from the ``svhn`` package.

    Returns ``(model, dataset.get, False)``; the model is moved to the GPU
    when ``cuda`` is truthy. ``model_root`` is joined with ``'svhn.pth'``.
    """
    print("Building and initializing svhn parameters")
    from svhn import model, dataset
    weights_path = os.path.join(model_root, 'svhn.pth')
    net = model.svhn(32, pretrained=weights_path)
    if cuda:
        net = net.cuda()
    return net, dataset.get, False

def cifar10(cuda=True, model_root=None):
    """Build the CIFAR-10 model from the ``cifar`` package.

    Returns ``(model, dataset.get10, False)``; the model is moved to the GPU
    when ``cuda`` is truthy. ``model_root`` is joined with ``'cifar10.pth'``.
    """
    print("Building and initializing cifar10 parameters")
    from cifar import model, dataset
    weights_path = os.path.join(model_root, 'cifar10.pth')
    net = model.cifar10(128, pretrained=weights_path)
    if cuda:
        net = net.cuda()
    return net, dataset.get10, False

def cifar100(cuda=True, model_root=None):
    """Build the CIFAR-100 model from the ``cifar`` package.

    Returns ``(model, dataset.get100, False)``; the model is moved to the GPU
    when ``cuda`` is truthy. ``model_root`` is joined with ``'cifar100.pth'``.
    """
    print("Building and initializing cifar100 parameters")
    from cifar import model, dataset
    weights_path = os.path.join(model_root, 'cifar100.pth')
    net = model.cifar100(128, pretrained=weights_path)
    if cuda:
        net = net.cuda()
    return net, dataset.get100, False

def stl10(cuda=True, model_root=None):
    """Build the STL-10 model from the ``stl10`` package.

    Returns ``(model, dataset.get, False)``; the model is moved to the GPU
    when ``cuda`` is truthy. ``model_root`` is joined with ``'stl10.pth'``.
    """
    print("Building and initializing stl10 parameters")
    from stl10 import model, dataset
    weights_path = os.path.join(model_root, 'stl10.pth')
    net = model.stl10(32, pretrained=weights_path)
    if cuda:
        net = net.cuda()
    return net, dataset.get, False

def alexnet(cuda=True, model_root=None):
    """Build AlexNet via ``imagenet.alexnet``.

    Returns ``(model, dataset.get, True)`` where ``dataset`` is the
    module-level ``imagenet.dataset``; the model is moved to the GPU when
    ``cuda`` is truthy.
    """
    print("Building and initializing alexnet parameters")
    from imagenet import alexnet as alx
    net = alx.alexnet(True, model_root)
    net = net.cuda() if cuda else net
    return net, dataset.get, True

def vgg16(cuda=True, model_root=None):
    """Build VGG-16 via ``imagenet.vgg``.

    Returns ``(model, dataset.get, True)`` where ``dataset`` is the
    module-level ``imagenet.dataset``; the model is moved to the GPU when
    ``cuda`` is truthy.
    """
    print("Building and initializing vgg16 parameters")
    from imagenet import vgg
    net = vgg.vgg16(True, model_root)
    net = net.cuda() if cuda else net
    return net, dataset.get, True

def vgg16_bn(cuda=True, model_root=None):
    """Build VGG-16 with batch normalization via ``imagenet.vgg``.

    Returns ``(model, dataset.get, True)`` where ``dataset`` is the
    module-level ``imagenet.dataset``; the model is moved to the GPU when
    ``cuda`` is truthy.
    """
    print("Building vgg16_bn parameters")
    from imagenet import vgg
    # Previously this called vgg.vgg19_bn, so requesting 'vgg16_bn' silently
    # produced the 19-layer network.
    # NOTE(review): assumes imagenet.vgg exposes vgg16_bn with the same call
    # convention as vgg19_bn — confirm against imagenet/vgg.py.
    m = vgg.vgg16_bn(model_root)
    if cuda:
        m = m.cuda()
    return m, dataset.get, True

def vgg19(cuda=True, model_root=None):
    """Build VGG-19 via ``imagenet.vgg``.

    Returns ``(model, dataset.get, True)`` where ``dataset`` is the
    module-level ``imagenet.dataset``; the model is moved to the GPU when
    ``cuda`` is truthy.
    """
    print("Building and initializing vgg19 parameters")
    from imagenet import vgg
    net = vgg.vgg19(True, model_root)
    net = net.cuda() if cuda else net
    return net, dataset.get, True

def vgg19_bn(cuda=True, model_root=None):
    """Build VGG-19 with batch normalization via ``imagenet.vgg``.

    Returns ``(model, dataset.get, True)`` where ``dataset`` is the
    module-level ``imagenet.dataset``; the model is moved to the GPU when
    ``cuda`` is truthy.
    """
    print("Building vgg19_bn parameters")
    from imagenet import vgg
    net = vgg.vgg19_bn(model_root)
    net = net.cuda() if cuda else net
    return net, dataset.get, True

def inception_v3(cuda=True, model_root=None):
    """Build Inception-v3 via ``imagenet.inception``.

    Returns ``(model, dataset.get, True)`` where ``dataset`` is the
    module-level ``imagenet.dataset``; the model is moved to the GPU when
    ``cuda`` is truthy.
    """
    print("Building and initializing inception_v3 parameters")
    from imagenet import inception
    net = inception.inception_v3(True, model_root)
    net = net.cuda() if cuda else net
    return net, dataset.get, True

def resnet18(cuda=True, model_root=None):
    """Build ResNet-18 via ``imagenet.resnet``.

    Returns ``(model, dataset.get, True)`` where ``dataset`` is the
    module-level ``imagenet.dataset``; the model is moved to the GPU when
    ``cuda`` is truthy.
    """
    print("Building and initializing resnet-18 parameters")
    from imagenet import resnet
    net = resnet.resnet18(True, model_root)
    net = net.cuda() if cuda else net
    return net, dataset.get, True

def resnet34(cuda=True, model_root=None):
    """Build ResNet-34 via ``imagenet.resnet``.

    Returns ``(model, dataset.get, True)`` where ``dataset`` is the
    module-level ``imagenet.dataset``; the model is moved to the GPU when
    ``cuda`` is truthy.
    """
    print("Building and initializing resnet-34 parameters")
    from imagenet import resnet
    net = resnet.resnet34(True, model_root)
    net = net.cuda() if cuda else net
    return net, dataset.get, True

def resnet50(cuda=True, model_root=None):
    """Build ResNet-50 via ``imagenet.resnet``.

    Returns ``(model, dataset.get, True)`` where ``dataset`` is the
    module-level ``imagenet.dataset``; the model is moved to the GPU when
    ``cuda`` is truthy.
    """
    print("Building and initializing resnet-50 parameters")
    from imagenet import resnet
    net = resnet.resnet50(True, model_root)
    net = net.cuda() if cuda else net
    return net, dataset.get, True

def resnet101(cuda=True, model_root=None):
    """Build ResNet-101 via ``imagenet.resnet``.

    Returns ``(model, dataset.get, True)`` where ``dataset`` is the
    module-level ``imagenet.dataset``; the model is moved to the GPU when
    ``cuda`` is truthy.
    """
    print("Building and initializing resnet-101 parameters")
    from imagenet import resnet
    net = resnet.resnet101(True, model_root)
    net = net.cuda() if cuda else net
    return net, dataset.get, True

def resnet152(cuda=True, model_root=None):
    """Build ResNet-152 via ``imagenet.resnet``.

    Returns ``(model, dataset.get, True)`` where ``dataset`` is the
    module-level ``imagenet.dataset``; the model is moved to the GPU when
    ``cuda`` is truthy.
    """
    print("Building and initializing resnet-152 parameters")
    from imagenet import resnet
    net = resnet.resnet152(True, model_root)
    net = net.cuda() if cuda else net
    return net, dataset.get, True

def squeezenet_v0(cuda=True, model_root=None):
    """Build SqueezeNet 1.0 via ``imagenet.squeezenet``.

    Returns ``(model, dataset.get, True)`` where ``dataset`` is the
    module-level ``imagenet.dataset``; the model is moved to the GPU when
    ``cuda`` is truthy.
    """
    print("Building and initializing squeezenet_v0 parameters")
    from imagenet import squeezenet
    net = squeezenet.squeezenet1_0(True, model_root)
    net = net.cuda() if cuda else net
    return net, dataset.get, True

def squeezenet_v1(cuda=True, model_root=None):
    """Build SqueezeNet 1.1 via ``imagenet.squeezenet``.

    Returns ``(model, dataset.get, True)`` where ``dataset`` is the
    module-level ``imagenet.dataset``; the model is moved to the GPU when
    ``cuda`` is truthy.
    """
    print("Building and initializing squeezenet_v1 parameters")
    from imagenet import squeezenet
    net = squeezenet.squeezenet1_1(True, model_root)
    net = net.cuda() if cuda else net
    return net, dataset.get, True

def select(model_name, **kwargs):
    """Dispatch to the builder function named ``model_name``.

    Args:
        model_name: one of the entries of ``known_models``.
        **kwargs: forwarded to the builder. ``model_root`` defaults to
            ``~/.torch/models`` (user-expanded) when not supplied.

    Returns:
        Whatever the selected builder returns.

    Raises:
        AssertionError: if ``model_name`` is not in ``known_models``.
    """
    # Explicit raise instead of ``assert`` so the whitelist check still runs
    # under ``python -O``; the exception type is unchanged for callers.
    if model_name not in known_models:
        raise AssertionError(model_name)
    kwargs.setdefault('model_root', os.path.expanduser('~/.torch/models'))
    # Look the builder up by name rather than eval()-ing the string.
    builder = globals()[model_name]
    return builder(**kwargs)

if __name__ == '__main__':
    # Manual smoke test: build AlexNet with the default arguments, then open
    # an interactive IPython shell for inspection. alexnet() returns a
    # 3-tuple, so m1 holds that whole tuple rather than just the model.
    m1 = alexnet()
    embed()


================================================
FILE: utils/classifiers/stacked_mnist.py
================================================
import torch
from torch import nn
import torch.utils.model_zoo as model_zoo
from collections import OrderedDict
from torchvision import datasets
from torch.nn import functional as F
from torchvision import transforms

CLASSIFIER_PATH = 'mnist_model.pt'

class Classifier():
    """Stacked-MNIST classifier built from a single-digit MNIST classifier.

    Each of the three image channels is classified as a digit; the three
    digits are combined into one integer label as
    ``d0 + 10 * d1 + 100 * d2``.
    """

    def __init__(self):
        self.mnist = MNISTClassifier().cuda()

        # Use the saved weights when they can be loaded; otherwise report the
        # problem and train a fresh classifier.
        try:
            self.mnist.load(CLASSIFIER_PATH)
        except Exception as err:
            print(err)
            self.mnist.train()

    def get_predictions(self, x):
        """Return the combined three-digit label for each image in ``x``."""
        assert(x.size(1) == 3)
        digits = [self.mnist.get_predictions(x[:, channel, :, :]) for channel in range(3)]
        combined = digits[0]
        for place, digit in enumerate(digits[1:], start=1):
            combined = combined + digit * 10**place
        return combined

def get_mnist_dataloader(batch_size=100):
    """Return a shuffled DataLoader over the MNIST training split.

    Images are read from ``data/MNIST``, resized and center-cropped to 32,
    converted to tensors, and normalized with mean 0.5 and std 0.5.
    Incomplete final batches are dropped.
    """
    preprocessing = transforms.Compose([
        transforms.Resize(32),
        transforms.CenterCrop(32),
        transforms.ToTensor(),
        transforms.Normalize((0.5, ), (0.5, )),
    ])
    mnist_train = datasets.MNIST('data/MNIST', train=True, transform=preprocessing)

    loader_options = dict(
        batch_size=batch_size,
        num_workers=12,
        shuffle=True,
        pin_memory=True,
        sampler=None,
        drop_last=True,
    )
    return torch.utils.data.DataLoader(mnist_train, **loader_options)

class MNISTClassifier(nn.Module):
    """Fully connected MNIST digit classifier.

    The network is a stack of ``Linear -> ReLU -> Dropout(0.2)`` blocks
    followed by a final ``Linear`` layer producing ``n_class`` logits.

    Args:
        input_dims: number of features after flattening each input.
        n_hiddens: sizes of the hidden layers, in order.
        n_class: number of output classes.
    """

    def __init__(self, input_dims=1024, n_hiddens=(256, 256), n_class=10):
        super(MNISTClassifier, self).__init__()
        self.input_dims = input_dims

        current_dims = input_dims
        layers = OrderedDict()
        for i, n_hidden in enumerate(n_hiddens, start=1):
            layers['fc{}'.format(i)] = nn.Linear(current_dims, n_hidden)
            layers['relu{}'.format(i)] = nn.ReLU()
            layers['drop{}'.format(i)] = nn.Dropout(0.2)
            current_dims = n_hidden
        layers['out'] = nn.Linear(current_dims, n_class)

        self.model = nn.Sequential(layers)
        print(self.model)

    def forward(self, input):
        """Flatten ``input`` to ``(batch, input_dims)`` and return the logits."""
        input = input.reshape(input.size(0), -1)
        assert input.size(1) == self.input_dims
        return self.model.forward(input)

    def get_predictions(self, input):
        """Return the arg-max class index for each sample in ``input``.

        Predictions are computed with dropout disabled and without tracking
        gradients, so repeated calls on the same input give the same labels.
        The module's training/eval mode is restored afterwards.
        """
        was_training = self.training
        nn.Module.train(self, False)
        try:
            with torch.no_grad():
                logits = self.forward(input)
        finally:
            nn.Module.train(self, was_training)
        return logits.argmax(dim=1)

    def load(self, path):
        """Load a state dict saved at ``path`` into this module."""
        self.load_state_dict(torch.load(path))
        print('Loaded pretrained MNIST classifier')

    def train(self, mode=None):
        """Fit the classifier, or switch training mode when ``mode`` is given.

        Called with no argument (the historical usage), this runs 10 epochs
        of Adam on the MNIST training loader and saves the weights to
        ``CLASSIFIER_PATH``. Called with a boolean it behaves like
        ``nn.Module.train(mode)``, which is what ``nn.Module.eval()`` relies
        on; without this, ``eval()`` raised TypeError.
        """
        if mode is not None:
            return super(MNISTClassifier, self).train(mode)

        print('Training MNIST classifier')
        # Make sure dropout is active while fitting.
        super(MNISTClassifier, self).train(True)
        dataloader = get_mnist_dataloader()
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)

        for epoch in range(10):
            for it, (x, y) in enumerate(dataloader):
                optimizer.zero_grad()
                x, y = x.cuda(), y.cuda()
                logits = self.forward(x)
                loss = F.cross_entropy(logits, y)
                loss.backward()
                optimizer.step()
                if it % 100 == 0:
                    acc = (self.get_predictions(x) == y).float().mean().item()
                    print(f'[{epoch}, {it}], closs={loss}, acc={acc}')

        torch.save(self.state_dict(), CLASSIFIER_PATH)
        

if __name__ == '__main__':
    # Manual smoke test: take three batches of 10 single-channel MNIST
    # images, concatenate them along the channel axis to form 10 three-channel
    # inputs, and print the per-channel ground-truth labels next to the
    # classifier's combined predictions.
    classifier = Classifier()
    train_loader = get_mnist_dataloader(10)  
    xs, ys = [], []
    for i, (x, y) in enumerate(train_loader):
        if i == 3:
            break  
        xs.append(x.cuda())
        ys.append(y)
    print(ys)
    print(classifier.get_predictions(torch.cat(xs, dim=1)))

================================================
FILE: utils/get_empirical_distribution.py
================================================
import argparse
import os
from tqdm import tqdm

import json
import numpy as np

from classifiers import classifier_dict
from np_to_pt_img import np_to_pt


def get_empirical_distribution(path_to_samples):
    ''' counts how often the classifier predicts each class on the saved samples

    Relies on the module-level ``batch_size`` and ``classifier`` globals that
    the ``__main__`` section sets up.

    Args:
        path_to_samples: path to an ``.npz`` archive containing a ``fake``
            array of images.

    Returns:
        ``{'fake': counts}`` where ``counts`` maps each predicted class id to
        the number of samples assigned to it (raw counts, not normalized).
    '''
    results = {}

    # NOTE(review): allow_pickle=True can execute arbitrary code when loading
    # an untrusted archive; only point this at sample files you produced.
    with np.load(path_to_samples, allow_pickle=True) as data:
        for datatype in ['fake']:  # , 'real'
            counts = {}
            results[datatype] = counts
            imgs = data[datatype]
            print(f'Found {len(imgs)} samples in {path_to_samples}')
            # Step by batch_size over the whole array so the trailing partial
            # batch is classified too instead of being silently dropped.
            for start in tqdm(range(0, len(imgs), batch_size)):
                x_batch = np_to_pt(imgs[start:start + batch_size]).cuda()
                y_pred = classifier.get_predictions(x_batch)
                for yi in y_pred:
                    yi = yi.item()
                    counts[yi] = counts.get(yi, 0) + 1
    return results


def get_kl(fake, nclasses):
    '''computes the log10 kl between the empirical distribution and a uniform one.

    Args:
        fake: mapping from class id to count (or any non-negative weight);
            it is normalized by its total here.
        nclasses: number of classes of the uniform reference distribution.

    Returns:
        ``sum_i p_i * log10(p_i * nclasses)`` over the entries of ``fake``.
        Entries with zero count contribute nothing (0 * log 0 is taken as 0),
        and an empty or all-zero mapping yields 0.
    '''
    result = 0
    total = sum(fake.values())
    for count in fake.values():
        # Skip empty classes: log10(0) is -inf and would turn the sum into NaN.
        if count == 0:
            continue
        pi = count / total
        # log10 seems to reproduce pacgan results
        result += pi * np.log10(pi * nclasses)
    return result


# Number of ground-truth modes (classes) per supported dataset.
nmodes_gt = {'places': 365, 'cifar': 10, 'imagenet': 1000, 'stacked_mnist': 1000}


def _load_json_or_empty(path):
    '''returns the JSON object stored at path, or an empty dict if the file does not exist'''
    if not os.path.exists(path):
        return {}
    with open(path) as f:
        return json.load(f)


if __name__ == '__main__':
    parser = argparse.ArgumentParser('compute empirical distributions and reverse-kl metrics')
    # --samples and --results_dir are required: without them the script
    # previously failed later with a TypeError.
    parser.add_argument('--samples', required=True, help='path to samples')
    parser.add_argument('--it', type=str, help='iteration number (can be \'pretrained\') of samples')
    parser.add_argument('--results_dir', required=True, help='path to results_dir')
    parser.add_argument('--dataset', type=str, required=True)
    parser.add_argument('--batch_size', type=int, default=100)
    args = parser.parse_args()

    # batch_size and classifier are read as globals by get_empirical_distribution.
    batch_size = args.batch_size
    classifier = classifier_dict[args.dataset]()
    it = args.it
    results_dir = args.results_dir
    result = get_empirical_distribution(args.samples)
    nmodes = len(result['fake'])
    nclasses = nmodes_gt[args.dataset]

    kl = get_kl(result['fake'], nclasses)

    # Start from empty result tables on the first run instead of crashing
    # after the (expensive) classification pass has already finished.
    os.makedirs(results_dir, exist_ok=True)
    kl_path = os.path.join(results_dir, 'kl_results.json')
    nmodes_path = os.path.join(results_dir, 'nmodes_results.json')
    kl_results = _load_json_or_empty(kl_path)
    nmodes_results = _load_json_or_empty(nmodes_path)

    kl_results[it] = kl
    nmodes_results[it] = nmodes

    print(f'{results_dir} iteration {it} KL: {kl} Covered {nmodes} out of {nclasses} total modes')

    with open(kl_path, 'w') as f:
        f.write(json.dumps(kl_results))
    with open(nmodes_path, 'w') as f:
        f.write(json.dumps(nmodes_results))


================================================
FILE: utils/get_gt_imgs.py
================================================
import os
import argparse
from tqdm import tqdm
from PIL import Image
import torch
from torchvision import transforms, datasets
import numpy as np
import random


def get_images(root, N):
    '''collects image file paths under root in shuffled order

    Walks ``root`` (after ``~`` expansion) recursively and considers at most
    the first 1000 entries of every directory, so that large per-class
    folders are not enumerated in full.

    Args:
        root: directory to search.
        N: unused; callers slice the returned list themselves.

    Returns:
        A shuffled list of paths whose names end in .png, .webp, .jpg or
        .jpeg (case-insensitive).
    '''
    # The old ``root + '.txt'`` cache branch was removed: it was disabled with
    # ``if False`` and would have called open() on a boolean if re-enabled.
    image_exts = ('.png', '.webp', '.jpg', '.jpeg')
    all_files = []
    for dirpath, _dirnames, filenames in os.walk(os.path.expanduser(root)):
        # don't get whole dataset, just get enough images per class
        for fname in filenames[:1000]:
            if fname.lower().endswith(image_exts):
                all_files.append(os.path.join(dirpath, fname))
    random.shuffle(all_files)
    return all_files


def pt_to_np(imgs):
    '''rescales a pytorch image batch from [-1, 1] to [0, 255] and returns it as numpy

    The batch is permuted from (N, C, H, W) to (N, H, W, C). All arithmetic is
    done in place, so the values of ``imgs`` itself are overwritten.
    '''
    channels_last = imgs.permute(0, 2, 3, 1)  # view sharing storage with imgs
    channels_last.mul_(0.5)
    channels_last.add_(0.5)
    channels_last.mul_(255)
    channels_last.clamp_(0, 255)
    return channels_last.numpy()


def get_transform(size):
    '''builds the preprocessing pipeline: resize, center-crop to size, to tensor, normalize with mean/std 0.5 per channel'''
    steps = [
        transforms.Resize(size),
        transforms.CenterCrop(size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ]
    return transforms.Compose(steps)


def get_gt_samples(dataset, nimgs=50000):
    '''loads up to nimgs ground-truth training images as a numpy batch

    Args:
        dataset: key into the module-level ``paths`` and ``sizes`` tables.
            ``'cifar'`` is read through ``torchvision.datasets.CIFAR10``;
            every other dataset is read from image files found by
            ``get_images``.
        nimgs: maximum number of images to return.

    Returns:
        The result of ``pt_to_np`` on the stacked, transformed images.
    '''
    transform = get_transform(sizes[dataset])
    images = []
    if dataset != 'cifar':
        all_images = get_images(paths[dataset], nimgs)
        for file_path in tqdm(all_images[:nimgs]):
            # Close each file promptly rather than leaving it to the GC.
            with Image.open(file_path) as img:
                images.append(transform(img.convert('RGB')))
    else:
        data = datasets.CIFAR10(paths[dataset], transform=transform)
        # Honor nimgs here as well; previously the whole dataset was always
        # returned for CIFAR regardless of the requested count.
        for idx in tqdm(range(min(nimgs, len(data)))):
            x, _ = data[idx]
            images.append(x)
    return pt_to_np(torch.stack(images))


# Root directory of each dataset, relative to the working directory.
# get_gt_samples() looks datasets up here by name.
paths = {
    'imagenet': 'data/ImageNet',
    'places': 'data/Places365',
    'cifar': 'data/CIFAR'
}

# Target size passed to get_transform() for each dataset.
sizes = {'imagenet': 128, 'places': 128, 'cifar': 32}

if __name__ == "__main__":
    # Each flag selects one dataset; for every selected dataset the ground
    # truth images are stored under both the 'fake' and 'real' keys.
    parser = argparse.ArgumentParser('Save a batch of ground truth train set images for evaluation')
    for flag in ('--cifar', '--imagenet', '--places'):
        parser.add_argument(flag, action='store_true')
    args = parser.parse_args()

    os.makedirs('output', exist_ok=True)

    jobs = [
        (args.cifar, 'cifar', 'output/cifar_gt_imgs.npz'),
        (args.imagenet, 'imagenet', 'output/imagenet_gt_imgs.npz'),
        (args.places, 'places', 'output/places_gt_imgs.npz'),
    ]
    for requested, dataset_name, out_file in jobs:
        if not requested:
            continue
        samples = get_gt_samples(dataset_name, nimgs=50000)
        np.savez(out_file, fake=samples, real=samples)


================================================
FILE: utils/np_to_pt_img.py
================================================
import torch


def np_to_pt(x):
    ''' converts a channels-last numpy image (or batch) into a channels-first torch tensor. values are NOT renormalized '''
    tensor = torch.from_numpy(x)
    rank = tensor.dim()
    if rank == 3:
        # single image: (H, W, C) -> (C, H, W)
        return tensor.permute(2, 0, 1)
    if rank == 4:
        # batch: (N, H, W, C) -> (N, C, H, W)
        return tensor.permute(0, 3, 1, 2)
    raise NotImplementedError


================================================
FILE: visualize_clusters.py
================================================
import argparse
import os
import shutil
import torch
import torchvision

from torch import nn
from gan_training import utils
from gan_training.inputs import get_dataset
from gan_training.checkpoints import CheckpointIO
from gan_training.config import load_config
from seeded_sampler import SeededSampler

# Module-level setup: everything below runs at import time, including
# command-line parsing and loading of the config file.
torch.backends.cudnn.benchmark = True

# Arguments
parser = argparse.ArgumentParser(description='Visualize the samples/clusters of a class-conditional GAN')
parser.add_argument('config', type=str, help='Path to config file.')
parser.add_argument('--model_it', type=int, help='If you want to load from a specific model iteration')
parser.add_argument('--show_clusters', action='store_true', help='show the real images. Requires a path to the real image train directory')
args = parser.parse_args()

# The given config is loaded together with configs/default.yaml; main() reads
# `config`, `args` and `out_dir` as globals.
config = load_config(args.config, 'configs/default.yaml')
out_dir = config['training']['out_dir']


def main():
    """Write per-cluster image grids of generated (and optionally real) samples.

    For every label index, 20 images are sampled from the generator with
    seeds 0..19 and saved as ``<i>_gen.png`` in ``<out_dir>/clusters``. With
    ``--show_clusters``, training images are assigned to clusters by the
    loaded clusterer and, for clusters that collected at least 20 images, the
    first 20 are saved as ``<i>_real.png``.
    """
    checkpoint_dir = os.path.join(out_dir, 'chkpts')

    # Resolve the checkpoint iteration once (this lookup used to be repeated
    # further down with an identical expression).
    most_recent = utils.get_most_recent(checkpoint_dir, 'model') if args.model_it is None else args.model_it

    cluster_path = os.path.join(out_dir, 'clusters')
    print('Saving clusters/samples to', cluster_path)

    os.makedirs(cluster_path, exist_ok=True)

    shutil.copyfile('seeing/lightbox.html', os.path.join(cluster_path, '+lightbox.html'))

    checkpoint_io = CheckpointIO(checkpoint_dir=checkpoint_dir)
    clusterer = checkpoint_io.load_clusterer(most_recent, pretrained=config['pretrained'], load_samples=False)

    # Unwrap DataParallel so the clusterer holds the bare discriminator.
    if isinstance(clusterer.discriminator, nn.DataParallel):
        clusterer.discriminator = clusterer.discriminator.module

    model_path = os.path.join(checkpoint_dir, 'model_%08d.pt' % most_recent)
    sampler = SeededSampler(args.config,
                            model_path=model_path,
                            clusterer_path=os.path.join(checkpoint_dir, f'clusterer{most_recent}.pkl'),
                            pretrained=config['pretrained'])

    if args.show_clusters:
        clusters = [[] for _ in range(config['generator']['nlabels'])]
        train_dataset, _ = get_dataset(
            name='webp'
            if 'cifar' not in config['data']['train_dir'].lower() else 'cifar10',
            data_dir=config['data']['train_dir'],
            size=config['data']['img_size'])

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=config['training']['batch_size'],
            num_workers=config['training']['nworkers'],
            shuffle=True,
            pin_memory=True,
            sampler=None,
            drop_last=True)

        print('Generating clusters')
        for batch_num, (x_real, y_gt) in enumerate(train_loader):
            x_real = x_real.cuda()
            y_pred = clusterer.get_labels(x_real, y_gt)

            for i, yi in enumerate(y_pred):
                clusters[yi].append(x_real[i].cpu())

            # don't generate too many, we're only visualizing 20 per cluster
            if batch_num * config['training']['batch_size'] >= 10000:
                break
    else:
        # None marks "no real images requested" for every cluster.
        clusters = [None] * config['generator']['nlabels']

    nimgs = 20
    nrows = 4

    for i, real_imgs in enumerate(clusters):
        # Real-image grid only for clusters that collected enough members.
        if real_imgs is not None and len(real_imgs) >= nimgs:
            cluster = torch.stack(real_imgs)[:nimgs]

            # NOTE(review): `* 0.5 + 0.5` presumably maps [-1, 1] images to
            # [0, 1] for saving — confirm against the dataset normalization.
            torchvision.utils.save_image(cluster * 0.5 + 0.5,
                                         os.path.join(cluster_path, f'{i}_real.png'),
                                         nrow=nrows)

        generated = []
        for seed in range(nimgs):
            img = sampler.conditional_sample(i, seed=seed)
            generated.append(img.detach().cpu())
        generated = torch.cat(generated)

        torchvision.utils.save_image(generated * 0.5 + 0.5,
                                     os.path.join(cluster_path, f'{i}_gen.png'),
                                     nrow=nrows)

    print('Clusters/samples can be visualized under', cluster_path)


if __name__ == '__main__':
    main()