Repository: YangNaruto/FQ-GAN Branch: master Commit: bc43d45700b2 Files: 87 Total size: 732.2 KB Directory structure: gitextract_joocyhq2/ ├── FQ-BigGAN/ │ ├── BigGAN.py │ ├── BigGANdeep.py │ ├── LICENSE │ ├── TFHub/ │ │ ├── README.md │ │ ├── biggan_v1.py │ │ └── converter.py │ ├── animal_hash.py │ ├── calculate_inception_moments.py │ ├── datasets.py │ ├── inception_tf13.py │ ├── inception_utils.py │ ├── layers.py │ ├── losses.py │ ├── make_hdf5.py │ ├── sample.py │ ├── scripts/ │ │ ├── launch_C10.sh │ │ ├── launch_C100.sh │ │ ├── launch_I128_bs256x4.sh │ │ ├── launch_I64_bs128x4.sh │ │ └── utils/ │ │ ├── duplicate.sh │ │ ├── prepare_data.sh │ │ └── trans.py │ ├── sync_batchnorm/ │ │ ├── __init__.py │ │ ├── batchnorm.py │ │ ├── batchnorm_reimpl.py │ │ ├── comm.py │ │ ├── replicate.py │ │ └── unittest.py │ ├── train.py │ ├── train_fns.py │ ├── utility/ │ │ ├── extract_imagenet.py │ │ └── untar.py │ ├── utils.py │ └── vq_layer.py ├── FQ-StyleGAN/ │ ├── LICENSE.txt │ ├── dataset_tool.py │ ├── dnnlib/ │ │ ├── __init__.py │ │ ├── submission/ │ │ │ ├── __init__.py │ │ │ ├── internal/ │ │ │ │ ├── __init__.py │ │ │ │ └── local.py │ │ │ ├── run_context.py │ │ │ └── submit.py │ │ ├── tflib/ │ │ │ ├── __init__.py │ │ │ ├── autosummary.py │ │ │ ├── custom_ops.py │ │ │ ├── network.py │ │ │ ├── ops/ │ │ │ │ ├── __init__.py │ │ │ │ ├── fused_bias_act.cu │ │ │ │ ├── fused_bias_act.py │ │ │ │ ├── upfirdn_2d.cu │ │ │ │ └── upfirdn_2d.py │ │ │ ├── optimizer.py │ │ │ └── tfutil.py │ │ └── util.py │ ├── metrics/ │ │ ├── __init__.py │ │ ├── frechet_inception_distance.py │ │ ├── inception_score.py │ │ ├── linear_separability.py │ │ ├── metric_base.py │ │ ├── metric_defaults.py │ │ ├── perceptual_path_length.py │ │ └── precision_recall.py │ ├── pretrained_networks.py │ ├── projector.py │ ├── run_generator.py │ ├── run_metrics.py │ ├── run_projector.py │ ├── run_training.py │ ├── test_nvcc │ ├── test_nvcc.cu │ └── training/ │ ├── __init__.py │ ├── dataset.py │ ├── loss.py │ ├── misc.py │ 
├── networks_stylegan.py │ ├── networks_stylegan2.py │ └── training_loop.py ├── FQ-U-GAT-IT/ │ ├── LICENSE │ ├── UGATIT.py │ ├── dataset/ │ │ └── download_dataset_1.sh │ ├── download_dataset_2.sh │ ├── logger.py │ ├── main.py │ ├── ops.py │ ├── utils.py │ └── vq_layer.py └── README.md ================================================ FILE CONTENTS ================================================ ================================================ FILE: FQ-BigGAN/BigGAN.py ================================================ import numpy as np import math import functools import torch import torch.nn as nn from torch.nn import init import torch.optim as optim import torch.nn.functional as F from torch.nn import Parameter as P from vq_layer import Quantize import layers from sync_batchnorm import SynchronizedBatchNorm2d as SyncBatchNorm2d # Architectures for G # Attention is passed in in the format '32_64' to mean applying an attention # block at both resolution 32x32 and 64x64. Just '64' will apply at 64x64. 
def G_arch(ch=64, attention='64', ksize='333333', dilation='111111'):
  """Build the generator architecture table, keyed by output resolution.

  Args:
    ch: Base channel width; every channel count is a multiple of it.
    attention: Underscore-separated resolutions at which an attention block
      is requested, e.g. '32_64'. An empty string requests none.
    ksize: Unused here; kept so existing callers keep working.
    dilation: Unused here; kept so existing callers keep working.

  Returns:
    A dict with keys 512, 256, 128, 64 and 32. Each value is a dict holding
    the per-block lists 'in_channels', 'out_channels', 'upsample' and
    'resolution', plus 'attention', a dict mapping every block resolution to
    a bool.
  """
  # Parse the spec once (it does not depend on the resolution) and skip empty
  # fragments so that attention='' means "no attention" instead of crashing.
  attn_resolutions = {int(item) for item in attention.split('_') if item}

  def make_entry(in_mults, out_mults):
    # Block resolutions are 8, 16, ... doubling once per block.
    resolutions = [2 ** (i + 3) for i in range(len(in_mults))]
    return {'in_channels': [ch * item for item in in_mults],
            'out_channels': [ch * item for item in out_mults],
            'upsample': [True] * len(in_mults),
            'resolution': resolutions,
            'attention': {res: (res in attn_resolutions)
                          for res in resolutions}}

  arch = {}
  arch[512] = make_entry([16, 16, 8, 8, 4, 2, 1], [16, 8, 8, 4, 2, 1, 1])
  arch[256] = make_entry([16, 16, 8, 8, 4, 2], [16, 8, 8, 4, 2, 1])
  arch[128] = make_entry([16, 16, 8, 4, 2], [16, 8, 4, 2, 1])
  arch[64] = make_entry([16, 16, 8, 4], [16, 8, 4, 2])
  arch[32] = make_entry([4, 4, 4], [4, 4, 4])
  return arch
class Generator(nn.Module):
  """Generator: latent z plus (already embedded) class vector y -> image.

  The network is a linear layer, a stack of ``layers.GBlock`` stages (each
  optionally followed by ``layers.Attention``) and a batchnorm-activation-conv
  head followed by tanh. Unless ``no_optim`` is set, an Adam optimizer over
  all parameters is created and stored as ``self.optim``.

  Args:
    G_ch: Channel width multiplier.
    dim_z: Dimensionality of the latent space.
    bottom_width: Initial spatial size the first linear output is reshaped to.
    resolution: Output resolution; selects the entry of ``G_arch``.
    G_kernel_size: Stored as ``self.kernel_size``; the convs built here use 3.
    G_attn: Attention spec forwarded to ``G_arch`` (e.g. '64' or '32_64').
    n_classes: Number of classes.
    num_G_SVs, num_G_SV_itrs: Forwarded to the spectral-norm layers.
    G_shared: Use a shared class embedding (``self.shared``).
    shared_dim: Size of the shared embedding; ``dim_z`` is used when <= 0.
    hier: Split z into chunks and feed one chunk to every block.
    cross_replica, mybn: Forwarded to the batchnorm layers.
    G_activation: Nonlinearity used in the blocks and the output head.
    G_lr, G_B1, G_B2, adam_eps: Adam hyper-parameters.
    BN_eps, SN_eps: Epsilons for batchnorm and spectral norm.
    G_mixed_precision: Use ``utils.Adam16`` instead of ``optim.Adam``.
    G_fp16: Stored as ``self.fp16``.
    G_init: Weight init style: 'ortho', 'N02', 'glorot' or 'xavier'.
    skip_init: Skip weight initialization.
    no_optim: Do not create an optimizer (e.g. for an EMA copy).
    G_param: 'SN' selects spectral-norm conv/linear layers.
    norm_style: Forwarded to ``layers.ccbn``.
    **kwargs: Ignored.
  """

  def __init__(self, G_ch=64, dim_z=128, bottom_width=4, resolution=128,
               G_kernel_size=3, G_attn='64', n_classes=1000,
               num_G_SVs=1, num_G_SV_itrs=1,
               G_shared=True, shared_dim=0, hier=False,
               cross_replica=False, mybn=False,
               G_activation=nn.ReLU(inplace=False),
               G_lr=5e-5, G_B1=0.0, G_B2=0.999, adam_eps=1e-8,
               BN_eps=1e-5, SN_eps=1e-12, G_mixed_precision=False, G_fp16=False,
               G_init='ortho', skip_init=False, no_optim=False,
               G_param='SN', norm_style='bn',
               **kwargs):
    super(Generator, self).__init__()
    # Channel width multiplier
    self.ch = G_ch
    # Dimensionality of the latent space
    self.dim_z = dim_z
    # The initial spatial dimensions
    self.bottom_width = bottom_width
    # Resolution of the output
    self.resolution = resolution
    # Kernel size?
    self.kernel_size = G_kernel_size
    # Attention?
    self.attention = G_attn
    # Number of classes, for use in categorical conditional generation
    self.n_classes = n_classes
    # Use shared embeddings?
    self.G_shared = G_shared
    # Dimensionality of the shared embedding? Unused if not using G_shared
    self.shared_dim = shared_dim if shared_dim > 0 else dim_z
    # Hierarchical latent space?
    self.hier = hier
    # Cross replica batchnorm?
    self.cross_replica = cross_replica
    # Use my batchnorm?
    self.mybn = mybn
    # Nonlinearity for residual blocks
    self.activation = G_activation
    # Initialization style
    self.init = G_init
    # Parameterization style
    self.G_param = G_param
    # Normalization style
    self.norm_style = norm_style
    # Epsilon for BatchNorm?
    self.BN_eps = BN_eps
    # Epsilon for Spectral Norm?
    self.SN_eps = SN_eps
    # fp16?
    self.fp16 = G_fp16
    # Architecture dict
    self.arch = G_arch(self.ch, self.attention)[resolution]

    # If using hierarchical latents, adjust z
    if self.hier:
      # Number of places z slots into
      self.num_slots = len(self.arch['in_channels']) + 1
      self.z_chunk_size = (self.dim_z // self.num_slots)
      # Recalculate latent dimensionality for even splitting into chunks
      self.dim_z = self.z_chunk_size * self.num_slots
    else:
      self.num_slots = 1
      self.z_chunk_size = 0

    # Which convs, batchnorms, and linear layers to use
    if self.G_param == 'SN':
      self.which_conv = functools.partial(layers.SNConv2d,
                          kernel_size=3, padding=1,
                          num_svs=num_G_SVs, num_itrs=num_G_SV_itrs,
                          eps=self.SN_eps)
      self.which_linear = functools.partial(layers.SNLinear,
                          num_svs=num_G_SVs, num_itrs=num_G_SV_itrs,
                          eps=self.SN_eps)
    else:
      self.which_conv = functools.partial(nn.Conv2d, kernel_size=3, padding=1)
      self.which_linear = nn.Linear

    # We use a non-spectral-normed embedding here regardless;
    # For some reason applying SN to G's embedding seems to randomly cripple G
    self.which_embedding = nn.Embedding
    bn_linear = (functools.partial(self.which_linear, bias=False)
                 if self.G_shared else self.which_embedding)
    ##TODO: Modify BN
    self.which_bn = functools.partial(layers.ccbn,
                          which_linear=bn_linear,
                          cross_replica=self.cross_replica,
                          mybn=self.mybn,
                          input_size=(self.shared_dim + self.z_chunk_size
                                      if self.G_shared else self.n_classes),
                          norm_style=self.norm_style,
                          eps=self.BN_eps)

    # Prepare model
    # If not using shared embeddings, self.shared is just a passthrough
    self.shared = (self.which_embedding(n_classes, self.shared_dim)
                   if G_shared else layers.identity())
    # First linear layer
    self.linear = self.which_linear(self.dim_z // self.num_slots,
                                    self.arch['in_channels'][0] * (self.bottom_width ** 2))

    # self.blocks is a doubly-nested list of modules, the outer loop intended
    # to be over blocks at a given resolution (resblocks and/or self-attention)
    # while the inner loop is over a given block
    self.blocks = []
    for index in range(len(self.arch['out_channels'])):
      self.blocks += [[layers.GBlock(in_channels=self.arch['in_channels'][index],
                             out_channels=self.arch['out_channels'][index],
                             which_conv=self.which_conv,
                             which_bn=self.which_bn,
                             activation=self.activation,
                             upsample=(functools.partial(F.interpolate, scale_factor=2)
                                       if self.arch['upsample'][index] else None))]]

      # If attention on this block, attach it to the end
      if self.arch['attention'][self.arch['resolution'][index]]:
        print('Adding attention layer in G at resolution %d' % self.arch['resolution'][index])
        self.blocks[-1] += [layers.Attention(self.arch['out_channels'][index], self.which_conv)]

    # Turn self.blocks into a ModuleList so that it's all properly registered.
    self.blocks = nn.ModuleList([nn.ModuleList(block) for block in self.blocks])

    # Output layer: batchnorm-relu-conv.
    # Consider using a non-spectral conv here
    self.output_layer = nn.Sequential(layers.bn(self.arch['out_channels'][-1],
                                                cross_replica=self.cross_replica,
                                                mybn=self.mybn),
                                      self.activation,
                                      self.which_conv(self.arch['out_channels'][-1], 3))

    # Initialize weights. Optionally skip init for testing.
    if not skip_init:
      self.init_weights()

    # Set up optimizer
    # If this is an EMA copy, no need for an optim, so just return now
    if no_optim:
      return
    self.lr, self.B1, self.B2, self.adam_eps = G_lr, G_B1, G_B2, adam_eps
    if G_mixed_precision:
      print('Using fp16 adam in G...')
      import utils
      self.optim = utils.Adam16(params=self.parameters(), lr=self.lr,
                                betas=(self.B1, self.B2), weight_decay=0,
                                eps=self.adam_eps)
    else:
      self.optim = optim.Adam(params=self.parameters(), lr=self.lr,
                              betas=(self.B1, self.B2), weight_decay=0,
                              eps=self.adam_eps)

    # LR scheduling, left here for forward compatibility
    # self.lr_sched = {'itr' : 0}# if self.progressive else {}
    # self.j = 0

  def init_weights(self):
    """Initialize conv/linear/embedding weights and count their parameters.

    The style is chosen by ``self.init``; an unknown style only prints a
    notice. The parameter total is stored in ``self.param_count``.
    """
    self.param_count = 0
    for module in self.modules():
      if isinstance(module, (nn.Conv2d, nn.Linear, nn.Embedding)):
        if self.init == 'ortho':
          init.orthogonal_(module.weight)
        elif self.init == 'N02':
          init.normal_(module.weight, 0, 0.02)
        elif self.init in ['glorot', 'xavier']:
          init.xavier_uniform_(module.weight)
        else:
          print('Init style not recognized...')
        self.param_count += sum([p.data.nelement() for p in module.parameters()])
    # Double-quoted literal: the former 'G''s' was two adjacent literals that
    # Python concatenated into "Gs".
    print("Param count for G's initialized parameters: %d" % self.param_count)

  # Note on this forward function: we pass in a y vector which has
  # already been passed through G.shared to enable easy class-wise
  # interpolation later. If we passed in the one-hot and then ran it through
  # G.shared in this forward function, it would be harder to handle.
  def forward(self, z, y):
    """Generate from latent ``z`` and class vector ``y``.

    With ``self.hier`` set, z is split along dim 1 into chunks of
    ``self.z_chunk_size``: the first chunk feeds the linear layer and every
    following chunk is concatenated to y for the block with the same index.
    Otherwise every block receives y unchanged.

    Returns:
      ``tanh`` of the output head applied to the final feature map.
    """
    # If hierarchical, concatenate zs and ys
    if self.hier:
      zs = torch.split(z, self.z_chunk_size, 1)
      z = zs[0]
      ys = [torch.cat([y, item], 1) for item in zs[1:]]
    else:
      ys = [y] * len(self.blocks)

    # First linear layer
    h = self.linear(z)
    # Reshape
    h = h.view(h.size(0), -1, self.bottom_width, self.bottom_width)

    # Loop over blocks
    for index, blocklist in enumerate(self.blocks):
      # Second inner loop in case block has multiple layers
      for block in blocklist:
        h = block(h, ys[index])

    # Apply batchnorm-relu-conv-tanh at output
    return torch.tanh(self.output_layer(h))
# Discriminator architecture, same paradigm as G's above
def D_arch(ch=64, attention='64', ksize='333333', dilation='111111'):
  """Build the discriminator architecture table, keyed by input resolution.

  Args:
    ch: Base channel width; every non-input channel count is a multiple of it.
    attention: Underscore-separated resolutions at which an attention block
      is requested, e.g. '32_64'. An empty string requests none.
    ksize: Unused here; kept so existing callers keep working.
    dilation: Unused here; kept so existing callers keep working.

  Returns:
    A dict with keys 256, 128, 64 and 32. Each value is a dict holding the
    lists 'in_channels', 'out_channels', 'downsample' and 'resolution', plus
    'attention', a dict mapping power-of-two resolutions (starting at 4) to
    a bool.
  """
  # Parse the spec once (it does not depend on the resolution) and skip empty
  # fragments so that attention='' means "no attention" instead of crashing.
  attn_resolutions = {int(item) for item in attention.split('_') if item}

  def attention_flags(stop_exp):
    # Flags for resolutions 4, 8, ..., 2 ** (stop_exp - 1).
    return {2 ** i: 2 ** i in attn_resolutions for i in range(2, stop_exp)}

  arch = {}
  arch[256] = {'in_channels': [3] + [ch * item for item in [1, 2, 4, 8, 8, 16]],
               'out_channels': [item * ch for item in [1, 2, 4, 8, 8, 16, 16]],
               'downsample': [True] * 6 + [False],
               'resolution': [128, 64, 32, 16, 8, 4, 4],
               'attention': attention_flags(8)}
  arch[128] = {'in_channels': [3] + [ch * item for item in [1, 2, 4, 8, 16]],
               'out_channels': [item * ch for item in [1, 2, 4, 8, 16, 16]],
               'downsample': [True] * 5 + [False],
               'resolution': [64, 32, 16, 8, 4, 4],
               'attention': attention_flags(8)}
  arch[64] = {'in_channels': [3] + [ch * item for item in [1, 2, 4, 8]],
              'out_channels': [item * ch for item in [1, 2, 4, 8, 16]],
              'downsample': [True] * 4 + [False],
              'resolution': [32, 16, 8, 4, 4],
              'attention': attention_flags(7)}
  arch[32] = {'in_channels': [3] + [item * ch for item in [4, 4, 4]],
              'out_channels': [item * ch for item in [4, 4, 4, 4]],
              'downsample': [True, True, False, False],
              'resolution': [16, 16, 16, 16],
              'attention': attention_flags(6)}
  return arch
class Discriminator(nn.Module):
  """Projection discriminator with feature-quantization stages.

  The network is a stack of ``layers.DBlock`` stages. Stages whose index is
  listed in ``discrete_layer`` get a ``Quantize`` module appended, and stages
  at attention resolutions get a ``layers.Attention`` module appended. The
  final features are sum-pooled and fed to a linear output layer plus a
  class-embedding projection. An Adam optimizer over all parameters is
  created and stored as ``self.optim``.

  Args:
    D_ch: Width multiplier.
    D_wide: Forwarded to ``layers.DBlock`` as ``wide``.
    resolution: Input resolution; selects the entry of ``D_arch``.
    D_kernel_size: Stored as ``self.kernel_size``; the convs built here use 3.
    D_attn: Attention spec forwarded to ``D_arch``.
    n_classes: Number of classes for the projection embedding.
    num_D_SVs, num_D_SV_itrs: Forwarded to the spectral-norm layers.
    D_activation: Nonlinearity used in the blocks and before pooling.
    D_lr, D_B1, D_B2, adam_eps: Adam hyper-parameters.
    SN_eps: Epsilon for spectral norm.
    output_dim: Size of the linear output.
    D_mixed_precision: Use ``utils.Adam16`` instead of ``optim.Adam``.
    D_fp16: Stored as ``self.fp16``.
    D_init: Weight init style: 'ortho', 'N02', 'glorot' or 'xavier'.
    skip_init: Skip weight initialization.
    D_param: Only 'SN' defines the layer factories used below.
    dict_decay: Forwarded to ``Quantize`` as ``decay``.
    commitment: Forwarded to ``Quantize`` as ``commitment``.
    discrete_layer: String of single-digit stage indices to quantize.
    dict_size: ``Quantize`` is built with ``2 ** dict_size`` as its second
      argument.
    **kwargs: Ignored.
  """

  def __init__(self, D_ch=64, D_wide=True, resolution=128,
               D_kernel_size=3, D_attn='64', n_classes=1000,
               num_D_SVs=1, num_D_SV_itrs=1, D_activation=nn.ReLU(inplace=False),
               D_lr=2e-4, D_B1=0.0, D_B2=0.999, adam_eps=1e-8,
               SN_eps=1e-12, output_dim=1, D_mixed_precision=False, D_fp16=False,
               D_init='ortho', skip_init=False, D_param='SN',
               dict_decay=0.8, commitment=0.5, discrete_layer='2', dict_size=10,
               **kwargs):
    super(Discriminator, self).__init__()
    # Width multiplier
    self.ch = D_ch
    # Use Wide D as in BigGAN and SA-GAN or skinny D as in SN-GAN?
    self.D_wide = D_wide
    # Resolution
    self.resolution = resolution
    # Kernel size
    self.kernel_size = D_kernel_size
    # Attention?
    self.attention = D_attn
    # Number of classes
    self.n_classes = n_classes
    # Activation
    self.activation = D_activation
    # Initialization style
    self.init = D_init
    # Parameterization style
    self.D_param = D_param
    # Epsilon for Spectral Norm?
    self.SN_eps = SN_eps
    # Fp16?
    self.fp16 = D_fp16
    # Architecture
    self.arch = D_arch(self.ch, self.attention)[resolution]

    # Which convs, batchnorms, and linear layers to use
    # No option to turn off SN in D right now
    if self.D_param == 'SN':
      self.which_conv = functools.partial(layers.SNConv2d,
                          kernel_size=3, padding=1,
                          num_svs=num_D_SVs, num_itrs=num_D_SV_itrs,
                          eps=self.SN_eps)
      self.which_linear = functools.partial(layers.SNLinear,
                          num_svs=num_D_SVs, num_itrs=num_D_SV_itrs,
                          eps=self.SN_eps)
      self.which_embedding = functools.partial(layers.SNEmbedding,
                              num_svs=num_D_SVs, num_itrs=num_D_SV_itrs,
                              eps=self.SN_eps)

    # Prepare model
    # self.blocks is a doubly-nested list of modules, the outer loop intended
    # to be over blocks at a given resolution (resblocks and/or self-attention)
    self.blocks = []
    # Each character of discrete_layer is one stage index.
    self.quant_layer = [int(x) for x in discrete_layer]
    for index in range(len(self.arch['out_channels'])):
      self.blocks += [[layers.DBlock(in_channels=self.arch['in_channels'][index],
                       out_channels=self.arch['out_channels'][index],
                       which_conv=self.which_conv,
                       wide=self.D_wide,
                       activation=self.activation,
                       preactivation=(index > 0),
                       downsample=(nn.AvgPool2d(2) if self.arch['downsample'][index] else None))]]

      if index in self.quant_layer:
        self.blocks[-1] += [Quantize(self.arch['out_channels'][index], 2 ** dict_size,
                                     commitment=commitment, decay=dict_decay)]

      # If attention on this block, attach it to the end
      if self.arch['attention'][self.arch['resolution'][index]]:
        print('Adding attention layer in D at resolution %d' % self.arch['resolution'][index])
        self.blocks[-1] += [layers.Attention(self.arch['out_channels'][index],
                                             self.which_conv)]

    # Turn self.blocks into a ModuleList so that it's all properly registered.
    self.blocks = nn.ModuleList([nn.ModuleList(block) for block in self.blocks])
    # Linear output layer. The output dimension is typically 1, but may be
    # larger if we're e.g. turning this into a VAE with an inference output
    self.linear = self.which_linear(self.arch['out_channels'][-1], output_dim)
    # Embedding for projection discrimination
    self.embed = self.which_embedding(self.n_classes, self.arch['out_channels'][-1])

    # Initialize weights
    if not skip_init:
      self.init_weights()

    # Set up optimizer
    self.lr, self.B1, self.B2, self.adam_eps = D_lr, D_B1, D_B2, adam_eps
    if D_mixed_precision:
      print('Using fp16 adam in D...')
      import utils
      self.optim = utils.Adam16(params=self.parameters(), lr=self.lr,
                                betas=(self.B1, self.B2), weight_decay=0,
                                eps=self.adam_eps)
    else:
      self.optim = optim.Adam(params=self.parameters(), lr=self.lr,
                              betas=(self.B1, self.B2), weight_decay=0,
                              eps=self.adam_eps)
    # LR scheduling, left here for forward compatibility
    # self.lr_sched = {'itr' : 0}# if self.progressive else {}
    # self.j = 0

  def init_weights(self):
    """Initialize conv/linear/embedding weights and count their parameters.

    The style is chosen by ``self.init``; an unknown style only prints a
    notice. The parameter total is stored in ``self.param_count``.
    """
    self.param_count = 0
    for module in self.modules():
      if isinstance(module, (nn.Conv2d, nn.Linear, nn.Embedding)):
        if self.init == 'ortho':
          init.orthogonal_(module.weight)
        elif self.init == 'N02':
          init.normal_(module.weight, 0, 0.02)
        elif self.init in ['glorot', 'xavier']:
          init.xavier_uniform_(module.weight)
        else:
          print('Init style not recognized...')
        self.param_count += sum([p.data.nelement() for p in module.parameters()])
    # Double-quoted literal: the former 'D''s' was two adjacent literals that
    # Python concatenated into "Ds".
    print("Param count for D's initialized parameters: %d" % self.param_count)

  def forward(self, x, y=None):
    """Score ``x``, optionally conditioned on class labels ``y``.

    Args:
      x: Input batch.
      y: Class indices for the projection term. When None the projection is
        skipped and only the linear output is returned.

    Returns:
      ``(out, quant_loss, ppl)``: the discriminator output, the sum of the
      second value returned by every quantized stage (0 if none ran), and
      the third value returned by the last quantized stage (None if none
      ran).
    """
    # Stick x into h for cleaner for loops without flow control
    h = x
    quant_loss = 0
    # Defined up front so a configuration without any quantized stage does
    # not hit an unbound local at the return statement.
    ppl = None
    # Loop over blocks
    for index, blocklist in enumerate(self.blocks):
      if index in self.quant_layer:
        h = blocklist[0](h)
        # NOTE(review): the first value returned by the quantizer is not fed
        # to the following layers; only its loss term and ppl are used.
        # Kept as in the original - confirm this is intended.
        _quantized, diff, ppl = blocklist[1](h)
        if len(blocklist) == 3:
          # Third entry is the attention module attached to this stage.
          h = blocklist[2](h)
        quant_loss += diff
      else:
        for block in blocklist:
          h = block(h)
    # Apply global sum pooling as in SN-GAN
    h = torch.sum(self.activation(h), [2, 3])
    # Get initial class-unconditional output
    out = self.linear(h)
    # Get projection of final featureset onto class vectors and add to evidence
    if y is not None:
      out = out + torch.sum(self.embed(y) * h, 1, keepdim=True)
    return out, quant_loss, ppl
# Parallelized G_D to minimize cross-gpu communication
# Without this, Generator outputs would get all-gathered and then rebroadcast.
class G_D(nn.Module):
  """Wrap a generator and a discriminator so both run in one forward call."""

  def __init__(self, G, D):
    super(G_D, self).__init__()
    self.G = G
    self.D = D

  def forward(self, z, gy, x=None, dy=None, train_G=False,
              return_G_z=False, split_D=False):
    """Generate from ``(z, gy)`` and score the result (and ``x`` if given).

    Args:
      z: Latent batch for the generator.
      gy: Class labels for the generated batch; embedded via ``G.shared``.
      x: Optional data batch to score as well.
      dy: Class labels for ``x``.
      train_G: Enable gradients while running the generator.
      return_G_z: When no ``x`` is given, return the generated batch instead
        of the quantization loss as the second value.
      split_D: Run D separately on generated and data batches instead of on
        their concatenation.

    Returns:
      * ``x`` given, ``split_D``: ``(D(G_z), D(x), loss(G_z), loss(x))``.
      * ``x`` given, not ``split_D``: the D output and quantization loss,
        each split into their generated and data parts in that order,
        followed by ``ppl`` reshaped to a column.
      * no ``x``: ``(D(G_z), G_z)`` if ``return_G_z`` else
        ``(D(G_z), quantization loss)``.
    """
    # Only the generator pass (and its dtype cast) is governed by train_G.
    with torch.set_grad_enabled(train_G):
      G_z = self.G(z, self.G.shared(gy))
      # Cast as necessary so D receives the dtype it works in
      if self.G.fp16 and not self.D.fp16:
        G_z = G_z.float()
      if self.D.fp16 and not self.G.fp16:
        G_z = G_z.half()

    have_data = x is not None

    if split_D:
      out_gen, loss_gen, ppl = self.D(G_z, gy)
      if have_data:
        out_data, loss_data, ppl = self.D(x, dy)
        return out_gen, out_data, loss_gen, loss_data
      if return_G_z:
        return out_gen, G_z
      return out_gen, loss_gen

    # Single D pass: generated samples first, then the data batch.
    D_input = torch.cat([G_z, x], 0) if have_data else G_z
    D_class = torch.cat([gy, dy], 0) if dy is not None else gy
    D_out, quant_loss, ppl = self.D(D_input, D_class)

    if not have_data:
      if return_G_z:
        return D_out, G_z
      return D_out, quant_loss

    # Undo the concatenation; the first chunk belongs to the generated batch.
    n_gen, n_data = G_z.shape[0], x.shape[0]
    out_gen, out_data = torch.split(D_out, [n_gen, n_data])
    loss_gen, loss_data = torch.split(quant_loss, (n_gen, n_data), dim=0)
    return out_gen, out_data, loss_gen, loss_data, ppl.view(-1, 1)
class GBlock(nn.Module):
  """Bottleneck residual block for the BigGAN-deep generator.

  Args:
    in_channels: Channels of the incoming feature map.
    out_channels: Channels of the result.
    which_conv: Conv factory; called with keyword ``kernel_size``/``padding``
      overrides for the 1x1 convs.
    which_bn: Factory for normalization layers that are called as ``bn(x, y)``.
    activation: Nonlinearity applied after each normalization.
    upsample: Optional callable applied to both the hidden and skip paths.
    channel_ratio: Divisor of ``in_channels`` giving the hidden width.
  """

  def __init__(self, in_channels, out_channels,
               which_conv=nn.Conv2d, which_bn=layers.bn, activation=None,
               upsample=None, channel_ratio=4):
    super(GBlock, self).__init__()

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.hidden_channels = self.in_channels // channel_ratio
    self.which_conv = which_conv
    self.which_bn = which_bn
    self.activation = activation

    hidden = self.hidden_channels
    # Conv layers: 1x1 down-projection, two default convs, 1x1 up-projection
    self.conv1 = self.which_conv(self.in_channels, hidden,
                                 kernel_size=1, padding=0)
    self.conv2 = self.which_conv(hidden, hidden)
    self.conv3 = self.which_conv(hidden, hidden)
    self.conv4 = self.which_conv(hidden, self.out_channels,
                                 kernel_size=1, padding=0)
    # Batchnorm layers, one in front of each conv
    self.bn1 = self.which_bn(self.in_channels)
    self.bn2 = self.which_bn(hidden)
    self.bn3 = self.which_bn(hidden)
    self.bn4 = self.which_bn(hidden)
    # Upsample layers
    self.upsample = upsample

  def forward(self, x, y):
    """Return the block output for features ``x`` and conditioning ``y``."""
    skip = x

    # Project down to the hidden width
    normed = self.bn1(x, y)
    h = self.conv1(self.activation(normed))
    # Apply next BN-activation
    h = self.activation(self.bn2(h, y))

    # The skip path keeps only its first out_channels channels when the
    # block changes the channel count.
    if self.in_channels != self.out_channels:
      skip = skip[:, :self.out_channels]

    # Upsample both paths at this point
    if self.upsample:
      h = self.upsample(h)
      skip = self.upsample(skip)

    # Two convs at the hidden width
    h = self.conv2(h)
    h = self.activation(self.bn3(h, y))
    h = self.conv3(h)
    # Final 1x1 conv
    h = self.activation(self.bn4(h, y))
    h = self.conv4(h)
    return h + skip
def G_arch(ch=64, attention='64', ksize='333333', dilation='111111'):
  """Build the BigGAN-deep generator architecture table.

  Args:
    ch: Base channel width; every channel count is a multiple of it.
    attention: Underscore-separated resolutions at which an attention block
      is requested, e.g. '32_64'. An empty string requests none.
    ksize: Unused here; kept so existing callers keep working.
    dilation: Unused here; kept so existing callers keep working.

  Returns:
    A dict with keys 256, 128, 64 and 32. Each value is a dict holding the
    per-stage lists 'in_channels', 'out_channels', 'upsample' and
    'resolution', plus 'attention', a dict mapping every stage resolution to
    a bool.
  """
  # Parse the spec once (it does not depend on the resolution) and skip empty
  # fragments so that attention='' means "no attention" instead of crashing.
  attn_resolutions = {int(item) for item in attention.split('_') if item}

  def make_entry(in_mults, out_mults):
    # Stage resolutions are 8, 16, ... doubling once per stage.
    resolutions = [2 ** (i + 3) for i in range(len(in_mults))]
    return {'in_channels': [ch * item for item in in_mults],
            'out_channels': [ch * item for item in out_mults],
            'upsample': [True] * len(in_mults),
            'resolution': resolutions,
            'attention': {res: (res in attn_resolutions)
                          for res in resolutions}}

  arch = {}
  arch[256] = make_entry([16, 16, 8, 8, 4, 2], [16, 8, 8, 4, 2, 1])
  arch[128] = make_entry([16, 16, 8, 4, 2], [16, 8, 4, 2, 1])
  arch[64] = make_entry([16, 16, 8, 4], [16, 8, 4, 2])
  arch[32] = make_entry([4, 4, 4], [4, 4, 4])
  return arch
class Generator(nn.Module):
  """BigGAN-deep generator: ``G_depth`` bottleneck blocks per stage.

  The network is a linear layer, stages of ``GBlock`` modules (a stage's
  last block upsamples; attention is optionally appended) and a
  batchnorm-activation-conv head followed by tanh. Unless ``no_optim`` is
  set, an Adam optimizer over all parameters is stored as ``self.optim``.

  NOTE(review): the first linear layer and the conditional batchnorm are
  sized for ``dim_z + shared_dim`` inputs, which matches ``forward`` only
  when ``hier`` is True (z is then concatenated with y). With ``hier=False``
  ``forward`` passes z and y on their own - confirm non-hierarchical use is
  unsupported here.

  Args:
    G_ch: Channel width multiplier.
    G_depth: Number of resblocks per stage.
    dim_z: Dimensionality of the latent space.
    bottom_width: Initial spatial size the first linear output is reshaped to.
    resolution: Output resolution; selects the entry of ``G_arch``.
    G_kernel_size: Stored as ``self.kernel_size``; the convs built here use 3.
    G_attn: Attention spec forwarded to ``G_arch``.
    n_classes: Number of classes.
    num_G_SVs, num_G_SV_itrs: Forwarded to the spectral-norm layers.
    G_shared: Use a shared class embedding (``self.shared``).
    shared_dim: Size of the shared embedding; ``dim_z`` is used when <= 0.
    hier: Concatenate y and z and use the result as both input and condition.
    cross_replica, mybn: Forwarded to the batchnorm layers.
    G_activation: Nonlinearity used in the blocks and the output head.
    G_lr, G_B1, G_B2, adam_eps: Adam hyper-parameters.
    BN_eps, SN_eps: Epsilons for batchnorm and spectral norm.
    G_mixed_precision: Use ``utils.Adam16`` instead of ``optim.Adam``.
    G_fp16: Stored as ``self.fp16``.
    G_init: Weight init style: 'ortho', 'N02', 'glorot' or 'xavier'.
    skip_init: Skip weight initialization.
    no_optim: Do not create an optimizer (e.g. for an EMA copy).
    G_param: 'SN' selects spectral-norm conv/linear layers.
    norm_style: Forwarded to ``layers.ccbn``.
    **kwargs: Ignored.
  """

  def __init__(self, G_ch=64, G_depth=2, dim_z=128, bottom_width=4, resolution=128,
               G_kernel_size=3, G_attn='64', n_classes=1000,
               num_G_SVs=1, num_G_SV_itrs=1,
               G_shared=True, shared_dim=0, hier=False,
               cross_replica=False, mybn=False,
               G_activation=nn.ReLU(inplace=False),
               G_lr=5e-5, G_B1=0.0, G_B2=0.999, adam_eps=1e-8,
               BN_eps=1e-5, SN_eps=1e-12, G_mixed_precision=False, G_fp16=False,
               G_init='ortho', skip_init=False, no_optim=False,
               G_param='SN', norm_style='bn',
               **kwargs):
    super(Generator, self).__init__()
    # Channel width multiplier
    self.ch = G_ch
    # Number of resblocks per stage
    self.G_depth = G_depth
    # Dimensionality of the latent space
    self.dim_z = dim_z
    # The initial spatial dimensions
    self.bottom_width = bottom_width
    # Resolution of the output
    self.resolution = resolution
    # Kernel size?
    self.kernel_size = G_kernel_size
    # Attention?
    self.attention = G_attn
    # Number of classes, for use in categorical conditional generation
    self.n_classes = n_classes
    # Use shared embeddings?
    self.G_shared = G_shared
    # Dimensionality of the shared embedding? Unused if not using G_shared
    self.shared_dim = shared_dim if shared_dim > 0 else dim_z
    # Hierarchical latent space?
    self.hier = hier
    # Cross replica batchnorm?
    self.cross_replica = cross_replica
    # Use my batchnorm?
    self.mybn = mybn
    # Nonlinearity for residual blocks
    self.activation = G_activation
    # Initialization style
    self.init = G_init
    # Parameterization style
    self.G_param = G_param
    # Normalization style
    self.norm_style = norm_style
    # Epsilon for BatchNorm?
    self.BN_eps = BN_eps
    # Epsilon for Spectral Norm?
    self.SN_eps = SN_eps
    # fp16?
    self.fp16 = G_fp16
    # Architecture dict
    self.arch = G_arch(self.ch, self.attention)[resolution]

    # Which convs, batchnorms, and linear layers to use
    if self.G_param == 'SN':
      self.which_conv = functools.partial(layers.SNConv2d,
                          kernel_size=3, padding=1,
                          num_svs=num_G_SVs, num_itrs=num_G_SV_itrs,
                          eps=self.SN_eps)
      self.which_linear = functools.partial(layers.SNLinear,
                          num_svs=num_G_SVs, num_itrs=num_G_SV_itrs,
                          eps=self.SN_eps)
    else:
      self.which_conv = functools.partial(nn.Conv2d, kernel_size=3, padding=1)
      self.which_linear = nn.Linear

    # We use a non-spectral-normed embedding here regardless;
    # For some reason applying SN to G's embedding seems to randomly cripple G
    self.which_embedding = nn.Embedding
    bn_linear = (functools.partial(self.which_linear, bias=False)
                 if self.G_shared else self.which_embedding)
    self.which_bn = functools.partial(layers.ccbn,
                          which_linear=bn_linear,
                          cross_replica=self.cross_replica,
                          mybn=self.mybn,
                          input_size=(self.shared_dim + self.dim_z
                                      if self.G_shared else self.n_classes),
                          norm_style=self.norm_style,
                          eps=self.BN_eps)

    # Prepare model
    # If not using shared embeddings, self.shared is just a passthrough
    self.shared = (self.which_embedding(n_classes, self.shared_dim)
                   if G_shared else layers.identity())
    # First linear layer
    self.linear = self.which_linear(self.dim_z + self.shared_dim,
                                    self.arch['in_channels'][0] * (self.bottom_width ** 2))

    # self.blocks is a doubly-nested list of modules, the outer loop intended
    # to be over blocks at a given resolution (resblocks and/or self-attention)
    # while the inner loop is over a given block
    self.blocks = []
    for index in range(len(self.arch['out_channels'])):
      # One single-element list per GBlock: the first block of a stage keeps
      # the channel count, and only the stage's last block upsamples.
      self.blocks += [[GBlock(in_channels=self.arch['in_channels'][index],
                             out_channels=self.arch['in_channels'][index] if g_index == 0 else self.arch['out_channels'][index],
                             which_conv=self.which_conv,
                             which_bn=self.which_bn,
                             activation=self.activation,
                             upsample=(functools.partial(F.interpolate, scale_factor=2)
                                       if self.arch['upsample'][index] and g_index == (self.G_depth - 1) else None))]
                       for g_index in range(self.G_depth)]

      # If attention on this block, attach it to the end
      if self.arch['attention'][self.arch['resolution'][index]]:
        print('Adding attention layer in G at resolution %d' % self.arch['resolution'][index])
        self.blocks[-1] += [layers.Attention(self.arch['out_channels'][index], self.which_conv)]

    # Turn self.blocks into a ModuleList so that it's all properly registered.
    self.blocks = nn.ModuleList([nn.ModuleList(block) for block in self.blocks])

    # Output layer: batchnorm-relu-conv.
    # Consider using a non-spectral conv here
    self.output_layer = nn.Sequential(layers.bn(self.arch['out_channels'][-1],
                                                cross_replica=self.cross_replica,
                                                mybn=self.mybn),
                                      self.activation,
                                      self.which_conv(self.arch['out_channels'][-1], 3))

    # Initialize weights. Optionally skip init for testing.
    if not skip_init:
      self.init_weights()

    # Set up optimizer
    # If this is an EMA copy, no need for an optim, so just return now
    if no_optim:
      return
    self.lr, self.B1, self.B2, self.adam_eps = G_lr, G_B1, G_B2, adam_eps
    if G_mixed_precision:
      print('Using fp16 adam in G...')
      import utils
      self.optim = utils.Adam16(params=self.parameters(), lr=self.lr,
                                betas=(self.B1, self.B2), weight_decay=0,
                                eps=self.adam_eps)
    else:
      self.optim = optim.Adam(params=self.parameters(), lr=self.lr,
                              betas=(self.B1, self.B2), weight_decay=0,
                              eps=self.adam_eps)

    # LR scheduling, left here for forward compatibility
    # self.lr_sched = {'itr' : 0}# if self.progressive else {}
    # self.j = 0

  def init_weights(self):
    """Initialize conv/linear/embedding weights and count their parameters.

    The style is chosen by ``self.init``; an unknown style only prints a
    notice. The parameter total is stored in ``self.param_count``.
    """
    self.param_count = 0
    for module in self.modules():
      if isinstance(module, (nn.Conv2d, nn.Linear, nn.Embedding)):
        if self.init == 'ortho':
          init.orthogonal_(module.weight)
        elif self.init == 'N02':
          init.normal_(module.weight, 0, 0.02)
        elif self.init in ['glorot', 'xavier']:
          init.xavier_uniform_(module.weight)
        else:
          print('Init style not recognized...')
        self.param_count += sum([p.data.nelement() for p in module.parameters()])
    # Double-quoted literal: the former 'G''s' was two adjacent literals that
    # Python concatenated into "Gs".
    print("Param count for G's initialized parameters: %d" % self.param_count)

  # Note on this forward function: we pass in a y vector which has
  # already been passed through G.shared to enable easy class-wise
  # interpolation later. If we passed in the one-hot and then ran it through
  # G.shared in this forward function, it would be harder to handle.
  # NOTE: The z vs y dichotomy here is for compatibility with not-y
  def forward(self, z, y):
    """Generate from latent ``z`` and class vector ``y``.

    With ``self.hier`` set, y and z are concatenated along dim 1 and the
    result is used both as the linear-layer input and as the conditioning
    vector of every block.

    Returns:
      ``tanh`` of the output head applied to the final feature map.
    """
    # If hierarchical, concatenate zs and ys
    if self.hier:
      z = torch.cat([y, z], 1)
      y = z
    # First linear layer
    h = self.linear(z)
    # Reshape
    h = h.view(h.size(0), -1, self.bottom_width, self.bottom_width)
    # Loop over blocks
    for index, blocklist in enumerate(self.blocks):
      # Second inner loop in case block has multiple layers
      for block in blocklist:
        h = block(h, y)

    # Apply batchnorm-relu-conv-tanh at output
    return torch.tanh(self.output_layer(h))
class DBlock(nn.Module):
  """Bottleneck residual block for the BigGAN-deep discriminator.

  Args:
    in_channels: Channels of the incoming feature map.
    out_channels: Channels of the result.
    which_conv: Conv factory; called with keyword ``kernel_size``/``padding``
      overrides for the 1x1 convs.
    wide: Accepted for interface compatibility; not used by this block.
    preactivation: Stored on the module; ``forward`` does not read it.
    activation: Nonlinearity applied between the convs.
    downsample: Optional callable applied to both the hidden and skip paths.
    channel_ratio: Divisor of ``out_channels`` giving the hidden width.
  """

  def __init__(self, in_channels, out_channels, which_conv=layers.SNConv2d, wide=True,
               preactivation=True, activation=None, downsample=None,
               channel_ratio=4):
    super(DBlock, self).__init__()

    self.in_channels = in_channels
    self.out_channels = out_channels
    # The hidden width is derived from the output width
    self.hidden_channels = self.out_channels // channel_ratio
    self.which_conv = which_conv
    self.preactivation = preactivation
    self.activation = activation
    self.downsample = downsample

    hidden = self.hidden_channels
    # Conv layers: 1x1 down-projection, two default convs, 1x1 up-projection
    self.conv1 = self.which_conv(self.in_channels, hidden,
                                 kernel_size=1, padding=0)
    self.conv2 = self.which_conv(hidden, hidden)
    self.conv3 = self.which_conv(hidden, hidden)
    self.conv4 = self.which_conv(hidden, self.out_channels,
                                 kernel_size=1, padding=0)

    # A learned 1x1 conv supplies the extra channels of the skip path
    # whenever the channel count changes.
    self.learnable_sc = in_channels != out_channels
    if self.learnable_sc:
      self.conv_sc = self.which_conv(in_channels, out_channels - in_channels,
                                     kernel_size=1, padding=0)

  def shortcut(self, x):
    """Return the skip-path tensor for ``x``."""
    if self.downsample:
      x = self.downsample(x)
    if not self.learnable_sc:
      return x
    # Keep the input channels and append the learned ones.
    extra = self.conv_sc(x)
    return torch.cat([x, extra], 1)

  def forward(self, x):
    """Return the residual-path output plus the shortcut of ``x``."""
    # 1x1 bottleneck conv
    h = self.conv1(F.relu(x))
    # Two convs at the hidden width, each preceded by the activation
    for conv in (self.conv2, self.conv3):
      h = conv(self.activation(h))
    # Activation before downsample
    h = self.activation(h)
    # Downsample
    if self.downsample:
      h = self.downsample(h)
    # Final 1x1 conv
    residual = self.conv4(h)
    return residual + self.shortcut(x)
# Discriminator architecture, same paradigm as G's above
def D_arch(ch=64, attention='64', ksize='333333', dilation='111111'):
  """Build the BigGAN-deep discriminator architecture table.

  Args:
    ch: Base channel width; every channel count is a multiple of it.
    attention: Underscore-separated resolutions at which an attention block
      is requested, e.g. '32_64'. An empty string requests none.
    ksize: Unused here; kept so existing callers keep working.
    dilation: Unused here; kept so existing callers keep working.

  Returns:
    A dict with keys 256, 128, 64 and 32. Each value is a dict holding the
    lists 'in_channels', 'out_channels', 'downsample' and 'resolution', plus
    'attention', a dict mapping power-of-two resolutions (starting at 4) to
    a bool.
  """
  # Parse the spec once (it does not depend on the resolution) and skip empty
  # fragments so that attention='' means "no attention" instead of crashing.
  attn_resolutions = {int(item) for item in attention.split('_') if item}

  def attention_flags(stop_exp):
    # Flags for resolutions 4, 8, ..., 2 ** (stop_exp - 1).
    return {2 ** i: 2 ** i in attn_resolutions for i in range(2, stop_exp)}

  arch = {}
  arch[256] = {'in_channels': [item * ch for item in [1, 2, 4, 8, 8, 16]],
               'out_channels': [item * ch for item in [2, 4, 8, 8, 16, 16]],
               'downsample': [True] * 6 + [False],
               'resolution': [128, 64, 32, 16, 8, 4, 4],
               'attention': attention_flags(8)}
  arch[128] = {'in_channels': [item * ch for item in [1, 2, 4, 8, 16]],
               'out_channels': [item * ch for item in [2, 4, 8, 16, 16]],
               'downsample': [True] * 5 + [False],
               'resolution': [64, 32, 16, 8, 4, 4],
               'attention': attention_flags(8)}
  arch[64] = {'in_channels': [item * ch for item in [1, 2, 4, 8]],
              'out_channels': [item * ch for item in [2, 4, 8, 16]],
              'downsample': [True] * 4 + [False],
              'resolution': [32, 16, 8, 4, 4],
              'attention': attention_flags(7)}
  arch[32] = {'in_channels': [item * ch for item in [4, 4, 4]],
              'out_channels': [item * ch for item in [4, 4, 4]],
              'downsample': [True, True, False, False],
              'resolution': [16, 16, 16, 16],
              'attention': attention_flags(6)}
  return arch
self.D_depth = D_depth # Resolution self.resolution = resolution # Kernel size self.kernel_size = D_kernel_size # Attention? self.attention = D_attn # Number of classes self.n_classes = n_classes # Activation self.activation = D_activation # Initialization style self.init = D_init # Parameterization style self.D_param = D_param # Epsilon for Spectral Norm? self.SN_eps = SN_eps # Fp16? self.fp16 = D_fp16 # Architecture self.arch = D_arch(self.ch, self.attention)[resolution] # Which convs, batchnorms, and linear layers to use # No option to turn off SN in D right now if self.D_param == 'SN': self.which_conv = functools.partial(layers.SNConv2d, kernel_size=3, padding=1, num_svs=num_D_SVs, num_itrs=num_D_SV_itrs, eps=self.SN_eps) self.which_linear = functools.partial(layers.SNLinear, num_svs=num_D_SVs, num_itrs=num_D_SV_itrs, eps=self.SN_eps) self.which_embedding = functools.partial(layers.SNEmbedding, num_svs=num_D_SVs, num_itrs=num_D_SV_itrs, eps=self.SN_eps) # Prepare model # Stem convolution self.input_conv = self.which_conv(3, self.arch['in_channels'][0]) # self.blocks is a doubly-nested list of modules, the outer loop intended # to be over blocks at a given resolution (resblocks and/or self-attention) self.blocks = [] for index in range(len(self.arch['out_channels'])): self.blocks += [[DBlock(in_channels=self.arch['in_channels'][index] if d_index==0 else self.arch['out_channels'][index], out_channels=self.arch['out_channels'][index], which_conv=self.which_conv, wide=self.D_wide, activation=self.activation, preactivation=True, downsample=(nn.AvgPool2d(2) if self.arch['downsample'][index] and d_index==0 else None)) for d_index in range(self.D_depth)]] # If attention on this block, attach it to the end if self.arch['attention'][self.arch['resolution'][index]]: print('Adding attention layer in D at resolution %d' % self.arch['resolution'][index]) self.blocks[-1] += [layers.Attention(self.arch['out_channels'][index], self.which_conv)] # Turn self.blocks into a 
# Initialize
def init_weights(self):
    """Initialise conv/linear/embedding weights and count their parameters.

    Walks ``self.modules()`` and, for every ``nn.Conv2d``, ``nn.Linear`` or
    ``nn.Embedding`` instance, re-initialises ``module.weight`` according to
    ``self.init`` (``'ortho'``, ``'N02'``, or ``'glorot'``/``'xavier'``).
    An unknown style only prints a notice; the weight is left untouched.

    Side effects: sets ``self.param_count`` to the total number of elements
    in the parameters of the visited modules and prints that total.
    """
    self.param_count = 0
    for module in self.modules():
        if not isinstance(module, (nn.Conv2d, nn.Linear, nn.Embedding)):
            continue
        if self.init == 'ortho':
            init.orthogonal_(module.weight)
        elif self.init == 'N02':
            init.normal_(module.weight, 0, 0.02)
        elif self.init in ('glorot', 'xavier'):
            init.xavier_uniform_(module.weight)
        else:
            print('Init style not recognized...')
        self.param_count += sum(p.data.nelement() for p in module.parameters())
    # Double-quoted so the apostrophe survives; the old 'D''s' literal was
    # implicit string concatenation and printed "Ds".
    print("Param count for D's initialized parameters: %d" % self.param_count)
# Parallelized G_D to minimize cross-gpu communication
# Without this, Generator outputs would get all-gathered and then rebroadcast.
class G_D(nn.Module):
    """Run a generator and a discriminator back to back in one module.

    ``forward`` generates samples from ``z`` and ``self.G.shared(gy)``, casts
    them between float and half when exactly one of ``G.fp16`` / ``D.fp16``
    is set, and then scores them (and optionally real data ``x``) with ``D``.
    """

    def __init__(self, G, D):
        super().__init__()
        self.G = G
        self.D = D

    def forward(self, z, gy, x=None, dy=None, train_G=False, return_G_z=False,
                split_D=False):
        """Score generated (and optionally real) samples.

        Returns:
            * ``(D_fake, D_real)`` when ``x`` is given;
            * ``(D_out, G_z)`` when ``x`` is None and ``return_G_z`` is set;
            * ``D_out`` otherwise.
        """
        # Gradients flow through G only when G is the network being trained.
        with torch.set_grad_enabled(train_G):
            fake = self.G(z, self.G.shared(gy))
            # Match the discriminator's precision if the two differ.
            if self.G.fp16 and not self.D.fp16:
                fake = fake.float()
            elif self.D.fp16 and not self.G.fp16:
                fake = fake.half()

        has_real = x is not None

        if split_D:
            # Two separate passes through D: generated first, then real.
            fake_score = self.D(fake, gy)
            if has_real:
                return fake_score, self.D(x, dy)
            return (fake_score, fake) if return_G_z else fake_score

        # Single pass: stack generated and real samples on the batch axis.
        d_input = torch.cat([fake, x], 0) if has_real else fake
        d_labels = torch.cat([gy, dy], 0) if dy is not None else gy
        scores = self.D(d_input, d_labels)
        if has_real:
            # Undo the stacking: (D_fake, D_real)
            return torch.split(scores, [fake.shape[0], x.shape[0]])
        return (scores, fake) if return_G_z else scores
def truncated_z_sample(batch_size, z_dim, truncation=0.5, seed=None):
    """Sample a ``(batch_size, z_dim)`` array of truncated-normal latents.

    Values are drawn with ``scipy.stats.truncnorm`` on the interval [-2, 2]
    and then multiplied by ``truncation``. Passing ``seed`` makes the draw
    reproducible through a dedicated ``np.random.RandomState``.
    """
    if seed is None:
        rng = None
    else:
        rng = np.random.RandomState(seed)
    shape = (batch_size, z_dim)
    samples = truncnorm.rvs(-2, 2, size=shape, random_state=rng)
    return truncation * samples
class SelfAttention(nn.Module):
    """Self-attention layer built from spectrally-normalised 1x1 convolutions.

    ``theta`` and ``phi`` reduce the channels to ``in_dim // 8``, ``g`` to
    ``in_dim // 2``; ``phi`` and ``g`` are additionally max-pooled by 2.
    The attended features go through ``o_conv`` and are added to the input,
    scaled by the learnable ``gamma`` (initialised to zero).
    """

    def __init__(self, in_dim, activation=F.relu):
        super().__init__()

        def sn_conv1x1(n_in, n_out):
            # Bias-free 1x1 convolution wrapped in SpectralNorm.
            return SpectralNorm(nn.Conv2d(in_channels=n_in, out_channels=n_out,
                                          kernel_size=1, bias=False))

        self.chanel_in = in_dim
        self.activation = activation

        self.theta = sn_conv1x1(in_dim, in_dim // 8)
        self.phi = sn_conv1x1(in_dim, in_dim // 8)
        self.pool = nn.MaxPool2d(2, 2)
        self.g = sn_conv1x1(in_dim, in_dim // 2)
        self.o_conv = sn_conv1x1(in_dim // 2, in_dim)
        self.gamma = nn.Parameter(torch.zeros(1))

        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        """Return ``gamma * attention_output + x`` for a 4-D input ``x``."""
        batch, _, dim2, dim3 = x.size()
        n_pos = dim3 * dim2
        # Pooling by 2 in both spatial dims leaves a quarter of the positions.
        n_pooled = n_pos // 4

        theta_flat = self.theta(x).view(batch, -1, n_pos).permute(0, 2, 1)
        phi_flat = self.pool(self.phi(x)).view(batch, -1, n_pooled)
        weights = self.softmax(torch.bmm(theta_flat, phi_flat))

        g_flat = self.pool(self.g(x)).view(batch, -1, n_pooled)
        attended = torch.bmm(g_flat, weights.permute(0, 2, 1))
        attended = attended.view(batch, -1, dim2, dim3)

        return self.gamma * self.o_conv(attended) + x
class GBlock(nn.Module):
    """Residual block with two spectrally-normalised convolutions.

    Args:
        in_channel: Channels of the block input.
        out_channel: Channels of the block output.
        kernel_size: Kernel size of both main convolutions.
        padding: Padding of both main convolutions.
        stride: Stride of both main convolutions.
        n_class: Accepted for signature compatibility; not used here.
        bn: If true, apply ``ConditionalBatchNorm2d`` before each activation.
        activation: Callable applied before each convolution.
        upsample: If true, upsample by 2 (``F.interpolate``) before ``conv0``
            and on the skip path.
        downsample: If true, average-pool by 2 after ``conv1`` and on the
            skip path.
        z_dim: Size of the conditioning vector fed to the batch norms.

    The skip path uses a 1x1 spectrally-normalised convolution whenever the
    channel count changes or the block resamples; otherwise it is the
    identity.
    """

    def __init__(
        self,
        in_channel,
        out_channel,
        kernel_size=(3, 3),  # tuple: avoids a shared mutable default
        padding=1,
        stride=1,
        n_class=None,
        bn=True,
        activation=F.relu,
        upsample=True,
        downsample=False,
        z_dim=148,
    ):
        super().__init__()

        # Both convolutions always carry a bias (the old expression
        # ``True if bn else True`` evaluated to True in either case).
        self.conv0 = SpectralNorm(
            nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding, bias=True)
        )
        self.conv1 = SpectralNorm(
            nn.Conv2d(out_channel, out_channel, kernel_size, stride, padding, bias=True)
        )

        self.skip_proj = False
        if in_channel != out_channel or upsample or downsample:
            self.conv_sc = SpectralNorm(nn.Conv2d(in_channel, out_channel, 1, 1, 0))
            self.skip_proj = True

        self.upsample = upsample
        self.downsample = downsample
        self.activation = activation
        self.bn = bn
        if bn:
            self.HyperBN = ConditionalBatchNorm2d(in_channel, z_dim)
            self.HyperBN_1 = ConditionalBatchNorm2d(out_channel, z_dim)

    def forward(self, input, condition=None):
        """Return ``main_path(input) + skip_path(input)``.

        ``condition`` is only consulted when the block was built with
        ``bn=True``.
        """
        # Main path: (bn) -> act -> (upsample) -> conv0 -> (bn) -> act -> conv1
        out = input
        if self.bn:
            out = self.HyperBN(out, condition)
        out = self.activation(out)
        if self.upsample:
            out = F.interpolate(out, scale_factor=2)
        out = self.conv0(out)
        if self.bn:
            out = self.HyperBN_1(out, condition)
        out = self.activation(out)
        out = self.conv1(out)
        if self.downsample:
            out = F.avg_pool2d(out, 2)

        # Skip path mirrors the resampling so the shapes line up.
        skip = input
        if self.skip_proj:
            if self.upsample:
                skip = F.interpolate(skip, scale_factor=2)
            skip = self.conv_sc(skip)
            if self.downsample:
                skip = F.avg_pool2d(skip, 2)

        return out + skip
class Generator256(nn.Module):
    """v1 generator with six ``GBlock`` stages.

    The latent input is split into ``num_split`` (= number of blocks + 1)
    chunks along dim 1. The first chunk feeds ``G_linear`` (which expects 20
    features); each remaining chunk is concatenated with the 128-dim class
    embedding and conditions one block. ``code_dim`` is accepted but not
    used by this class.
    """

    def __init__(self, code_dim=140, n_class=1000, chn=96, debug=False):
        super().__init__()

        self.linear = nn.Linear(n_class, 128, bias=False)

        if debug:
            chn = 8

        self.first_view = 16 * chn

        self.G_linear = SpectralNorm(nn.Linear(20, 4 * 4 * 16 * chn))

        # (input, output) channel multipliers of the successive blocks.
        widths = [(16, 16), (16, 8), (8, 8), (8, 4), (4, 2), (2, 1)]
        self.GBlock = nn.ModuleList([
            GBlock(w_in * chn, w_out * chn, n_class=n_class)
            for w_in, w_out in widths
        ])

        self.sa_id = 5
        self.num_split = len(self.GBlock) + 1
        self.attention = SelfAttention(2 * chn)

        self.ScaledCrossReplicaBN = nn.BatchNorm2d(1 * chn, eps=1e-4)
        self.colorize = SpectralNorm(nn.Conv2d(1 * chn, 3, [3, 3], padding=1))

    def forward(self, input, class_id):
        """Generate images from latent ``input`` and class input ``class_id``."""
        chunks = torch.chunk(input, self.num_split, 1)
        class_emb = self.linear(class_id)  # 128

        feats = self.G_linear(chunks[0])
        # Unflatten to (N, 4, 4, C) and move channels to dim 1.
        feats = feats.view(-1, 4, 4, self.first_view).permute(0, 3, 1, 2)

        for idx, (chunk, block) in enumerate(zip(chunks[1:], self.GBlock)):
            # Self-attention is inserted right before block ``sa_id``.
            if idx == self.sa_id:
                feats = self.attention(feats)
            feats = block(feats, torch.cat([chunk, class_emb], 1))

        feats = F.relu(self.ScaledCrossReplicaBN(feats))
        return torch.tanh(self.colorize(feats))
class Discriminator(nn.Module):
    """v1 projection discriminator built from ``GBlock`` stages without BN.

    ``forward`` returns, per sample, a linear score of the sum-pooled
    features plus the inner product of those features with a
    spectrally-normalised class embedding.
    """

    def __init__(self, n_class=1000, chn=96, debug=False):
        super().__init__()

        if debug:
            chn = 8
        self.debug = debug

        def down_block(n_in, n_out, downsample=True):
            # GBlock configured for the discriminator: no BN, no upsampling.
            return GBlock(n_in, n_out, bn=False, upsample=False, downsample=downsample)

        stem = [
            SpectralNorm(nn.Conv2d(3, 1 * chn, 3, padding=1)),
            nn.ReLU(),
            SpectralNorm(nn.Conv2d(1 * chn, 1 * chn, 3, padding=1)),
            nn.AvgPool2d(2),
        ]
        self.pre_conv = nn.Sequential(*stem)
        self.pre_skip = SpectralNorm(nn.Conv2d(3, 1 * chn, 1))

        trunk = [
            down_block(1 * chn, 1 * chn, downsample=True),
            down_block(1 * chn, 2 * chn, downsample=True),
            SelfAttention(2 * chn),
            down_block(2 * chn, 2 * chn, downsample=True),
            down_block(2 * chn, 4 * chn, downsample=True),
            down_block(4 * chn, 8 * chn, downsample=True),
            down_block(8 * chn, 8 * chn, downsample=True),
            down_block(8 * chn, 16 * chn, downsample=True),
            down_block(16 * chn, 16 * chn, downsample=False),
        ]
        self.conv = nn.Sequential(*trunk)

        self.linear = SpectralNorm(nn.Linear(16 * chn, 1))

        # Initialise the class table uniformly before wrapping it.
        class_table = nn.Embedding(n_class, 16 * chn)
        class_table.weight.data.uniform_(-0.1, 0.1)
        self.embed = SpectralNorm(class_table)

    def forward(self, input, class_id):
        """Return one score per sample for images ``input`` of class ``class_id``."""
        feats = self.pre_conv(input)
        # Residual stem: add a 1x1 projection of the pooled input in place.
        feats += self.pre_skip(F.avg_pool2d(input, 2))
        feats = F.relu(self.conv(feats))

        # Sum over all spatial positions -> (N, C).
        pooled = feats.view(feats.size(0), feats.size(1), -1).sum(2)

        logit = self.linear(pooled).squeeze(1)
        projection = (pooled * self.embed(class_id)).sum(1)
        return logit + projection
def dump_tfhub_to_hdf5(module_path, hdf5_path, redownload=False):
    """Loads TFHub weights and saves them to intermediate HDF5 file.

    Args:
        module_path ([Path-like]): Path to TFHub module.
        hdf5_path ([Path-like]): Path to output HDF5 file.
        redownload (bool): If False and ``hdf5_path`` already exists, the
            existing file is opened and returned without touching TFHub.

    Returns:
        [h5py.File]: Loaded hdf5 file containing module weights, opened
        read-only. The caller owns the handle.
    """
    if os.path.exists(hdf5_path) and not redownload:
        print('Loading BigGAN hdf5 file from:', hdf5_path)
        return h5py.File(hdf5_path, 'r')

    print('Loading BigGAN module from:', module_path)
    tf.reset_default_graph()
    hub.Module(module_path)
    print('Loaded BigGAN module from:', module_path)

    initializer = tf.global_variables_initializer()
    # Context managers release the session and the write handle even if
    # evaluating or writing a variable fails part-way through.
    with tf.Session() as sess:
        sess.run(initializer)

        print('Saving BigGAN weights to :', hdf5_path)
        with h5py.File(hdf5_path, 'w') as h5f:
            for var in tf.global_variables():
                val = sess.run(var)
                h5f.create_dataset(var.name, data=val)
                print(f'Saving {var.name} with shape {val.shape}')

    # Re-open read-only so callers cannot modify the dump by accident.
    return h5py.File(hdf5_path, 'r')
def load_generator(self):
    """Load every generator component by delegating to the ``load_*`` helpers.

    TF-side names are built under ``self.TF_ROOT``; the first GBlock scope
    is named ``GBlock`` and later ones ``GBlock_<i>``. The number of blocks
    comes from ``self.NUM_GBLOCK[self.resolution]``.
    """
    tf_root = self.TF_ROOT
    gen_root = os.path.join(tf_root, 'Generator')
    block_scope = os.path.join(gen_root, 'GBlock')

    for idx in range(self.NUM_GBLOCK[self.resolution]):
        # Block 0 has no numeric suffix on the TF side.
        suffix = f'_{idx}' if idx else ''
        self.load_GBlock(f'GBlock.{idx}.', block_scope + suffix)

    self.load_attention('attention.', os.path.join(gen_root, 'attention'))
    self.load_linear('linear', os.path.join(tf_root, 'linear'), bias=False)
    self.load_snlinear('G_linear', os.path.join(gen_root, 'G_Z', 'G_linear'))
    self.load_colorize('colorize', os.path.join(gen_root, 'conv_2d'))
    self.load_ScaledCrossReplicaBNs(
        'ScaledCrossReplicaBN', os.path.join(gen_root, 'ScaledCrossReplicaBN'))
def load_conv(self, name_pth, name_tf, bias=True):
    """Copy one convolution's tensors into ``self.state_dict``.

    Writes ``<name_pth>.weight_u`` and ``.weight_v`` (squeezed),
    ``.weight_bar`` (axes permuted with ``(3, 2, 0, 1)``) and, when ``bias``
    is true, ``.bias``. Tensors are fetched through ``self.load_tf_tensor``
    using the variable names held in ``self.u``, ``self.v``, ``self.w`` and
    ``self.b``.
    """
    fetch = self.load_tf_tensor
    store = self.state_dict

    # The two vector entries only need their singleton dims removed.
    for suffix, var in (('.weight_u', self.u), ('.weight_v', self.v)):
        store[name_pth + suffix] = fetch(name_tf, var).squeeze()

    kernel = fetch(name_tf, self.w)
    store[name_pth + '.weight_bar'] = kernel.permute(3, 2, 0, 1)

    if not bias:
        return
    store[name_pth + '.bias'] = fetch(name_tf, self.b)
# Convert from v1: maps a biggan_v1-style state dict (as produced by
# TFHub2Pytorch) onto the parameter names used by BigGAN.Generator.
def convert_from_v1(hub_dict, resolution=128):
  """Rename and reshape the entries of a v1 state dict.

  Args:
    hub_dict: Mapping from v1 parameter names (e.g.
      ``'G_linear.module.weight_bar'``) to tensors.
    resolution: One of 128, 256 or 512; selects the block index that holds
      the attention layer and the size of the leading latent chunk.

  Returns:
    A new dict keyed by the target names (``'linear.weight'``,
    ``'blocks.<n>.0.conv1.weight'``, ...). Only keys that appear in the
    name map are carried over; ``weight_v`` and ``num_batches_tracked``
    entries of GBlocks are skipped.
  """
  # v1 weight suffix -> target suffix. Suffixes missing here (weight_v) are dropped.
  weightname_dict = {'weight_u': 'u0', 'weight_bar': 'weight', 'bias': 'bias'}
  # v1 numbers convs from 0, the target from 1; the shortcut conv keeps its name.
  convnum_dict = {'conv0': 'conv1', 'conv1': 'conv2', 'conv_sc': 'conv_sc'}
  # Index of the block list that carries the attention layer, per resolution.
  attention_blocknum = {128: 3, 256: 4, 512: 3}[resolution]
  # Fixed (non-GBlock) part of the name map; GBlock entries are added below.
  hub2me = {'linear.weight': 'shared.weight', # This is actually the shared weight
            # Linear stuff
            'G_linear.module.weight_bar': 'linear.weight',
            'G_linear.module.bias': 'linear.bias',
            'G_linear.module.weight_u': 'linear.u0',
            # output layer stuff
            'ScaledCrossReplicaBN.weight': 'output_layer.0.gain',
            'ScaledCrossReplicaBN.bias': 'output_layer.0.bias',
            'ScaledCrossReplicaBN.running_mean': 'output_layer.0.stored_mean',
            'ScaledCrossReplicaBN.running_var': 'output_layer.0.stored_var',
            'colorize.module.weight_bar': 'output_layer.2.weight',
            'colorize.module.bias': 'output_layer.2.bias',
            'colorize.module.weight_u': 'output_layer.2.u0',
            # Attention stuff
            'attention.gamma': 'blocks.%d.1.gamma' % attention_blocknum,
            'attention.theta.module.weight_u': 'blocks.%d.1.theta.u0' % attention_blocknum,
            'attention.theta.module.weight_bar': 'blocks.%d.1.theta.weight' % attention_blocknum,
            'attention.phi.module.weight_u': 'blocks.%d.1.phi.u0' % attention_blocknum,
            'attention.phi.module.weight_bar': 'blocks.%d.1.phi.weight' % attention_blocknum,
            'attention.g.module.weight_u': 'blocks.%d.1.g.u0' % attention_blocknum,
            'attention.g.module.weight_bar': 'blocks.%d.1.g.weight' % attention_blocknum,
            'attention.o_conv.module.weight_u': 'blocks.%d.1.o.u0' % attention_blocknum,
            'attention.o_conv.module.weight_bar':'blocks.%d.1.o.weight' % attention_blocknum,
            }

  # Loop over the hub dict and build the hub2me map
  for name in hub_dict.keys():
    if 'GBlock' in name:
      if 'HyperBN' not in name: # it's a conv
        out = parse.parse('GBlock.{:d}.{}.module.{}',name)
        blocknum, convnum, weightname = out
        if weightname not in weightname_dict:
          continue # unmapped suffix (weight_v): skip this entry
        # Conv number is shifted by one through convnum_dict.
        out_name = 'blocks.%d.0.%s.%s' % (blocknum, convnum_dict[convnum], weightname_dict[weightname])
      else: # hyperbn not conv
        # HyperBN -> bn1, HyperBN_1 -> bn2
        BNnum = 2 if 'HyperBN_1' in name else 1
        if 'embed' in name:
          # gamma_embed / beta_embed linear layers become bn<k>.gain / bn<k>.bias
          out = parse.parse('GBlock.{:d}.{}.module.{}',name)
          blocknum, gamma_or_beta, weightname = out
          if weightname not in weightname_dict: # Ignore weight_v
            continue
          out_name = 'blocks.%d.0.bn%d.%s.%s' % (blocknum, BNnum, 'gain' if 'gamma' in gamma_or_beta else 'bias', weightname_dict[weightname])
        else:
          # Running statistics of the inner batch norm
          out = parse.parse('GBlock.{:d}.{}.bn.{}',name)
          blocknum, dummy, mean_or_var = out
          if 'num_batches_tracked' in mean_or_var:
            continue
          out_name = 'blocks.%d.0.bn%d.%s' % (blocknum, BNnum, 'stored_mean' if 'mean' in mean_or_var else 'stored_var')
      hub2me[name] = out_name

  # Invert the hub2me map
  me2hub = {hub2me[item]: item for item in hub2me}
  new_dict = {}
  # Width of the latent chunk each v1 layer sees (first columns of the BN input)
  dimz_dict = {128: 20, 256: 20, 512:16}
  for item in me2hub:
    # Swap input dim ordering on batchnorm weights to account for the changed
    # concatenation order of Ys and Zs: the last 128 input columns are moved
    # in front of the first dimz columns.
    if ('bn' in item and 'weight' in item) and ('gain' in item or 'bias' in item) and ('output_layer' not in item):
      new_dict[item] = torch.cat([hub_dict[me2hub[item]][:, -128:], hub_dict[me2hub[item]][:, :dimz_dict[resolution]]], 1)
    # Reshape the first linear weight, bias, and u0: the output axis is viewed
    # as (4, 4, 96 * 16) and permuted so that the 96 * 16 axis comes first.
    # NOTE(review): 96 * 16 is hard-coded; presumably the default chn=96 of the
    # v1 generators times 16 -- confirm before converting other widths.
    elif item == 'linear.weight':
      new_dict[item] = hub_dict[me2hub[item]].contiguous().view(4, 4, 96 * 16, -1).permute(2,0,1,3).contiguous().view(-1,dimz_dict[resolution])
    elif item == 'linear.bias':
      new_dict[item] = hub_dict[me2hub[item]].view(4, 4, 96 * 16).permute(2,0,1).contiguous().view(-1)
    elif item == 'linear.u0':
      new_dict[item] = hub_dict[me2hub[item]].view(4, 4, 96 * 16).permute(2,0,1).contiguous().view(1, -1)
    elif me2hub[item] == 'linear.weight': # THIS IS THE SHARED WEIGHT NOT THE FIRST LINEAR LAYER
      # Transpose shared weight so that it's an embedding
      new_dict[item] = hub_dict[me2hub[item]].t()
    elif 'weight_u' in me2hub[item]: # Unsqueeze u0s
      new_dict[item] = hub_dict[me2hub[item]].unsqueeze(0)
    else:
      # Everything else is copied through unchanged.
      new_dict[item] = hub_dict[me2hub[item]]
  return new_dict
def parse_args():
    """Parse the converter's command-line options from ``sys.argv``.

    Returns the ``argparse.Namespace`` with ``resolution``, ``redownload``,
    ``weights_dir``, ``samples_dir``, ``no_ema``, ``verbose``,
    ``generate_samples``, ``batch_size`` and ``parallel``.
    """
    parser = argparse.ArgumentParser(description='Parser for conversion script.')

    def add_flag(option, help_text):
        # Boolean switch that defaults to False.
        parser.add_argument(option, action='store_true', default=False, help=help_text)

    parser.add_argument(
        '--resolution', '-r', type=int, default=None, choices=[128, 256, 512],
        help='Resolution of TFHub module to convert. Converts all resolutions if None.')
    add_flag('--redownload',
             'Redownload weights and overwrite current hdf5 file, if present.')
    parser.add_argument('--weights_dir', type=str, default='pretrained_weights')
    parser.add_argument('--samples_dir', type=str, default='pretrained_samples')
    add_flag('--no_ema', 'Do not load ema weights.')
    add_flag('--verbose', 'Additionally logging.')
    add_flag('--generate_samples', 'Generate test sample with pretrained model.')
    parser.add_argument(
        '--batch_size', type=int, default=64,
        help='Batch size used for test sample.')
    add_flag('--parallel', 'Parallelize G?')

    return parser.parse_args()
'Angelfish', 'Ant', 'Anteater', 'Antelope', 'Ape', 'Armadillo', 'Ass', 'Avocet', 'Axolotl', 'Baboon', 'Badger', 'Balinese', 'Bandicoot', 'Barb', 'Barnacle', 'Barracuda', 'Bat', 'Beagle', 'Bear', 'Beaver', 'Bee', 'Beetle', 'Binturong', 'Bird', 'Birman', 'Bison', 'Bloodhound', 'Boar', 'Bobcat', 'Bombay', 'Bongo', 'Bonobo', 'Booby', 'Budgerigar', 'Buffalo', 'Bulldog', 'Bullfrog', 'Burmese', 'Butterfly', 'Caiman', 'Camel', 'Capybara', 'Caracal', 'Caribou', 'Cassowary', 'Cat', 'Caterpillar', 'Catfish', 'Cattle', 'Centipede', 'Chameleon', 'Chamois', 'Cheetah', 'Chicken', 'Chihuahua', 'Chimpanzee', 'Chinchilla', 'Chinook', 'Chipmunk', 'Chough', 'Cichlid', 'Clam', 'Coati', 'Cobra', 'Cockroach', 'Cod', 'Collie', 'Coral', 'Cormorant', 'Cougar', 'Cow', 'Coyote', 'Crab', 'Crane', 'Crocodile', 'Crow', 'Curlew', 'Cuscus', 'Cuttlefish', 'Dachshund', 'Dalmatian', 'Deer', 'Dhole', 'Dingo', 'Dinosaur', 'Discus', 'Dodo', 'Dog', 'Dogball', 'Dogfish', 'Dolphin', 'Donkey', 'Dormouse', 'Dove', 'Dragonfly', 'Drever', 'Duck', 'Dugong', 'Dunker', 'Dunlin', 'Eagle', 'Earwig', 'Echidna', 'Eel', 'Eland', 'Elephant', 'ElephantSeal', 'Elk', 'Emu', 'Falcon', 'Ferret', 'Finch', 'Fish', 'Flamingo', 'Flounder', 'Fly', 'Fossa', 'Fox', 'Frigatebird', 'Frog', 'Galago', 'Gar', 'Gaur', 'Gazelle', 'Gecko', 'Gerbil', 'Gharial', 'GiantPanda', 'Gibbon', 'Giraffe', 'Gnat', 'Gnu', 'Goat', 'Goldfinch', 'Goldfish', 'Goose', 'Gopher', 'Gorilla', 'Goshawk', 'Grasshopper', 'Greyhound', 'Grouse', 'Guanaco', 'GuineaFowl', 'GuineaPig', 'Gull', 'Guppy', 'Hamster', 'Hare', 'Harrier', 'Havanese', 'Hawk', 'Hedgehog', 'Heron', 'Herring', 'Himalayan', 'Hippopotamus', 'Hornet', 'Horse', 'Human', 'Hummingbird', 'Hyena', 'Ibis', 'Iguana', 'Impala', 'Indri', 'Insect', 'Jackal', 'Jaguar', 'Javanese', 'Jay', 'Jellyfish', 'Kakapo', 'Kangaroo', 'Kingfisher', 'Kiwi', 'Koala', 'KomodoDragon', 'Kouprey', 'Kudu', 'Labradoodle', 'Ladybird', 'Lapwing', 'Lark', 'Lemming', 'Lemur', 'Leopard', 'Liger', 'Lion', 'Lionfish', 'Lizard', 'Llama', 
'Lobster', 'Locust', 'Loris', 'Louse', 'Lynx', 'Lyrebird', 'Macaw', 'Magpie', 'Mallard', 'Maltese', 'Manatee', 'Mandrill', 'Markhor', 'Marten', 'Mastiff', 'Mayfly', 'Meerkat', 'Millipede', 'Mink', 'Mole', 'Molly', 'Mongoose', 'Mongrel', 'Monkey', 'Moorhen', 'Moose', 'Mosquito', 'Moth', 'Mouse', 'Mule', 'Narwhal', 'Neanderthal', 'Newfoundland', 'Newt', 'Nightingale', 'Numbat', 'Ocelot', 'Octopus', 'Okapi', 'Olm', 'Opossum', 'Orang-utan', 'Oryx', 'Ostrich', 'Otter', 'Owl', 'Ox', 'Oyster', 'Pademelon', 'Panther', 'Parrot', 'Partridge', 'Peacock', 'Peafowl', 'Pekingese', 'Pelican', 'Penguin', 'Persian', 'Pheasant', 'Pig', 'Pigeon', 'Pika', 'Pike', 'Piranha', 'Platypus', 'Pointer', 'Pony', 'Poodle', 'Porcupine', 'Porpoise', 'Possum', 'PrairieDog', 'Prawn', 'Puffin', 'Pug', 'Puma', 'Quail', 'Quelea', 'Quetzal', 'Quokka', 'Quoll', 'Rabbit', 'Raccoon', 'Ragdoll', 'Rail', 'Ram', 'Rat', 'Rattlesnake', 'Raven', 'RedDeer', 'RedPanda', 'Reindeer', 'Rhinoceros', 'Robin', 'Rook', 'Rottweiler', 'Ruff', 'Salamander', 'Salmon', 'SandDollar', 'Sandpiper', 'Saola', 'Sardine', 'Scorpion', 'SeaLion', 'SeaUrchin', 'Seahorse', 'Seal', 'Serval', 'Shark', 'Sheep', 'Shrew', 'Shrimp', 'Siamese', 'Siberian', 'Skunk', 'Sloth', 'Snail', 'Snake', 'Snowshoe', 'Somali', 'Sparrow', 'Spider', 'Sponge', 'Squid', 'Squirrel', 'Starfish', 'Starling', 'Stingray', 'Stinkbug', 'Stoat', 'Stork', 'Swallow', 'Swan', 'Tang', 'Tapir', 'Tarsier', 'Termite', 'Tetra', 'Tiffany', 'Tiger', 'Toad', 'Tortoise', 'Toucan', 'Tropicbird', 'Trout', 'Tuatara', 'Turkey', 'Turtle', 'Uakari', 'Uguisu', 'Umbrellabird', 'Viper', 'Vulture', 'Wallaby', 'Walrus', 'Warthog', 'Wasp', 'WaterBuffalo', 'Weasel', 'Whale', 'Whippet', 'Wildebeest', 'Wolf', 'Wolverine', 'Wombat', 'Woodcock', 'Woodlouse', 'Woodpecker', 'Worm', 'Wrasse', 'Wren', 'Yak', 'Zebra', 'Zebu', 'Zonkey'] a = ['able', 'above', 'absent', 'absolute', 'abstract', 'abundant', 'academic', 'acceptable', 'accepted', 'accessible', 'accurate', 'accused', 'active', 'actual', 
'acute', 'added', 'additional', 'adequate', 'adjacent', 'administrative', 'adorable', 'advanced', 'adverse', 'advisory', 'aesthetic', 'afraid', 'african', 'aggregate', 'aggressive', 'agreeable', 'agreed', 'agricultural', 'alert', 'alive', 'alleged', 'allied', 'alone', 'alright', 'alternative', 'amateur', 'amazing', 'ambitious', 'american', 'amused', 'ancient', 'angry', 'annoyed', 'annual', 'anonymous', 'anxious', 'appalling', 'apparent', 'applicable', 'appropriate', 'arab', 'arbitrary', 'architectural', 'armed', 'arrogant', 'artificial', 'artistic', 'ashamed', 'asian', 'asleep', 'assistant', 'associated', 'atomic', 'attractive', 'australian', 'automatic', 'autonomous', 'available', 'average', 'awake', 'aware', 'awful', 'awkward', 'back', 'bad', 'balanced', 'bare', 'basic', 'beautiful', 'beneficial', 'better', 'bewildered', 'big', 'binding', 'biological', 'bitter', 'bizarre', 'black', 'blank', 'blind', 'blonde', 'bloody', 'blue', 'blushing', 'boiling', 'bold', 'bored', 'boring', 'bottom', 'brainy', 'brave', 'breakable', 'breezy', 'brief', 'bright', 'brilliant', 'british', 'broad', 'broken', 'brown', 'bumpy', 'burning', 'busy', 'calm', 'canadian', 'capable', 'capitalist', 'careful', 'casual', 'catholic', 'causal', 'cautious', 'central', 'certain', 'changing', 'characteristic', 'charming', 'cheap', 'cheerful', 'chemical', 'chief', 'chilly', 'chinese', 'chosen', 'christian', 'chronic', 'chubby', 'circular', 'civic', 'civil', 'civilian', 'classic', 'classical', 'clean', 'clear', 'clever', 'clinical', 'close', 'closed', 'cloudy', 'clumsy', 'coastal', 'cognitive', 'coherent', 'cold', 'collective', 'colonial', 'colorful', 'colossal', 'coloured', 'colourful', 'combative', 'combined', 'comfortable', 'coming', 'commercial', 'common', 'communist', 'compact', 'comparable', 'comparative', 'compatible', 'competent', 'competitive', 'complete', 'complex', 'complicated', 'comprehensive', 'compulsory', 'conceptual', 'concerned', 'concrete', 'condemned', 'confident', 'confidential', 
'confused', 'conscious', 'conservation', 'conservative', 'considerable', 'consistent', 'constant', 'constitutional', 'contemporary', 'content', 'continental', 'continued', 'continuing', 'continuous', 'controlled', 'controversial', 'convenient', 'conventional', 'convinced', 'convincing', 'cooing', 'cool', 'cooperative', 'corporate', 'correct', 'corresponding', 'costly', 'courageous', 'crazy', 'creative', 'creepy', 'criminal', 'critical', 'crooked', 'crowded', 'crucial', 'crude', 'cruel', 'cuddly', 'cultural', 'curious', 'curly', 'current', 'curved', 'cute', 'daily', 'damaged', 'damp', 'dangerous', 'dark', 'dead', 'deaf', 'deafening', 'dear', 'decent', 'decisive', 'deep', 'defeated', 'defensive', 'defiant', 'definite', 'deliberate', 'delicate', 'delicious', 'delighted', 'delightful', 'democratic', 'dependent', 'depressed', 'desirable', 'desperate', 'detailed', 'determined', 'developed', 'developing', 'devoted', 'different', 'difficult', 'digital', 'diplomatic', 'direct', 'dirty', 'disabled', 'disappointed', 'disastrous', 'disciplinary', 'disgusted', 'distant', 'distinct', 'distinctive', 'distinguished', 'disturbed', 'disturbing', 'diverse', 'divine', 'dizzy', 'domestic', 'dominant', 'double', 'doubtful', 'drab', 'dramatic', 'dreadful', 'driving', 'drunk', 'dry', 'dual', 'due', 'dull', 'dusty', 'dutch', 'dying', 'dynamic', 'eager', 'early', 'eastern', 'easy', 'economic', 'educational', 'eerie', 'effective', 'efficient', 'elaborate', 'elated', 'elderly', 'eldest', 'electoral', 'electric', 'electrical', 'electronic', 'elegant', 'eligible', 'embarrassed', 'embarrassing', 'emotional', 'empirical', 'empty', 'enchanting', 'encouraging', 'endless', 'energetic', 'english', 'enormous', 'enthusiastic', 'entire', 'entitled', 'envious', 'environmental', 'equal', 'equivalent', 'essential', 'established', 'estimated', 'ethical', 'ethnic', 'european', 'eventual', 'everyday', 'evident', 'evil', 'evolutionary', 'exact', 'excellent', 'exceptional', 'excess', 'excessive', 'excited', 
'exciting', 'exclusive', 'existing', 'exotic', 'expected', 'expensive', 'experienced', 'experimental', 'explicit', 'extended', 'extensive', 'external', 'extra', 'extraordinary', 'extreme', 'exuberant', 'faint', 'fair', 'faithful', 'familiar', 'famous', 'fancy', 'fantastic', 'far', 'fascinating', 'fashionable', 'fast', 'fat', 'fatal', 'favourable', 'favourite', 'federal', 'fellow', 'female', 'feminist', 'few', 'fierce', 'filthy', 'final', 'financial', 'fine', 'firm', 'fiscal', 'fit', 'fixed', 'flaky', 'flat', 'flexible', 'fluffy', 'fluttering', 'flying', 'following', 'fond', 'foolish', 'foreign', 'formal', 'formidable', 'forthcoming', 'fortunate', 'forward', 'fragile', 'frail', 'frantic', 'free', 'french', 'frequent', 'fresh', 'friendly', 'frightened', 'front', 'frozen', 'fucking', 'full', 'full-time', 'fun', 'functional', 'fundamental', 'funny', 'furious', 'future', 'fuzzy', 'gastric', 'gay', 'general', 'generous', 'genetic', 'gentle', 'genuine', 'geographical', 'german', 'giant', 'gigantic', 'given', 'glad', 'glamorous', 'gleaming', 'global', 'glorious', 'golden', 'good', 'gorgeous', 'gothic', 'governing', 'graceful', 'gradual', 'grand', 'grateful', 'greasy', 'great', 'greek', 'green', 'grey', 'grieving', 'grim', 'gross', 'grotesque', 'growing', 'grubby', 'grumpy', 'guilty', 'handicapped', 'handsome', 'happy', 'hard', 'harsh', 'head', 'healthy', 'heavy', 'helpful', 'helpless', 'hidden', 'high', 'high-pitched', 'hilarious', 'hissing', 'historic', 'historical', 'hollow', 'holy', 'homeless', 'homely', 'hon', 'honest', 'horizontal', 'horrible', 'hostile', 'hot', 'huge', 'human', 'hungry', 'hurt', 'hushed', 'husky', 'icy', 'ideal', 'identical', 'ideological', 'ill', 'illegal', 'imaginative', 'immediate', 'immense', 'imperial', 'implicit', 'important', 'impossible', 'impressed', 'impressive', 'improved', 'inadequate', 'inappropriate', 'inc', 'inclined', 'increased', 'increasing', 'incredible', 'independent', 'indian', 'indirect', 'individual', 'industrial', 
'inevitable', 'influential', 'informal', 'inherent', 'initial', 'injured', 'inland', 'inner', 'innocent', 'innovative', 'inquisitive', 'instant', 'institutional', 'insufficient', 'intact', 'integral', 'integrated', 'intellectual', 'intelligent', 'intense', 'intensive', 'interested', 'interesting', 'interim', 'interior', 'intermediate', 'internal', 'international', 'intimate', 'invisible', 'involved', 'iraqi', 'irish', 'irrelevant', 'islamic', 'isolated', 'israeli', 'italian', 'itchy', 'japanese', 'jealous', 'jewish', 'jittery', 'joint', 'jolly', 'joyous', 'judicial', 'juicy', 'junior', 'just', 'keen', 'key', 'kind', 'known', 'korean', 'labour', 'large', 'large-scale', 'late', 'latin', 'lazy', 'leading', 'left', 'legal', 'legislative', 'legitimate', 'lengthy', 'lesser', 'level', 'lexical', 'liable', 'liberal', 'light', 'like', 'likely', 'limited', 'linear', 'linguistic', 'liquid', 'literary', 'little', 'live', 'lively', 'living', 'local', 'logical', 'lonely', 'long', 'long-term', 'loose', 'lost', 'loud', 'lovely', 'low', 'loyal', 'ltd', 'lucky', 'mad', 'magenta', 'magic', 'magnetic', 'magnificent', 'main', 'major', 'male', 'mammoth', 'managerial', 'managing', 'manual', 'many', 'marginal', 'marine', 'marked', 'married', 'marvellous', 'marxist', 'mass', 'massive', 'mathematical', 'mature', 'maximum', 'mean', 'meaningful', 'mechanical', 'medical', 'medieval', 'melodic', 'melted', 'mental', 'mere', 'metropolitan', 'mid', 'middle', 'middle-class', 'mighty', 'mild', 'military', 'miniature', 'minimal', 'minimum', 'ministerial', 'minor', 'miserable', 'misleading', 'missing', 'misty', 'mixed', 'moaning', 'mobile', 'moderate', 'modern', 'modest', 'molecular', 'monetary', 'monthly', 'moral', 'motionless', 'muddy', 'multiple', 'mushy', 'musical', 'mute', 'mutual', 'mysterious', 'naked', 'narrow', 'nasty', 'national', 'native', 'natural', 'naughty', 'naval', 'near', 'nearby', 'neat', 'necessary', 'negative', 'neighbouring', 'nervous', 'net', 'neutral', 'new', 'nice', 
'nineteenth-century', 'noble', 'noisy', 'normal', 'northern', 'nosy', 'notable', 'novel', 'nuclear', 'numerous', 'nursing', 'nutritious', 'nutty', 'obedient', 'objective', 'obliged', 'obnoxious', 'obvious', 'occasional', 'occupational', 'odd', 'official', 'ok', 'okay', 'old', 'old-fashioned', 'olympic', 'only', 'open', 'operational', 'opposite', 'optimistic', 'oral', 'orange', 'ordinary', 'organic', 'organisational', 'original', 'orthodox', 'other', 'outdoor', 'outer', 'outrageous', 'outside', 'outstanding', 'overall', 'overseas', 'overwhelming', 'painful', 'pale', 'palestinian', 'panicky', 'parallel', 'parental', 'parliamentary', 'part-time', 'partial', 'particular', 'passing', 'passive', 'past', 'patient', 'payable', 'peaceful', 'peculiar', 'perfect', 'permanent', 'persistent', 'personal', 'petite', 'philosophical', 'physical', 'pink', 'plain', 'planned', 'plastic', 'pleasant', 'pleased', 'poised', 'polish', 'polite', 'political', 'poor', 'popular', 'positive', 'possible', 'post-war', 'potential', 'powerful', 'practical', 'precious', 'precise', 'preferred', 'pregnant', 'preliminary', 'premier', 'prepared', 'present', 'presidential', 'pretty', 'previous', 'prickly', 'primary', 'prime', 'primitive', 'principal', 'printed', 'prior', 'private', 'probable', 'productive', 'professional', 'profitable', 'profound', 'progressive', 'prominent', 'promising', 'proper', 'proposed', 'prospective', 'protective', 'protestant', 'proud', 'provincial', 'psychiatric', 'psychological', 'public', 'puny', 'pure', 'purple', 'purring', 'puzzled', 'quaint', 'qualified', 'quick', 'quickest', 'quiet', 'racial', 'radical', 'rainy', 'random', 'rapid', 'rare', 'raspy', 'rational', 'ratty', 'raw', 'ready', 'real', 'realistic', 'rear', 'reasonable', 'recent', 'red', 'reduced', 'redundant', 'regional', 'registered', 'regular', 'regulatory', 'related', 'relative', 'relaxed', 'relevant', 'reliable', 'relieved', 'religious', 'reluctant', 'remaining', 'remarkable', 'remote', 'renewed', 
'representative', 'repulsive', 'required', 'resident', 'residential', 'resonant', 'respectable', 'respective', 'responsible', 'resulting', 'retail', 'retired', 'revolutionary', 'rich', 'ridiculous', 'right', 'rigid', 'ripe', 'rising', 'rival', 'roasted', 'robust', 'rolling', 'roman', 'romantic', 'rotten', 'rough', 'round', 'royal', 'rubber', 'rude', 'ruling', 'running', 'rural', 'russian', 'sacred', 'sad', 'safe', 'salty', 'satisfactory', 'satisfied', 'scared', 'scary', 'scattered', 'scientific', 'scornful', 'scottish', 'scrawny', 'screeching', 'secondary', 'secret', 'secure', 'select', 'selected', 'selective', 'selfish', 'semantic', 'senior', 'sensible', 'sensitive', 'separate', 'serious', 'severe', 'sexual', 'shaggy', 'shaky', 'shallow', 'shared', 'sharp', 'sheer', 'shiny', 'shivering', 'shocked', 'short', 'short-term', 'shrill', 'shy', 'sick', 'significant', 'silent', 'silky', 'silly', 'similar', 'simple', 'single', 'skilled', 'skinny', 'sleepy', 'slight', 'slim', 'slimy', 'slippery', 'slow', 'small', 'smart', 'smiling', 'smoggy', 'smooth', 'so-called', 'social', 'socialist', 'soft', 'solar', 'sole', 'solid', 'sophisticated', 'sore', 'sorry', 'sound', 'sour', 'southern', 'soviet', 'spanish', 'spare', 'sparkling', 'spatial', 'special', 'specific', 'specified', 'spectacular', 'spicy', 'spiritual', 'splendid', 'spontaneous', 'sporting', 'spotless', 'spotty', 'square', 'squealing', 'stable', 'stale', 'standard', 'static', 'statistical', 'statutory', 'steady', 'steep', 'sticky', 'stiff', 'still', 'stingy', 'stormy', 'straight', 'straightforward', 'strange', 'strategic', 'strict', 'striking', 'striped', 'strong', 'structural', 'stuck', 'stupid', 'subjective', 'subsequent', 'substantial', 'subtle', 'successful', 'successive', 'sudden', 'sufficient', 'suitable', 'sunny', 'super', 'superb', 'superior', 'supporting', 'supposed', 'supreme', 'sure', 'surprised', 'surprising', 'surrounding', 'surviving', 'suspicious', 'sweet', 'swift', 'swiss', 'symbolic', 'sympathetic', 
'systematic', 'tall', 'tame', 'tan', 'tart', 'tasteless', 'tasty', 'technical', 'technological', 'teenage', 'temporary', 'tender', 'tense', 'terrible', 'territorial', 'testy', 'then', 'theoretical', 'thick', 'thin', 'thirsty', 'thorough', 'thoughtful', 'thoughtless', 'thundering', 'tight', 'tiny', 'tired', 'top', 'tory', 'total', 'tough', 'toxic', 'traditional', 'tragic', 'tremendous', 'tricky', 'tropical', 'troubled', 'turkish', 'typical', 'ugliest', 'ugly', 'ultimate', 'unable', 'unacceptable', 'unaware', 'uncertain', 'unchanged', 'uncomfortable', 'unconscious', 'underground', 'underlying', 'unemployed', 'uneven', 'unexpected', 'unfair', 'unfortunate', 'unhappy', 'uniform', 'uninterested', 'unique', 'united', 'universal', 'unknown', 'unlikely', 'unnecessary', 'unpleasant', 'unsightly', 'unusual', 'unwilling', 'upper', 'upset', 'uptight', 'urban', 'urgent', 'used', 'useful', 'useless', 'usual', 'vague', 'valid', 'valuable', 'variable', 'varied', 'various', 'varying', 'vast', 'verbal', 'vertical', 'very', 'victorian', 'victorious', 'video-taped', 'violent', 'visible', 'visiting', 'visual', 'vital', 'vivacious', 'vivid', 'vocational', 'voiceless', 'voluntary', 'vulnerable', 'wandering', 'warm', 'wasteful', 'watery', 'weak', 'wealthy', 'weary', 'wee', 'weekly', 'weird', 'welcome', 'well', 'well-known', 'welsh', 'western', 'wet', 'whispering', 'white', 'whole', 'wicked', 'wide', 'wide-eyed', 'widespread', 'wild', 'willing', 'wise', 'witty', 'wonderful', 'wooden', 'working', 'working-class', 'worldwide', 'worried', 'worrying', 'worthwhile', 'worthy', 'written', 'wrong', 'yellow', 'young', 'yummy', 'zany', 'zealous'] b = ['abiding', 'accelerating', 'accepting', 'accomplishing', 'achieving', 'acquiring', 'acteding', 'activating', 'adapting', 'adding', 'addressing', 'administering', 'admiring', 'admiting', 'adopting', 'advising', 'affording', 'agreeing', 'alerting', 'alighting', 'allowing', 'altereding', 'amusing', 'analyzing', 'announcing', 'annoying', 'answering', 
'anticipating', 'apologizing', 'appearing', 'applauding', 'applieding', 'appointing', 'appraising', 'appreciating', 'approving', 'arbitrating', 'arguing', 'arising', 'arranging', 'arresting', 'arriving', 'ascertaining', 'asking', 'assembling', 'assessing', 'assisting', 'assuring', 'attaching', 'attacking', 'attaining', 'attempting', 'attending', 'attracting', 'auditeding', 'avoiding', 'awaking', 'backing', 'baking', 'balancing', 'baning', 'banging', 'baring', 'bating', 'bathing', 'battling', 'bing', 'beaming', 'bearing', 'beating', 'becoming', 'beging', 'begining', 'behaving', 'beholding', 'belonging', 'bending', 'beseting', 'beting', 'biding', 'binding', 'biting', 'bleaching', 'bleeding', 'blessing', 'blinding', 'blinking', 'bloting', 'blowing', 'blushing', 'boasting', 'boiling', 'bolting', 'bombing', 'booking', 'boring', 'borrowing', 'bouncing', 'bowing', 'boxing', 'braking', 'branching', 'breaking', 'breathing', 'breeding', 'briefing', 'bringing', 'broadcasting', 'bruising', 'brushing', 'bubbling', 'budgeting', 'building', 'bumping', 'burning', 'bursting', 'burying', 'busting', 'buying', 'buzing', 'calculating', 'calling', 'camping', 'caring', 'carrying', 'carving', 'casting', 'cataloging', 'catching', 'causing', 'challenging', 'changing', 'charging', 'charting', 'chasing', 'cheating', 'checking', 'cheering', 'chewing', 'choking', 'choosing', 'choping', 'claiming', 'claping', 'clarifying', 'classifying', 'cleaning', 'clearing', 'clinging', 'cliping', 'closing', 'clothing', 'coaching', 'coiling', 'collecting', 'coloring', 'combing', 'coming', 'commanding', 'communicating', 'comparing', 'competing', 'compiling', 'complaining', 'completing', 'composing', 'computing', 'conceiving', 'concentrating', 'conceptualizing', 'concerning', 'concluding', 'conducting', 'confessing', 'confronting', 'confusing', 'connecting', 'conserving', 'considering', 'consisting', 'consolidating', 'constructing', 'consulting', 'containing', 'continuing', 'contracting', 'controling', 
'converting', 'coordinating', 'copying', 'correcting', 'correlating', 'costing', 'coughing', 'counseling', 'counting', 'covering', 'cracking', 'crashing', 'crawling', 'creating', 'creeping', 'critiquing', 'crossing', 'crushing', 'crying', 'curing', 'curling', 'curving', 'cuting', 'cycling', 'daming', 'damaging', 'dancing', 'daring', 'dealing', 'decaying', 'deceiving', 'deciding', 'decorating', 'defining', 'delaying', 'delegating', 'delighting', 'delivering', 'demonstrating', 'depending', 'describing', 'deserting', 'deserving', 'designing', 'destroying', 'detailing', 'detecting', 'determining', 'developing', 'devising', 'diagnosing', 'diging', 'directing', 'disagreing', 'disappearing', 'disapproving', 'disarming', 'discovering', 'disliking', 'dispensing', 'displaying', 'disproving', 'dissecting', 'distributing', 'diving', 'diverting', 'dividing', 'doing', 'doubling', 'doubting', 'drafting', 'draging', 'draining', 'dramatizing', 'drawing', 'dreaming', 'dressing', 'drinking', 'driping', 'driving', 'dropping', 'drowning', 'druming', 'drying', 'dusting', 'dwelling', 'earning', 'eating', 'editeding', 'educating', 'eliminating', 'embarrassing', 'employing', 'emptying', 'enacteding', 'encouraging', 'ending', 'enduring', 'enforcing', 'engineering', 'enhancing', 'enjoying', 'enlisting', 'ensuring', 'entering', 'entertaining', 'escaping', 'establishing', 'estimating', 'evaluating', 'examining', 'exceeding', 'exciting', 'excusing', 'executing', 'exercising', 'exhibiting', 'existing', 'expanding', 'expecting', 'expediting', 'experimenting', 'explaining', 'exploding', 'expressing', 'extending', 'extracting', 'facing', 'facilitating', 'fading', 'failing', 'fancying', 'fastening', 'faxing', 'fearing', 'feeding', 'feeling', 'fencing', 'fetching', 'fighting', 'filing', 'filling', 'filming', 'finalizing', 'financing', 'finding', 'firing', 'fiting', 'fixing', 'flaping', 'flashing', 'fleing', 'flinging', 'floating', 'flooding', 'flowing', 'flowering', 'flying', 'folding', 'following', 
'fooling', 'forbiding', 'forcing', 'forecasting', 'foregoing', 'foreseing', 'foretelling', 'forgeting', 'forgiving', 'forming', 'formulating', 'forsaking', 'framing', 'freezing', 'frightening', 'frying', 'gathering', 'gazing', 'generating', 'geting', 'giving', 'glowing', 'gluing', 'going', 'governing', 'grabing', 'graduating', 'grating', 'greasing', 'greeting', 'grinning', 'grinding', 'griping', 'groaning', 'growing', 'guaranteeing', 'guarding', 'guessing', 'guiding', 'hammering', 'handing', 'handling', 'handwriting', 'hanging', 'happening', 'harassing', 'harming', 'hating', 'haunting', 'heading', 'healing', 'heaping', 'hearing', 'heating', 'helping', 'hiding', 'hitting', 'holding', 'hooking', 'hoping', 'hopping', 'hovering', 'hugging', 'hmuming', 'hunting', 'hurrying', 'hurting', 'hypothesizing', 'identifying', 'ignoring', 'illustrating', 'imagining', 'implementing', 'impressing', 'improving', 'improvising', 'including', 'increasing', 'inducing', 'influencing', 'informing', 'initiating', 'injecting', 'injuring', 'inlaying', 'innovating', 'inputing', 'inspecting', 'inspiring', 'installing', 'instituting', 'instructing', 'insuring', 'integrating', 'intending', 'intensifying', 'interesting', 'interfering', 'interlaying', 'interpreting', 'interrupting', 'interviewing', 'introducing', 'inventing', 'inventorying', 'investigating', 'inviting', 'irritating', 'itching', 'jailing', 'jamming', 'jogging', 'joining', 'joking', 'judging', 'juggling', 'jumping', 'justifying', 'keeping', 'kepting', 'kicking', 'killing', 'kissing', 'kneeling', 'kniting', 'knocking', 'knotting', 'knowing', 'labeling', 'landing', 'lasting', 'laughing', 'launching', 'laying', 'leading', 'leaning', 'leaping', 'learning', 'leaving', 'lecturing', 'leding', 'lending', 'leting', 'leveling', 'licensing', 'licking', 'lying', 'lifteding', 'lighting', 'lightening', 'liking', 'listing', 'listening', 'living', 'loading', 'locating', 'locking', 'loging', 'longing', 'looking', 'losing', 'loving', 'maintaining', 
'making', 'maning', 'managing', 'manipulating', 'manufacturing', 'mapping', 'marching', 'marking', 'marketing', 'marrying', 'matching', 'mating', 'mattering', 'meaning', 'measuring', 'meddling', 'mediating', 'meeting', 'melting', 'melting', 'memorizing', 'mending', 'mentoring', 'milking', 'mining', 'misleading', 'missing', 'misspelling', 'mistaking', 'misunderstanding', 'mixing', 'moaning', 'modeling', 'modifying', 'monitoring', 'mooring', 'motivating', 'mourning', 'moving', 'mowing', 'muddling', 'muging', 'multiplying', 'murdering', 'nailing', 'naming', 'navigating', 'needing', 'negotiating', 'nesting', 'noding', 'nominating', 'normalizing', 'noting', 'noticing', 'numbering', 'obeying', 'objecting', 'observing', 'obtaining', 'occuring', 'offending', 'offering', 'officiating', 'opening', 'operating', 'ordering', 'organizing', 'orienteding', 'originating', 'overcoming', 'overdoing', 'overdrawing', 'overflowing', 'overhearing', 'overtaking', 'overthrowing', 'owing', 'owning', 'packing', 'paddling', 'painting', 'parking', 'parting', 'participating', 'passing', 'pasting', 'pating', 'pausing', 'paying', 'pecking', 'pedaling', 'peeling', 'peeping', 'perceiving', 'perfecting', 'performing', 'permiting', 'persuading', 'phoning', 'photographing', 'picking', 'piloting', 'pinching', 'pining', 'pinpointing', 'pioneering', 'placing', 'planing', 'planting', 'playing', 'pleading', 'pleasing', 'plugging', 'pointing', 'poking', 'polishing', 'poping', 'possessing', 'posting', 'pouring', 'practicing', 'praiseding', 'praying', 'preaching', 'preceding', 'predicting', 'prefering', 'preparing', 'prescribing', 'presenting', 'preserving', 'preseting', 'presiding', 'pressing', 'pretending', 'preventing', 'pricking', 'printing', 'processing', 'procuring', 'producing', 'professing', 'programing', 'progressing', 'projecting', 'promising', 'promoting', 'proofreading', 'proposing', 'protecting', 'proving', 'providing', 'publicizing', 'pulling', 'pumping', 'punching', 'puncturing', 'punishing', 
'purchasing', 'pushing', 'puting', 'qualifying', 'questioning', 'queuing', 'quiting', 'racing', 'radiating', 'raining', 'raising', 'ranking', 'rating', 'reaching', 'reading', 'realigning', 'realizing', 'reasoning', 'receiving', 'recognizing', 'recommending', 'reconciling', 'recording', 'recruiting', 'reducing', 'referring', 'reflecting', 'refusing', 'regreting', 'regulating', 'rehabilitating', 'reigning', 'reinforcing', 'rejecting', 'rejoicing', 'relating', 'relaxing', 'releasing', 'relying', 'remaining', 'remembering', 'reminding', 'removing', 'rendering', 'reorganizing', 'repairing', 'repeating', 'replacing', 'replying', 'reporting', 'representing', 'reproducing', 'requesting', 'rescuing', 'researching', 'resolving', 'responding', 'restoreding', 'restructuring', 'retiring', 'retrieving', 'returning', 'reviewing', 'revising', 'rhyming', 'riding', 'riding', 'ringing', 'rinsing', 'rising', 'risking', 'robing', 'rocking', 'rolling', 'roting', 'rubing', 'ruining', 'ruling', 'runing', 'rushing', 'sacking', 'sailing', 'satisfying', 'saving', 'sawing', 'saying', 'scaring', 'scattering', 'scheduling', 'scolding', 'scorching', 'scraping', 'scratching', 'screaming', 'screwing', 'scribbling', 'scrubing', 'sealing', 'searching', 'securing', 'seing', 'seeking', 'selecting', 'selling', 'sending', 'sensing', 'separating', 'serving', 'servicing', 'seting', 'settling', 'sewing', 'shading', 'shaking', 'shaping', 'sharing', 'shaving', 'shearing', 'sheding', 'sheltering', 'shining', 'shivering', 'shocking', 'shoing', 'shooting', 'shoping', 'showing', 'shrinking', 'shruging', 'shuting', 'sighing', 'signing', 'signaling', 'simplifying', 'sining', 'singing', 'sinking', 'siping', 'siting', 'sketching', 'skiing', 'skiping', 'slaping', 'slaying', 'sleeping', 'sliding', 'slinging', 'slinking', 'sliping', 'sliting', 'slowing', 'smashing', 'smelling', 'smiling', 'smiting', 'smoking', 'snatching', 'sneaking', 'sneezing', 'sniffing', 'snoring', 'snowing', 'soaking', 'solving', 'soothing', 
'soothsaying', 'sorting', 'sounding', 'sowing', 'sparing', 'sparking', 'sparkling', 'speaking', 'specifying', 'speeding', 'spelling', 'spending', 'spilling', 'spining', 'spiting', 'spliting', 'spoiling', 'spoting', 'spraying', 'spreading', 'springing', 'sprouting', 'squashing', 'squeaking', 'squealing', 'squeezing', 'staining', 'stamping', 'standing', 'staring', 'starting', 'staying', 'stealing', 'steering', 'stepping', 'sticking', 'stimulating', 'stinging', 'stinking', 'stirring', 'stitching', 'stoping', 'storing', 'straping', 'streamlining', 'strengthening', 'stretching', 'striding', 'striking', 'stringing', 'stripping', 'striving', 'stroking', 'structuring', 'studying', 'stuffing', 'subleting', 'subtracting', 'succeeding', 'sucking', 'suffering', 'suggesting', 'suiting', 'summarizing', 'supervising', 'supplying', 'supporting', 'supposing', 'surprising', 'surrounding', 'suspecting', 'suspending', 'swearing', 'sweating', 'sweeping', 'swelling', 'swimming', 'swinging', 'switching', 'symbolizing', 'synthesizing', 'systemizing', 'tabulating', 'taking', 'talking', 'taming', 'taping', 'targeting', 'tasting', 'teaching', 'tearing', 'teasing', 'telephoning', 'telling', 'tempting', 'terrifying', 'testing', 'thanking', 'thawing', 'thinking', 'thriving', 'throwing', 'thrusting', 'ticking', 'tickling', 'tying', 'timing', 'tiping', 'tiring', 'touching', 'touring', 'towing', 'tracing', 'trading', 'training', 'transcribing', 'transfering', 'transforming', 'translating', 'transporting', 'traping', 'traveling', 'treading', 'treating', 'trembling', 'tricking', 'triping', 'troting', 'troubling', 'troubleshooting', 'trusting', 'trying', 'tuging', 'tumbling', 'turning', 'tutoring', 'twisting', 'typing', 'undergoing', 'understanding', 'undertaking', 'undressing', 'unfastening', 'unifying', 'uniting', 'unlocking', 'unpacking', 'untidying', 'updating', 'upgrading', 'upholding', 'upseting', 'using', 'utilizing', 'vanishing', 'verbalizing', 'verifying', 'vexing', 'visiting', 'wailing', 
def prepare_parser():
    """Build the command-line parser for the inception-moments script.

    Returns:
        ArgumentParser: parser exposing ``--dataset``, ``--data_root``,
        ``--batch_size``, ``--parallel``, ``--augment``, ``--num_workers``,
        ``--shuffle`` and ``--seed``.
    """
    usage = 'Calculate and store inception metrics.'
    parser = ArgumentParser(description=usage)
    # Adjacent string literals are concatenated verbatim, so the first
    # fragment must end with a space to keep the two sentences apart in
    # the rendered --help output.
    parser.add_argument(
        '--dataset', type=str, default='I128_hdf5',
        help='Which Dataset to train on, out of I128, I256, C10, C100... '
             'Append _hdf5 to use the hdf5 version of the dataset. (default: %(default)s)')
    parser.add_argument(
        '--data_root', type=str, default='data',
        help='Default location where data is stored (default: %(default)s)')
    parser.add_argument(
        '--batch_size', type=int, default=64,
        help='Default overall batchsize (default: %(default)s)')
    parser.add_argument(
        '--parallel', action='store_true', default=False,
        help='Train with multiple GPUs (default: %(default)s)')
    parser.add_argument(
        '--augment', action='store_true', default=False,
        help='Augment with random crops and flips (default: %(default)s)')
    parser.add_argument(
        '--num_workers', type=int, default=8,
        help='Number of dataloader workers (default: %(default)s)')
    parser.add_argument(
        '--shuffle', action='store_true', default=False,
        help='Shuffle the data? (default: %(default)s)')
    parser.add_argument(
        '--seed', type=int, default=0,
        help='Random seed to use.')
    return parser
Remove "hdf5" by default # (the FID code also knows to strip "hdf5") print('Calculating means and covariances...') mu, sigma = np.mean(pool, axis=0), np.cov(pool, rowvar=False) print('Saving calculated means and covariances to disk...') np.savez(config['dataset'].strip('_hdf5')+'_inception_moments.npz', **{'mu' : mu, 'sigma' : sigma}) def main(): # parse command line parser = prepare_parser() config = vars(parser.parse_args()) print(config) run(config) if __name__ == '__main__': main() ================================================ FILE: FQ-BigGAN/datasets.py ================================================ ''' Datasets This file contains definitions for our CIFAR, ImageFolder, and HDF5 datasets ''' import os import os.path import sys from PIL import Image import numpy as np from tqdm import tqdm, trange import torchvision.datasets as dset import torchvision.transforms as transforms from torchvision.datasets.utils import download_url, check_integrity import torch.utils.data as data from torch.utils.data import DataLoader IMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm'] def is_image_file(filename): """Checks if a file is an image. 
def find_classes(dir):
    """List the class sub-directories of ``dir`` and index them.

    Args:
        dir: path of the directory whose immediate sub-directories are
            treated as class names.

    Returns:
        tuple: ``(classes, class_to_idx)`` where ``classes`` is the sorted
        list of sub-directory names and ``class_to_idx`` maps each name to
        its position in that list.
    """
    # Plain files are ignored; only directory entries count as classes.
    classes = sorted(
        entry for entry in os.listdir(dir)
        if os.path.isdir(os.path.join(dir, entry)))
    class_to_idx = {name: position for position, name in enumerate(classes)}
    return classes, class_to_idx
class_to_idx (dict): Dict with items (class_name, class_index). imgs (list): List of (image path, class_index) tuples """ def __init__(self, root, transform=None, target_transform=None, loader=default_loader, load_in_mem=False, index_filename='imagenet_imgs.npz', **kwargs): classes, class_to_idx = find_classes(root) # Load pre-computed image directory walk if os.path.exists(index_filename): print('Loading pre-saved Index file %s...' % index_filename) imgs = np.load(index_filename)['imgs'] # If first time, walk the folder directory and save the # results to a pre-computed file. else: print('Generating Index file %s...' % index_filename) imgs = make_dataset(root, class_to_idx) np.savez_compressed(index_filename, **{'imgs' : imgs}) if len(imgs) == 0: raise(RuntimeError("Found 0 images in subfolders of: " + root + "\n" "Supported image extensions are: " + ",".join(IMG_EXTENSIONS))) self.root = root self.imgs = imgs self.classes = classes self.class_to_idx = class_to_idx self.transform = transform self.target_transform = target_transform self.loader = loader self.load_in_mem = load_in_mem if self.load_in_mem: print('Loading all images into memory...') self.data, self.labels = [], [] for index in tqdm(range(len(self.imgs))): path, target = imgs[index][0], imgs[index][1] self.data.append(self.transform(self.loader(path))) self.labels.append(target) def __getitem__(self, index): """ Args: index (int): Index Returns: tuple: (image, target) where target is class_index of the target class. 
""" if self.load_in_mem: img = self.data[index] target = self.labels[index] else: path, target = self.imgs[index] img = self.loader(str(path)) if self.transform is not None: img = self.transform(img) if self.target_transform is not None: target = self.target_transform(target) # print(img.size(), target) return img, int(target) def __len__(self): return len(self.imgs) def __repr__(self): fmt_str = 'Dataset ' + self.__class__.__name__ + '\n' fmt_str += ' Number of datapoints: {}\n'.format(self.__len__()) fmt_str += ' Root Location: {}\n'.format(self.root) tmp = ' Transforms (if any): ' fmt_str += '{0}{1}\n'.format(tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) tmp = ' Target Transforms (if any): ' fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) return fmt_str ''' ILSVRC_HDF5: A dataset to support I/O from an HDF5 to avoid having to load individual images all the time. ''' import h5py as h5 import torch class ILSVRC_HDF5(data.Dataset): def __init__(self, root, transform=None, target_transform=None, load_in_mem=False, train=True,download=False, validate_seed=0, val_split=0, **kwargs): # last four are dummies self.root = root self.num_imgs = len(h5.File(root, 'r')['labels']) # self.transform = transform self.target_transform = target_transform # Set the transform here self.transform = transform # load the entire dataset into memory? self.load_in_mem = load_in_mem # If loading into memory, do so now if self.load_in_mem: print('Loading %s into memory...' % root) with h5.File(root,'r') as f: self.data = f['imgs'][:] self.labels = f['labels'][:] def __getitem__(self, index): """ Args: index (int): Index Returns: tuple: (image, target) where target is class_index of the target class. 
""" # If loaded the entire dataset in RAM, get image from memory if self.load_in_mem: img = self.data[index] target = self.labels[index] # Else load it from disk else: with h5.File(self.root,'r') as f: img = f['imgs'][index] target = f['labels'][index] # if self.transform is not None: # img = self.transform(img) # Apply my own transform img = ((torch.from_numpy(img).float() / 255) - 0.5) * 2 if self.target_transform is not None: target = self.target_transform(target) return img, int(target) def __len__(self): return self.num_imgs # return len(self.f['imgs']) import pickle class CIFAR10(dset.CIFAR10): def __init__(self, root, train=True, transform=None, target_transform=None, download=True, validate_seed=0, val_split=0, load_in_mem=True, **kwargs): self.root = os.path.expanduser(root) self.transform = transform self.target_transform = target_transform self.train = train # training set or test set self.val_split = val_split if download: self.download() if not self._check_integrity(): raise RuntimeError('Dataset not found or corrupted.' 
+ ' You can use download=True to download it') # now load the picked numpy arrays self.data = [] self.labels= [] for fentry in self.train_list: f = fentry[0] file = os.path.join(self.root, self.base_folder, f) fo = open(file, 'rb') if sys.version_info[0] == 2: entry = pickle.load(fo) else: entry = pickle.load(fo, encoding='latin1') self.data.append(entry['data']) if 'labels' in entry: self.labels += entry['labels'] else: self.labels += entry['fine_labels'] fo.close() self.data = np.concatenate(self.data) # Randomly select indices for validation if self.val_split > 0: label_indices = [[] for _ in range(max(self.labels)+1)] for i,l in enumerate(self.labels): label_indices[l] += [i] label_indices = np.asarray(label_indices) # randomly grab 500 elements of each class np.random.seed(validate_seed) self.val_indices = [] for l_i in label_indices: self.val_indices += list(l_i[np.random.choice(len(l_i), int(len(self.data) * val_split) // (max(self.labels) + 1) ,replace=False)]) if self.train=='validate': self.data = self.data[self.val_indices] self.labels = list(np.asarray(self.labels)[self.val_indices]) self.data = self.data.reshape((int(50e3 * self.val_split), 3, 32, 32)) self.data = self.data.transpose((0, 2, 3, 1)) # convert to HWC elif self.train: print(np.shape(self.data)) if self.val_split > 0: self.data = np.delete(self.data,self.val_indices,axis=0) self.labels = list(np.delete(np.asarray(self.labels),self.val_indices,axis=0)) self.data = self.data.reshape((int(50e3 * (1.-self.val_split)), 3, 32, 32)) self.data = self.data.transpose((0, 2, 3, 1)) # convert to HWC else: f = self.test_list[0][0] file = os.path.join(self.root, self.base_folder, f) fo = open(file, 'rb') if sys.version_info[0] == 2: entry = pickle.load(fo) else: entry = pickle.load(fo, encoding='latin1') self.data = entry['data'] if 'labels' in entry: self.labels = entry['labels'] else: self.labels = entry['fine_labels'] fo.close() self.data = self.data.reshape((10000, 3, 32, 32)) self.data = 
def prepare_parser():
    """Build the command-line parser for the TF1.3 Inception Score script.

    Returns:
        ArgumentParser: parser exposing ``--experiment_name``,
        ``--experiment_root`` and ``--batch_size``.
    """
    usage = 'Parser for TF1.3- Inception Score scripts.'
    parser = ArgumentParser(description=usage)
    # The help text is double-quoted so that the apostrophe survives; a
    # doubled single quote inside a single-quoted literal is parsed as two
    # adjacent literals and silently drops the apostrophe.
    parser.add_argument(
        '--experiment_name', type=str, default='',
        help="Which experiment's samples.npz file to pull and evaluate")
    parser.add_argument(
        '--experiment_root', type=str, default='samples',
        help='Default location where samples are stored (default: %(default)s)')
    parser.add_argument(
        '--batch_size', type=int, default=500,
        help='Default overall batchsize (default: %(default)s)')
    return parser
float(total_size) * 100.0)) sys.stdout.flush() filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress) print() statinfo = os.stat(filepath) print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.') tarfile.open(filepath, 'r:gz').extractall(MODEL_DIR) with tf.gfile.FastGFile(os.path.join( MODEL_DIR, 'classify_image_graph_def.pb'), 'rb') as f: graph_def = tf.GraphDef() graph_def.ParseFromString(f.read()) _ = tf.import_graph_def(graph_def, name='') # Works with an arbitrary minibatch size. with tf.Session() as sess: pool3 = sess.graph.get_tensor_by_name('pool_3:0') ops = pool3.graph.get_operations() for op_idx, op in enumerate(ops): for o in op.outputs: shape = o.get_shape() shape = [s.value for s in shape] new_shape = [] for j, s in enumerate(shape): if s == 1 and j == 0: new_shape.append(None) else: new_shape.append(s) o._shape = tf.TensorShape(new_shape) w = sess.graph.get_operation_by_name("softmax/logits/MatMul").inputs[1] logits = tf.matmul(tf.squeeze(pool3), w) softmax = tf.nn.softmax(logits) # if softmax is None: # No need to functionalize like this. 
_init_inception() fname = '%s/%s/samples.npz' % (config['experiment_root'], config['experiment_name']) print('loading %s ...'%fname) ims = np.load(fname)['x'] import time t0 = time.time() inc_mean, inc_std, pool_activations = get_inception_score(list(ims.swapaxes(1,2).swapaxes(2,3)), splits=10) t1 = time.time() print('Saving pool to numpy file for FID calculations...') np.savez('%s/%s/TF_pool.npz' % (config['experiment_root'], config['experiment_name']), **{'pool_mean': np.mean(pool_activations,axis=0), 'pool_var': np.cov(pool_activations, rowvar=False)}) print('Inception took %3f seconds, score of %3f +/- %3f.'%(t1-t0, inc_mean, inc_std)) def main(): # parse command line and run parser = prepare_parser() config = vars(parser.parse_args()) print(config) run(config) if __name__ == '__main__': main() ================================================ FILE: FQ-BigGAN/inception_utils.py ================================================ ''' Inception utilities This file contains methods for calculating IS and FID, using either the original numpy code or an accelerated fully-pytorch version that uses a fast newton-schulz approximation for the matrix sqrt. There are also methods for acquiring a desired number of samples from the Generator, and parallelizing the inbuilt PyTorch inception network. NOTE that Inception Scores and FIDs calculated using these methods will *not* be directly comparable to values calculated using the original TF IS/FID code. You *must* use the TF model if you wish to report and compare numbers. This code tends to produce IS values that are 5-10% lower than those obtained through TF. ''' import numpy as np from scipy import linalg # For numpy FID import time import torch import torch.nn as nn import torch.nn.functional as F from torch.nn import Parameter as P from torchvision.models.inception import inception_v3 # Module that wraps the inception network to enable use with dataparallel and # returning pool features and logits. 
# A pytorch implementation of cov, from Modar M. Alfadly
# https://discuss.pytorch.org/t/covariance-and-gradient-support/16217/2
def torch_cov(m, rowvar=False):
    '''Estimate a covariance matrix given data.

    Covariance indicates the level to which two variables vary together.
    If we examine N-dimensional samples, `X = [x_1, x_2, ... x_N]^T`,
    then the covariance matrix element `C_{ij}` is the covariance of
    `x_i` and `x_j`. The element `C_{ii}` is the variance of `x_i`.

    The input tensor is left untouched: centring is done on a new tensor
    rather than in place on a view of the caller's data.

    Args:
        m: A 1-D or 2-D tensor containing multiple variables and
            observations. A 1-D tensor is treated as a single variable.
        rowvar: If `rowvar` is True, each row represents a variable, with
            observations in the columns. Otherwise (the default) each
            column represents a variable and the rows contain
            observations. A 2-D input with exactly one row is never
            transposed, i.e. it is always read as one variable.

    Returns:
        The covariance matrix of the variables, normalized by the number
        of observations minus one and squeezed (a single variable yields
        a 0-dim tensor).

    Raises:
        ValueError: if `m` has more than 2 dimensions.
    '''
    if m.dim() > 2:
        raise ValueError('m has more than 2 dimensions')
    if m.dim() < 2:
        m = m.view(1, -1)
    if not rowvar and m.size(0) != 1:
        m = m.t()
    # m = m.type(torch.double)  # uncomment this line if desired
    fact = 1.0 / (m.size(1) - 1)
    # `m` may still be a view of the caller's tensor (view()/t() share
    # storage), so subtract out-of-place to avoid mutating the argument.
    m = m - torch.mean(m, dim=1, keepdim=True)
    mt = m.t()  # if complex: mt = m.t().conj()
    return fact * m.matmul(mt).squeeze()
Params: -- mu1 : Numpy array containing the activations of a layer of the inception net (like returned by the function 'get_predictions') for generated samples. -- mu2 : The sample mean over activations, precalculated on an representive data set. -- sigma1: The covariance matrix over activations for generated samples. -- sigma2: The covariance matrix over activations, precalculated on an representive data set. Returns: -- : The Frechet Distance. """ mu1 = np.atleast_1d(mu1) mu2 = np.atleast_1d(mu2) sigma1 = np.atleast_2d(sigma1) sigma2 = np.atleast_2d(sigma2) assert mu1.shape == mu2.shape, \ 'Training and test mean vectors have different lengths' assert sigma1.shape == sigma2.shape, \ 'Training and test covariances have different dimensions' diff = mu1 - mu2 # Product might be almost singular covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False) if not np.isfinite(covmean).all(): msg = ('fid calculation produces singular product; ' 'adding %s to diagonal of cov estimates') % eps print(msg) offset = np.eye(sigma1.shape[0]) * eps covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset)) # Numerical error might give slight imaginary component if np.iscomplexobj(covmean): print('wat') if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3): m = np.max(np.abs(covmean.imag)) raise ValueError('Imaginary component {}'.format(m)) covmean = covmean.real tr_covmean = np.trace(covmean) out = diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean return out def torch_calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6): """Pytorch implementation of the Frechet Distance. Taken from https://github.com/bioinf-jku/TTUR The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1) and X_2 ~ N(mu_2, C_2) is d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)). Stable version by Dougal J. Sutherland. 
# Calculate Inception Score mean + std given softmax'd logits and number of splits
def calculate_inception_score(pred, num_splits=10):
    """Compute the Inception Score over equally sized consecutive splits.

    Args:
        pred: 2-D numpy array of softmax outputs, one row per sample.
        num_splits: number of consecutive chunks the rows are divided into;
            each chunk holds ``pred.shape[0] // num_splits`` rows, so any
            remainder rows at the end are not scored.

    Returns:
        tuple: ``(mean, std)`` of the per-split scores.
    """
    rows_per_split = pred.shape[0] // num_splits

    def _split_score(split_index):
        # exp of the mean KL divergence between each row and the split marginal.
        first = split_index * rows_per_split
        chunk = pred[first:first + rows_per_split, :]
        marginal = np.expand_dims(np.mean(chunk, 0), 0)
        kl_per_class = chunk * (np.log(chunk) - np.log(marginal))
        return np.exp(np.mean(np.sum(kl_per_class, 1)))

    scores = [_split_score(split_index) for split_index in range(num_splits)]
    return np.mean(scores), np.std(scores)
def accumulate_inception_activations(sample, net, num_inception_images=50000):
    """Run the sampler through the net until enough activations are gathered.

    Args:
        sample: zero-argument callable returning ``(images, labels)``.
        net: callable mapping a float image batch to ``(pool, logits)``.
        num_inception_images: minimum number of rows to collect. Whole
            batches are kept, so the result may contain more rows than this.

    Returns:
        tuple: ``(pool, logits, labels)``, each the concatenation along
        dim 0 of the per-batch values; ``logits`` holds softmax
        probabilities (softmax over dim 1), not raw logits.
    """
    pool, logits, labels = [], [], []
    # Keep a running row count instead of concatenating every collected
    # batch on each iteration just to read its length.
    num_collected = 0
    while num_collected < num_inception_images:
        with torch.no_grad():
            images, labels_val = sample()
            pool_val, logits_val = net(images.float())
            pool.append(pool_val)
            logits.append(F.softmax(logits_val, 1))
            labels.append(labels_val)
        num_collected += logits_val.shape[0]
    return torch.cat(pool, 0), torch.cat(logits, 0), torch.cat(labels, 0)
# By default, remove the "hdf5" from dataset dataset = dataset.strip('_hdf5') data_mu = np.load(dataset+'_inception_moments.npz')['mu'] data_sigma = np.load(dataset+'_inception_moments.npz')['sigma'] # Load network net = load_inception_net(parallel) def get_inception_metrics(sample, num_inception_images, num_splits=10, prints=True, use_torch=False): if prints: print('Gathering activations...') pool, logits, labels = accumulate_inception_activations(sample, net, num_inception_images) if prints: print('Calculating Inception Score...') IS_mean, IS_std = calculate_inception_score(logits.cpu().numpy(), num_splits) if no_fid: FID = 9999.0 else: if prints: print('Calculating means and covariances...') if use_torch: mu, sigma = torch.mean(pool, 0), torch_cov(pool, rowvar=False) else: mu, sigma = np.mean(pool.cpu().numpy(), axis=0), np.cov(pool.cpu().numpy(), rowvar=False) if prints: print('Covariances calculated, getting FID...') if use_torch: FID = torch_calculate_frechet_distance(mu, sigma, torch.tensor(data_mu).float().cuda(), torch.tensor(data_sigma).float().cuda()) FID = float(FID.cpu().numpy()) else: FID = numpy_calculate_frechet_distance(mu, sigma, data_mu, data_sigma) # Delete mu, sigma, pool, logits, and labels, just in case del mu, sigma, pool, logits, labels return IS_mean, IS_std, FID return get_inception_metrics ================================================ FILE: FQ-BigGAN/layers.py ================================================ ''' Layers This file contains various layers for the BigGAN models. 
# Projection of x onto y
def proj(x, y):
    """Return the projection of x onto y.

    Both arguments are 2-D tensors fed to ``torch.mm``; the result is
    ``(y x^T) * y / (y y^T)``, evaluated in that order.
    """
    scaled_direction = torch.mm(y, x.t()) * y
    squared_norm = torch.mm(y, y.t())
    return scaled_direction / squared_norm
# 2D Conv layer with spectral norm
class SNConv2d(nn.Conv2d, SN):
    """``nn.Conv2d`` whose weight is spectrally normalized on each forward.

    Takes the ``nn.Conv2d`` constructor arguments listed in the signature,
    plus ``num_svs``, ``num_itrs`` and ``eps`` which are handed to ``SN``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True,
                 num_svs=1, num_itrs=1, eps=1e-12):
        # Each base class is initialised explicitly rather than through a
        # cooperative super() chain.
        nn.Conv2d.__init__(
            self, in_channels, out_channels, kernel_size,
            stride=stride, padding=padding, dilation=dilation,
            groups=groups, bias=bias)
        # The output channel count is passed as SN's ``num_outputs``.
        SN.__init__(self, num_svs, num_itrs, num_outputs=out_channels, eps=eps)

    def forward(self, x):
        """Convolve ``x`` with the normalized weight returned by ``W_()``."""
        normalized_weight = self.W_()
        return F.conv2d(x, normalized_weight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)
# Embedding layer with spectral norm
# We use num_embeddings as the dim instead of embedding_dim here
# for convenience sake
class SNEmbedding(nn.Embedding, SN):
    """``nn.Embedding`` whose weight is spectrally normalized on each forward.

    The constructor accepts the ``nn.Embedding`` arguments listed in the
    signature, plus ``num_svs``, ``num_itrs`` and ``eps`` for ``SN``.
    """

    def __init__(self, num_embeddings, embedding_dim, padding_idx=None,
                 max_norm=None, norm_type=2, scale_grad_by_freq=False,
                 sparse=False, _weight=None,
                 num_svs=1, num_itrs=1, eps=1e-12):
        nn.Embedding.__init__(
            self, num_embeddings, embedding_dim,
            padding_idx=padding_idx, max_norm=max_norm, norm_type=norm_type,
            scale_grad_by_freq=scale_grad_by_freq, sparse=sparse,
            _weight=_weight)
        # num_embeddings (not embedding_dim) is passed as SN's ``num_outputs``.
        SN.__init__(self, num_svs, num_itrs, num_outputs=num_embeddings, eps=eps)

    def forward(self, x):
        """Look up ``x`` in the normalized weight returned by ``W_()``.

        Only the indices and the weight are passed to ``F.embedding``; the
        other embedding options stored by the constructor are not forwarded.
        """
        normalized_weight = self.W_()
        return F.embedding(x, normalized_weight)
# Fused batchnorm op
def fused_bn(x, mean, var, gain=None, bias=None, eps=1e-5):
    """Normalize ``x`` with the given statistics as one scale-and-shift.

    Computes the same quantity as
    ``((x - mean) / sqrt(var + eps)) * gain + bias``, with ``gain`` and
    ``bias`` each skipped when ``None``, but folds everything into a single
    multiplier and a single offset first.

    Args:
        x: input tensor.
        mean: mean to subtract (must broadcast against ``x``).
        var: variance to divide by (must broadcast against ``x``).
        gain: optional multiplicative gain.
        bias: optional additive bias.
        eps: constant added to ``var`` before the reciprocal square root.

    Returns:
        The normalized tensor.
    """
    inv_std = torch.rsqrt(var + eps)
    # Fold the gain into the multiplier when one is supplied.
    multiplier = inv_std if gain is None else inv_std * gain
    # The offset is subtracted at the end, hence the bias enters negated.
    offset = mean * multiplier
    if bias is not None:
        offset = offset - bias
    return x * multiplier - offset
class myBN(nn.Module):
    """Batch norm with externally supplied gain/bias and optional standing stats.

    Three buffers are kept: ``stored_mean``, ``stored_var`` and
    ``accumulation_counter``. While ``accumulate_standing`` is False the stored
    statistics are exponential running averages; while it is True they are
    plain sums that get divided by the counter at evaluation time.
    """

    def __init__(self, num_channels, eps=1e-5, momentum=0.1):
        super(myBN, self).__init__()
        # Weight of the newest batch in the running averages.
        self.momentum = momentum
        # Added to the variance to avoid dividing by zero.
        self.eps = eps
        self.register_buffer('stored_mean', torch.zeros(num_channels))
        self.register_buffer('stored_var', torch.ones(num_channels))
        self.register_buffer('accumulation_counter', torch.zeros(1))
        # Switch between running averages (False) and summed standing stats (True).
        self.accumulate_standing = False

    def reset_stats(self):
        """Zero the stored mean, stored variance and accumulation counter in place."""
        for buf in (self.stored_mean, self.stored_var, self.accumulation_counter):
            buf[:] = 0

    def forward(self, x, gain, bias):
        """Normalize ``x`` and apply ``gain``/``bias``.

        In training mode the batch statistics are used and folded into the
        stored buffers; otherwise the stored statistics are used.
        """
        if not self.training:
            mean = self.stored_mean.view(1, -1, 1, 1)
            var = self.stored_var.view(1, -1, 1, 1)
            # Standing stats are sums, so turn them back into averages.
            if self.accumulate_standing:
                mean = mean / self.accumulation_counter
                var = var / self.accumulation_counter
            return fused_bn(x, mean, var, gain, bias, self.eps)

        out, mean, var = manual_bn(x, gain, bias, return_mean_var=True, eps=self.eps)
        if self.accumulate_standing:
            self.stored_mean[:] = self.stored_mean + mean.data
            self.stored_var[:] = self.stored_var + var.data
            self.accumulation_counter += 1.0
        else:
            keep = 1 - self.momentum
            self.stored_mean[:] = self.stored_mean * keep + mean * self.momentum
            self.stored_var[:] = self.stored_var * keep + var * self.momentum
        return out
def groupnorm(x, norm_style):
    """Apply affine-free group normalization configured by a style string.

    Args:
        x: input tensor; ``x.shape[1]`` is used as the channel count.
        norm_style: string selecting the number of groups:
            * contains ``'ch'``  -> the integer after the last ``'_'`` is the
              number of channels per group (at least one group is used);
            * contains ``'grp'`` -> the integer after the last ``'_'`` is the
              number of groups;
            * anything else      -> 16 groups.

    Returns:
        ``F.group_norm(x, groups)``.
    """
    if 'ch' in norm_style:
        ch = int(norm_style.split('_')[-1])
        groups = max(int(x.shape[1]) // ch, 1)
    elif 'grp' in norm_style:
        groups = int(norm_style.split('_')[-1])
    else:
        groups = 16
    return F.group_norm(x, groups)


class ccbn(nn.Module):
    """Class-conditional normalization layer.

    The input is normalized without learned affine parameters, then scaled by
    ``1 + gain(y)`` and shifted by ``bias(y)``, where ``gain`` and ``bias`` are
    layers built with ``which_linear(input_size, output_size)``.

    Args:
        output_size: number of channels of the tensor being normalized.
        input_size: feature size of the conditioning vector ``y``.
        which_linear: callable building the gain/bias layers.
        eps: epsilon used by the normalization.
        momentum: running-statistics momentum.
        cross_replica: use ``SyncBN2d`` for the normalization.
        mybn: use ``myBN`` for the normalization (ignored if ``cross_replica``).
        norm_style: ``'bn'``, ``'in'``, ``'gn'`` or ``'nonorm'``; only consulted
            when neither ``cross_replica`` nor ``mybn`` is set.
    """

    def __init__(self, output_size, input_size, which_linear, eps=1e-5, momentum=0.1,
                 cross_replica=False, mybn=False, norm_style='bn',):
        super(ccbn, self).__init__()
        self.output_size, self.input_size = output_size, input_size
        # Layers producing the per-sample gain and bias from the conditioning vector.
        self.gain = which_linear(input_size, output_size)
        self.bias = which_linear(input_size, output_size)
        self.eps = eps
        self.momentum = momentum
        self.cross_replica = cross_replica
        self.mybn = mybn
        self.norm_style = norm_style

        if self.cross_replica:
            self.bn = SyncBN2d(output_size, eps=self.eps, momentum=self.momentum, affine=False)
        elif self.mybn:
            self.bn = myBN(output_size, self.eps, self.momentum)
        elif self.norm_style in ['bn', 'in']:
            # Running statistics for the functional batch/instance norm calls.
            self.register_buffer('stored_mean', torch.zeros(output_size))
            self.register_buffer('stored_var', torch.ones(output_size))

    def forward(self, x, y):
        """Normalize ``x`` and modulate it with gain/bias computed from ``y``.

        Raises:
            ValueError: if ``norm_style`` is not one of the supported values
                (only checked on the functional path).
        """
        # Class-conditional gain (centred at 1) and bias, reshaped to broadcast over H, W.
        gain = (1 + self.gain(y)).view(y.size(0), -1, 1, 1)
        bias = self.bias(y).view(y.size(0), -1, 1, 1)

        # The dedicated BN modules apply gain and bias themselves.
        if self.mybn or self.cross_replica:
            return self.bn(x, gain=gain, bias=bias)

        if self.norm_style == 'bn':
            out = F.batch_norm(x, self.stored_mean, self.stored_var, None, None,
                               self.training, self.momentum, self.eps)
        elif self.norm_style == 'in':
            out = F.instance_norm(x, self.stored_mean, self.stored_var, None, None,
                                  self.training, self.momentum, self.eps)
        elif self.norm_style == 'gn':
            # Was `self.normstyle`, an attribute that does not exist.
            out = groupnorm(x, self.norm_style)
        elif self.norm_style == 'nonorm':
            out = x
        else:
            raise ValueError('Unsupported norm_style: %r' % (self.norm_style,))
        return out * gain + bias

    def extra_repr(self):
        s = 'out: {output_size}, in: {input_size},'
        s += ' cross_replica={cross_replica}'
        return s.format(**self.__dict__)
class GBlock(nn.Module):
    """Residual generator block: (norm -> activation -> [upsample] -> conv) twice.

    Kernel size, padding and any other conv settings are expected to be baked
    into ``which_conv`` by the caller; likewise ``which_bn`` must already know
    the size of the conditioning input. A 1x1 shortcut convolution is created
    when the channel count changes or an upsample op is supplied.
    """

    def __init__(self, in_channels, out_channels,
                 which_conv=nn.Conv2d, which_bn=bn, activation=None,
                 upsample=None):
        super(GBlock, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.which_conv = which_conv
        self.which_bn = which_bn
        self.activation = activation
        self.upsample = upsample

        # Main-path convolutions.
        self.conv1 = self.which_conv(self.in_channels, self.out_channels)
        self.conv2 = self.which_conv(self.out_channels, self.out_channels)

        # The shortcut needs its own conv whenever shapes differ from the input.
        self.learnable_sc = in_channels != out_channels or upsample
        if self.learnable_sc:
            self.conv_sc = self.which_conv(in_channels, out_channels,
                                           kernel_size=1, padding=0)

        # Normalization layers, one in front of each convolution.
        self.bn1 = self.which_bn(in_channels)
        self.bn2 = self.which_bn(out_channels)

    def forward(self, x, y):
        """Run the block on ``x`` with conditioning ``y`` (passed to both norm layers)."""
        residual = self.activation(self.bn1(x, y))
        shortcut = x
        if self.upsample:
            # Both branches must be upsampled so they can be summed at the end.
            residual = self.upsample(residual)
            shortcut = self.upsample(shortcut)
        residual = self.conv1(residual)
        residual = self.conv2(self.activation(self.bn2(residual, y)))
        if self.learnable_sc:
            shortcut = self.conv_sc(shortcut)
        return residual + shortcut
def loss_dcgan_dis(dis_fake, dis_real):
    """DCGAN discriminator loss.

    Returns the real and fake terms separately as ``(loss_real, loss_fake)``:
    the mean softplus of ``-dis_real`` and the mean softplus of ``dis_fake``.
    """
    real_term = F.softplus(-dis_real).mean()
    fake_term = F.softplus(dis_fake).mean()
    return real_term, fake_term


def loss_dcgan_gen(dis_fake):
    """DCGAN generator loss: mean softplus of the negated fake logits."""
    return F.softplus(-dis_fake).mean()


def loss_hinge_dis(dis_fake, dis_real):
    """Hinge discriminator loss.

    Returns ``(loss_real, loss_fake)`` as two separate scalars rather than
    their sum: ``mean(relu(1 - dis_real))`` and ``mean(relu(1 + dis_fake))``.
    """
    real_term = F.relu(1. - dis_real).mean()
    fake_term = F.relu(1. + dis_fake).mean()
    return real_term, fake_term


def loss_hinge_gen(dis_fake):
    """Hinge generator loss: the negated mean of the fake logits."""
    return -dis_fake.mean()


# Default to hinge loss
generator_loss = loss_hinge_gen
discriminator_loss = loss_hinge_dis
def run(config):
    """Dump a dataset (images and labels) into a resizable HDF5 file.

    The file is written to ``<data_root>/ILSVRC<image_size>.hdf5``. The first
    batch creates the ``imgs`` and ``labels`` datasets; every later batch
    re-opens the file in append mode, grows both datasets and writes the batch
    at the end.

    Args:
        config: dict of parsed command-line options. Reads ``dataset``,
            ``data_root``, ``batch_size``, ``num_workers``, ``chunk_size`` and
            ``compression``. It is mutated: ``image_size`` is added and
            ``compression`` is replaced by ``'lzf'`` or ``None``.

    Raises:
        ValueError: if the dataset name contains ``'hdf5'``, since the source
            file would probably be the one being overwritten.
    """
    if 'hdf5' in config['dataset']:
        # The original message used adjacent literals ('you''re') and lost the apostrophe.
        raise ValueError("Reading from an HDF5 file which you will probably be "
                         "about to overwrite! Override this error only if you know "
                         "what you're doing!")

    # Get image size
    config['image_size'] = utils.imsize_dict[config['dataset']]
    image_size = config['image_size']

    # Update compression entry: None means no compression; 'lzf' is the alternative.
    config['compression'] = 'lzf' if config['compression'] else None

    # Get dataset
    kwargs = {'num_workers': config['num_workers'], 'pin_memory': False, 'drop_last': False}
    train_loader = utils.get_data_loaders(dataset=config['dataset'],
                                          batch_size=config['batch_size'],
                                          shuffle=False,
                                          data_root=config['data_root'],
                                          use_multiepoch_sampler=False,
                                          **kwargs)[0]

    # HDF5 supports chunking and compression. You may want to experiment
    # with different chunk sizes to see how it runs on your machines.
    # Chunk Size/compression     Read speed @ 256x256   Read speed @ 128x128  Filesize @ 128x128    Time to write @128x128
    # 1 / None                   20/s
    # 500 / None                 ramps up to 77/s       102/s                 61GB                  23min
    # 500 / LZF                                         8/s                   56GB                  23min
    # 1000 / None                78/s
    # 5000 / None                81/s
    # auto:(125,1,16,32) / None                         11/s                  61GB

    print('Starting to load %s into an HDF5 file with chunk size %i and compression %s...'
          % (config['dataset'], config['chunk_size'], config['compression']))

    # These do not change between batches, so compute them once.
    hdf5_path = config['data_root'] + '/ILSVRC%i.hdf5' % image_size
    num_samples = len(train_loader.dataset)

    # Loop over train loader
    for i, (x, y) in enumerate(tqdm(train_loader)):
        # Stick X into the range [0, 255] since it's coming from the train loader
        x = (255 * ((x + 1) / 2.0)).byte().numpy()
        # Numpyify y
        y = y.numpy()
        if i == 0:
            # First batch: create the file and both resizable datasets.
            with h5.File(hdf5_path, 'w') as f:
                print('Producing dataset of len %d' % num_samples)
                imgs_dset = f.create_dataset(
                    'imgs', x.shape, dtype='uint8',
                    maxshape=(num_samples, 3, image_size, image_size),
                    chunks=(config['chunk_size'], 3, image_size, image_size),
                    compression=config['compression'])
                print('Image chunks chosen as ' + str(imgs_dset.chunks))
                imgs_dset[...] = x
                labels_dset = f.create_dataset(
                    'labels', y.shape, dtype='int64',
                    maxshape=(num_samples,),
                    chunks=(config['chunk_size'],),
                    compression=config['compression'])
                print('Label chunks chosen as ' + str(labels_dset.chunks))
                labels_dset[...] = y
        else:
            # Later batches: grow each dataset along axis 0 and fill the new tail.
            with h5.File(hdf5_path, 'a') as f:
                f['imgs'].resize(f['imgs'].shape[0] + x.shape[0], axis=0)
                f['imgs'][-x.shape[0]:] = x
                f['labels'].resize(f['labels'].shape[0] + y.shape[0], axis=0)
                f['labels'][-y.shape[0]:] = y
if config['config_from_name']: utils.load_weights(None, None, state_dict, config['weights_root'], config['experiment_name'], config['load_weights'], None, strict=False, load_optim=False) # Ignore items which we might want to overwrite from the command line for item in state_dict['config']: if item not in ['z_var', 'base_root', 'batch_size', 'G_batch_size', 'use_ema', 'G_eval_mode']: config[item] = state_dict['config'][item] # update config (see train.py for explanation) config['resolution'] = utils.imsize_dict[config['dataset']] config['n_classes'] = utils.nclass_dict[config['dataset']] config['G_activation'] = utils.activation_dict[config['G_nl']] config['D_activation'] = utils.activation_dict[config['D_nl']] config = utils.update_config_roots(config) config['skip_init'] = True config['no_optim'] = True device = 'cuda' # Seed RNG utils.seed_rng(config['seed']) # Setup cudnn.benchmark for free speed torch.backends.cudnn.benchmark = True # Import the model--this line allows us to dynamically select different files. model = __import__(config['model']) experiment_name = (config['experiment_name'] if config['experiment_name'] else utils.name_from_config(config)) print('Experiment name is %s' % experiment_name) G = model.Generator(**config).cuda() utils.count_parameters(G) # Load weights print('Loading weights...') # Here is where we deal with the ema--load ema weights or load normal weights utils.load_weights(G if not (config['use_ema']) else None, None, state_dict, config['weights_root'], experiment_name, config['load_weights'], G if config['ema'] and config['use_ema'] else None, strict=False, load_optim=False) # Update batch size setting used for G G_batch_size = max(config['G_batch_size'], config['batch_size']) z_, y_ = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16'], z_var=config['z_var']) if config['G_eval_mode']: print('Putting G in eval mode..') G.eval() else: print('G is in %s mode...' 
% ('training' if G.training else 'eval')) #Sample function sample = functools.partial(utils.sample, G=G, z_=z_, y_=y_, config=config) if config['accumulate_stats']: print('Accumulating standing stats across %d accumulations...' % config['num_standing_accumulations']) utils.accumulate_standing_stats(G, z_, y_, config['n_classes'], config['num_standing_accumulations']) # Sample a number of images and save them to an NPZ, for use with TF-Inception if config['sample_npz']: # Lists to hold images and labels for images x, y = [], [] print('Sampling %d images and saving them to npz...' % config['sample_num_npz']) for i in trange(int(np.ceil(config['sample_num_npz'] / float(G_batch_size)))): with torch.no_grad(): images, labels = sample() x += [np.uint8(255 * (images.cpu().numpy() + 1) / 2.)] y += [labels.cpu().numpy()] x = np.concatenate(x, 0)[:config['sample_num_npz']] y = np.concatenate(y, 0)[:config['sample_num_npz']] print('Images shape: %s, Labels shape: %s' % (x.shape, y.shape)) npz_filename = '%s/%s/samples.npz' % (config['samples_root'], experiment_name) print('Saving npz to %s...' 
% npz_filename) np.savez(npz_filename, **{'x' : x, 'y' : y}) # Prepare sample sheets if config['sample_sheets']: print('Preparing conditional sample sheets...') utils.sample_sheet(G, classes_per_sheet=utils.classes_per_sheet_dict[config['dataset']], num_classes=config['n_classes'], samples_per_class=10, parallel=config['parallel'], samples_root=config['samples_root'], experiment_name=experiment_name, folder_number=config['sample_sheet_folder_num'], z_=z_,) # Sample interp sheets if config['sample_interps']: print('Preparing interp sheets...') for fix_z, fix_y in zip([False, False, True], [False, True, False]): utils.interp_sheet(G, num_per_sheet=16, num_midpoints=8, num_classes=config['n_classes'], parallel=config['parallel'], samples_root=config['samples_root'], experiment_name=experiment_name, folder_number=config['sample_sheet_folder_num'], sheet_number=0, fix_z=fix_z, fix_y=fix_y, device='cuda') # Sample random sheet if config['sample_random']: print('Preparing random sample sheet...') images, labels = sample() torchvision.utils.save_image(images.float(), '%s/%s/random_samples.jpg' % (config['samples_root'], experiment_name), nrow=int(G_batch_size**0.5), normalize=True) # Get Inception Score and FID get_inception_metrics = inception_utils.prepare_inception_metrics(config['dataset'], config['parallel'], config['no_fid']) # Prepare a simple function get metrics that we use for trunc curves def get_metrics(): sample = functools.partial(utils.sample, G=G, z_=z_, y_=y_, config=config) IS_mean, IS_std, FID = get_inception_metrics(sample, config['num_inception_images'], num_splits=10, prints=False) # Prepare output string outstring = 'Using %s weights ' % ('ema' if config['use_ema'] else 'non-ema') outstring += 'in %s mode, ' % ('eval' if config['G_eval_mode'] else 'training') outstring += 'with noise variance %3.3f, ' % z_.var outstring += 'over %d images, ' % config['num_inception_images'] if config['accumulate_stats'] or not config['G_eval_mode']: outstring += 
def main():
    """Build the sampling argument parser, parse the command line and call run()."""
    # The base parser from utils is extended with the sample-specific options.
    parser = utils.add_sample_parser(utils.prepare_parser())
    config = vars(parser.parse_args())
    print(config)
    run(config)
================================================ #!/bin/bash #export CUDA_VISIBLE_DEVICES=2 python3 train.py --shuffle --batch_size 64 --parallel \ --num_G_accumulations 1 --num_D_accumulations 1 --num_epochs 500 \ --num_D_steps 4 --G_lr 2e-4 \ --D_lr 2e-4 --dataset C100 --G_ortho 0.0 \ --G_attn 0 --D_attn 0 --G_init N02 --D_init N02 \ --ema --use_ema --ema_start 1000 \ --test_every 2000 --save_every 1000 \ --num_best_copies 5 --num_save_copies 2 --seed 0 \ --discrete_layer 0123 --commitment 10.0 --dict_size 6 --dict_decay 0.9 \ --name_suffix quant ================================================ FILE: FQ-BigGAN/scripts/launch_I128_bs256x4.sh ================================================ #!/bin/bash # export CUDA_VISIBLE_DEVICES=1,2 python train.py \ --dataset I128_hdf5 --parallel --shuffle --num_workers 8 --batch_size 256 --load_in_mem \ --num_G_accumulations 4 --num_D_accumulations 4 \ --num_D_steps 1 --G_lr 1e-4 --D_lr 4e-4 --D_B2 0.999 --G_B2 0.999 \ --G_attn 64 --D_attn 64 \ --G_nl inplace_relu --D_nl inplace_relu \ --SN_eps 1e-6 --BN_eps 1e-5 --adam_eps 1e-6 \ --G_ortho 0.0 \ --hier --dim_z 120 \ --G_eval_mode \ --G_ch 64 --D_ch 64 \ --ema --use_ema --ema_start 20000 \ --test_every 1000 --save_every 1000 --num_best_copies 5 --num_save_copies 2 --seed 0 \ --discrete_layer 0123 --commitment 15.0 --dict_size 10 --dict_decay 0.8 \ --use_multiepoch_sampler --name_suffix quant ================================================ FILE: FQ-BigGAN/scripts/launch_I64_bs128x4.sh ================================================ #!/bin/bash export CUDA_VISIBLE_DEVICES=1,2 python train.py \ --dataset I64_hdf5 --parallel --shuffle --num_workers 8 --batch_size 128 --load_in_mem \ --num_G_accumulations 4 --num_D_accumulations 4 \ --num_D_steps 1 --G_lr 1e-4 --D_lr 4e-4 --D_B2 0.999 --G_B2 0.999 \ --G_attn 32 --D_attn 32 \ --G_nl inplace_relu --D_nl inplace_relu \ --SN_eps 1e-6 --BN_eps 1e-5 --adam_eps 1e-6 \ --G_ortho 0.0 \ --G_shared \ --G_init ortho --D_init ortho \ --hier 
# Rewrite prepare_data.sh in place so that every line ends with a bare LF.
filename = 'prepare_data.sh'

# Reading in text mode with the default newline handling translates
# '\r\n' and '\r' to '\n'. The context manager closes the read handle
# before the same path is reopened for writing.
with open(filename, 'r') as src:
    fileCont = src.read()

# newline='\n' turns off newline translation on write, so the '\n'
# characters are written out unchanged on every platform.
with open(filename, 'w', newline='\n') as dst:
    dst.write(fileCont)
class _SynchronizedBatchNorm(_BatchNorm):
    """Batch norm whose training statistics are reduced across data-parallel replicas.

    Besides the regular input, ``forward`` accepts an optional per-sample
    ``gain`` and ``bias`` that are applied after normalization. When the
    module is not replicated, or is in evaluation mode, it falls back to
    ``F.batch_norm``.
    """

    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True):
        super(_SynchronizedBatchNorm, self).__init__(num_features, eps=eps,
                                                     momentum=momentum, affine=affine)

        self._sync_master = SyncMaster(self._data_parallel_master)

        # Set by __data_parallel_replicate__ once the module has been replicated.
        self._is_parallel = False
        self._parallel_id = None
        self._slave_pipe = None

    def forward(self, input, gain=None, bias=None):
        """Normalize ``input`` and apply the optional ``gain`` and ``bias``.

        Args:
            input: tensor whose dim 0 is the batch and dim 1 the channels.
            gain: optional multiplicative modulation applied after normalization.
            bias: optional additive modulation applied after normalization.

        Returns:
            A tensor with the same shape as ``input``.
        """
        # If it is not parallel computation or is in evaluation mode, use
        # PyTorch's implementation.
        if not (self._is_parallel and self.training):
            out = F.batch_norm(
                input, self.running_mean, self.running_var, self.weight, self.bias,
                self.training, self.momentum, self.eps)
            # The gain is a scale: multiply, exactly as the synchronized branch
            # below does. (It used to be added, so single-device / eval outputs
            # disagreed with multi-GPU training outputs.)
            if gain is not None:
                out = out * gain
            if bias is not None:
                out = out + bias
            return out

        # Resize the input to (B, C, -1).
        input_shape = input.size()
        input = input.view(input.size(0), input.size(1), -1)

        # Compute the sum and square-sum.
        sum_size = input.size(0) * input.size(2)
        input_sum = _sum_ft(input)
        input_ssum = _sum_ft(input ** 2)

        # Reduce-and-broadcast the statistics.
        message = _ChildMessage(input_sum, input_ssum, sum_size)
        if self._parallel_id == 0:
            mean, inv_std = self._sync_master.run_master(message)
        else:
            mean, inv_std = self._slave_pipe.run_slave(message)

        # Compute the output.
        centered = input - _unsqueeze_ft(mean)
        if gain is not None:
            # squeeze(-1) drops the last dim of gain/bias so they broadcast
            # against the (B, C, -1) view of the input.
            output = centered * (_unsqueeze_ft(inv_std) * gain.squeeze(-1))
            if bias is not None:
                output = output + bias.squeeze(-1)
        elif self.affine:
            # MJY:: Fuse the multiplication for speed.
            output = centered * _unsqueeze_ft(inv_std * self.weight) + _unsqueeze_ft(self.bias)
        else:
            output = centered * _unsqueeze_ft(inv_std)

        # Reshape it.
        return output.view(input_shape)

    def __data_parallel_replicate__(self, ctx, copy_id):
        """Record this replica's id and connect it to the shared sync master."""
        self._is_parallel = True
        self._parallel_id = copy_id

        # parallel_id == 0 means master device.
        if self._parallel_id == 0:
            ctx.sync_master = self._sync_master
        else:
            self._slave_pipe = ctx.sync_master.register_slave(copy_id)

    def _data_parallel_master(self, intermediates):
        """Reduce the sum and square-sum, compute the statistics, and broadcast it.

        Each entry of ``intermediates`` is indexed as ``(identifier, message)``
        where the message carries ``sum``, ``ssum`` and ``sum_size``. Returns a
        list of ``(identifier, _MasterMessage)`` pairs, one per entry.
        """
        # Always using same "device order" makes the ReduceAdd operation faster.
        # Thanks to:: Tete Xiao (http://tetexiao.com/)
        intermediates = sorted(intermediates, key=lambda i: i[1].sum.get_device())

        to_reduce = [i[1][:2] for i in intermediates]
        to_reduce = [j for i in to_reduce for j in i]  # flatten
        target_gpus = [i[1].sum.get_device() for i in intermediates]

        sum_size = sum([i[1].sum_size for i in intermediates])
        sum_, ssum = ReduceAddCoalesced.apply(target_gpus[0], 2, *to_reduce)
        mean, inv_std = self._compute_mean_std(sum_, ssum, sum_size)

        broadcasted = Broadcast.apply(target_gpus, mean, inv_std)

        # Broadcast returns a flat sequence: two tensors (mean, inv_std) per device.
        outputs = []
        for i, rec in enumerate(intermediates):
            outputs.append((rec[0], _MasterMessage(*broadcasted[i*2:i*2+2])))
        return outputs

    def _compute_mean_std(self, sum_, ssum, size):
        """Compute the mean and standard-deviation with sum and square-sum. This method
        also maintains the moving average on the master device.

        Returns ``(mean, inv_std)``; ``inv_std`` uses the biased variance while
        the running variance is updated with the unbiased one.
        """
        assert size > 1, 'BatchNorm computes unbiased standard-deviation, which requires size > 1.'
        mean = sum_ / size
        sumvar = ssum - sum_ * mean
        unbias_var = sumvar / (size - 1)
        bias_var = sumvar / size

        self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean.data
        self.running_var = (1 - self.momentum) * self.running_var + self.momentum * unbias_var.data
        return mean, torch.rsqrt(bias_var + self.eps)
Default: 1e-5 momentum: the value used for the running_mean and running_var computation. Default: 0.1 affine: a boolean value that when set to ``True``, gives the layer learnable affine parameters. Default: ``True`` Shape: - Input: :math:`(N, C)` or :math:`(N, C, L)` - Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input) Examples: >>> # With Learnable Parameters >>> m = SynchronizedBatchNorm1d(100) >>> # Without Learnable Parameters >>> m = SynchronizedBatchNorm1d(100, affine=False) >>> input = torch.autograd.Variable(torch.randn(20, 100)) >>> output = m(input) """ def _check_input_dim(self, input): if input.dim() != 2 and input.dim() != 3: raise ValueError('expected 2D or 3D input (got {}D input)' .format(input.dim())) super(SynchronizedBatchNorm1d, self)._check_input_dim(input) class SynchronizedBatchNorm2d(_SynchronizedBatchNorm): r"""Applies Batch Normalization over a 4d input that is seen as a mini-batch of 3d inputs .. math:: y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta This module differs from the built-in PyTorch BatchNorm2d as the mean and standard-deviation are reduced across all devices during training. For example, when one uses `nn.DataParallel` to wrap the network during training, PyTorch's implementation normalize the tensor on each device using the statistics only on that device, which accelerated the computation and is also easy to implement, but the statistics might be inaccurate. Instead, in this synchronized version, the statistics will be computed over all training samples distributed on multiple devices. Note that, for one-GPU or CPU-only case, this module behaves exactly same as the built-in PyTorch implementation. The mean and standard-deviation are calculated per-dimension over the mini-batches and gamma and beta are learnable parameter vectors of size C (where C is the input size). During training, this layer keeps a running estimate of its computed mean and variance. 
class SynchronizedBatchNorm3d(_SynchronizedBatchNorm):
    r"""Synchronized Batch Normalization for 5d inputs (a mini-batch of 4d inputs).

    .. math::

        y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta

    Unlike the built-in PyTorch BatchNorm3d, the mean and standard-deviation
    are reduced across all devices during training. With `nn.DataParallel`,
    the built-in layer normalizes the tensor on each device using only that
    device's statistics; this is faster and simpler, but the statistics may be
    inaccurate. This synchronized version computes the statistics over all
    training samples distributed over the devices.

    For the one-GPU or CPU-only case this module behaves exactly like the
    built-in PyTorch implementation.

    The mean and standard-deviation are calculated per-dimension over the
    mini-batches; gamma and beta are learnable parameter vectors of size C
    (where C is the input size).

    During training the layer keeps a running estimate of its computed mean
    and variance (default momentum 0.1). During evaluation those running
    estimates are used for normalization.

    Statistics are computed on `(N, D, H, W)` slices for each entry of the
    `C` dimension, which is why this is commonly called Volumetric BatchNorm
    or Spatio-temporal BatchNorm.

    Args:
        num_features: num_features from an expected input of
            size batch_size x num_features x depth x height x width
        eps: a value added to the denominator for numerical stability.
            Default: 1e-5
        momentum: the value used for the running_mean and running_var
            computation. Default: 0.1
        affine: a boolean value that when set to ``True``, gives the layer
            learnable affine parameters. Default: ``True``

    Shape:
        - Input: :math:`(N, C, D, H, W)`
        - Output: :math:`(N, C, D, H, W)` (same shape as input)

    Examples:
        >>> # With Learnable Parameters
        >>> m = SynchronizedBatchNorm3d(100)
        >>> # Without Learnable Parameters
        >>> m = SynchronizedBatchNorm3d(100, affine=False)
        >>> input = torch.randn(20, 100, 35, 45, 10)
        >>> output = m(input)
    """

    def _check_input_dim(self, input):
        """Reject anything that is not 5-D, then defer to the parent check."""
        ndim = input.dim()
        if ndim != 5:
            raise ValueError('expected 5D input (got {}D input)'
                             .format(ndim))
        super(SynchronizedBatchNorm3d, self)._check_input_dim(input)
# The export list must name the class that is actually defined below; the
# previous entry ('BatchNormReimpl') made `from batchnorm_reimpl import *`
# fail with AttributeError.
__all__ = ['BatchNorm2dReimpl']


class BatchNorm2dReimpl(nn.Module):
    """
    A re-implementation of batch normalization, used for testing the numerical
    stability.

    Author: acgtyrant
    See also:
    https://github.com/vacancy/Synchronized-BatchNorm-PyTorch/issues/14

    Args:
        num_features: number of channels of the expected 4-D input.
        eps: value added to the (biased) variance before taking the inverse
            square root. Default: 1e-5
        momentum: weight of the current batch in the running statistics.
            Default: 0.1
    """

    def __init__(self, num_features, eps=1e-5, momentum=0.1):
        super().__init__()

        self.num_features = num_features
        self.eps = eps
        self.momentum = momentum
        self.weight = nn.Parameter(torch.empty(num_features))
        self.bias = nn.Parameter(torch.empty(num_features))
        self.register_buffer('running_mean', torch.zeros(num_features))
        self.register_buffer('running_var', torch.ones(num_features))
        self.reset_parameters()

    def reset_running_stats(self):
        """Reset the running mean to 0 and the running variance to 1, in place."""
        self.running_mean.zero_()
        self.running_var.fill_(1)

    def reset_parameters(self):
        """Reset running statistics, draw ``weight`` from U(0, 1), zero ``bias``."""
        self.reset_running_stats()
        init.uniform_(self.weight)
        init.zeros_(self.bias)

    def forward(self, input_):
        """Normalize a 4-D ``(batch, channels, height, width)`` tensor per channel.

        The batch statistics are always used for normalization and the running
        statistics are always updated; this module does not look at
        ``self.training``.

        NOTE(review): with a single element per channel (``numel == 1``) the
        unbiased variance divides by zero, so ``running_var`` becomes
        non-finite.
        """
        batchsize, channels, height, width = input_.size()
        numel = batchsize * height * width
        # Flatten to (channels, numel) so every statistic is a reduction over dim 1.
        input_ = input_.permute(1, 0, 2, 3).contiguous().view(channels, numel)
        sum_ = input_.sum(1)
        sum_of_square = input_.pow(2).sum(1)
        mean = sum_ / numel
        # sum((x - mean)^2) written as sum(x^2) - sum(x) * mean.
        sumvar = sum_of_square - sum_ * mean

        self.running_mean = (
            (1 - self.momentum) * self.running_mean
            + self.momentum * mean.detach()
        )
        # Running variance tracks the unbiased estimate ...
        unbias_var = sumvar / (numel - 1)
        self.running_var = (
            (1 - self.momentum) * self.running_var
            + self.momentum * unbias_var.detach()
        )

        # ... while the normalization itself uses the biased one.
        bias_var = sumvar / numel
        inv_std = 1 / (bias_var + self.eps).pow(0.5)
        output = (
            (input_ - mean.unsqueeze(1)) * inv_std.unsqueeze(1)
            * self.weight.unsqueeze(1) + self.bias.unsqueeze(1))

        # Undo the flattening: back to (batch, channels, height, width).
        return output.view(channels, batchsize, height, width).permute(1, 0, 2, 3).contiguous()
class FutureResult(object):
    """A thread-safe, single-slot future. Used only as one-to-one pipe.

    One thread calls :meth:`put` to deposit a value and another calls
    :meth:`get` to take it out again; the slot holds at most one value.
    """

    # Private "slot is empty" marker, so that ``None`` is a legal result
    # rather than being mistaken for "nothing has been put yet".
    _EMPTY = object()

    def __init__(self):
        self._result = self._EMPTY
        self._lock = threading.Lock()
        self._cond = threading.Condition(self._lock)

    def put(self, result):
        """Store ``result`` and wake the waiting consumer.

        Raises:
            AssertionError: if the previously stored result has not been
                fetched yet.
        """
        with self._lock:
            assert self._result is self._EMPTY, 'Previous result has\'t been fetched.'
            self._result = result
            self._cond.notify()

    def get(self):
        """Block until a result is available, then return it and empty the slot."""
        with self._lock:
            # Re-check the predicate in a loop: a condition wait may return
            # without the slot having been filled, so one `if` is not enough.
            while self._result is self._EMPTY:
                self._cond.wait()

            res = self._result
            self._result = self._EMPTY
            return res
""" self._master_callback = master_callback self._queue = queue.Queue() self._registry = collections.OrderedDict() self._activated = False def __getstate__(self): return {'master_callback': self._master_callback} def __setstate__(self, state): self.__init__(state['master_callback']) def register_slave(self, identifier): """ Register an slave device. Args: identifier: an identifier, usually is the device id. Returns: a `SlavePipe` object which can be used to communicate with the master device. """ if self._activated: assert self._queue.empty(), 'Queue is not clean before next initialization.' self._activated = False self._registry.clear() future = FutureResult() self._registry[identifier] = _MasterRegistry(future) return SlavePipe(identifier, self._queue, future) def run_master(self, master_msg): """ Main entry for the master device in each forward pass. The messages were first collected from each devices (including the master device), and then an callback will be invoked to compute the message to be sent back to each devices (including the master device). Args: master_msg: the message that the master want to send to itself. This will be placed as the first message when calling `master_callback`. For detailed usage, see `_SynchronizedBatchNorm` for an example. Returns: the message to be sent back to the master device. """ self._activated = True intermediates = [(0, master_msg)] for i in range(self.nr_slaves): intermediates.append(self._queue.get()) results = self._master_callback(intermediates) assert results[0][0] == 0, 'The first result should belongs to the master.' 
def execute_replication_callbacks(modules):
    """
    Invoke the replication callback `__data_parallel_replicate__` on every
    sub-module of every replica that defines it.

    The callback is called as `__data_parallel_replicate__(ctx, copy_id)`.

    All replicas are isomorphic, so the j-th sub-module of each replica gets
    the same `CallbackContext` instance; copies of one module on different
    devices can share information through it.

    Replicas are visited in list order, so the callbacks of the master copy
    (the first entry) run before those of any slave copy.
    """
    # One shared context per sub-module position of the master copy.
    contexts = [CallbackContext() for _ in modules[0].modules()]

    for copy_id, replica in enumerate(modules):
        for position, submodule in enumerate(replica.modules()):
            if not hasattr(submodule, '__data_parallel_replicate__'):
                continue
            submodule.__data_parallel_replicate__(contexts[position], copy_id)
def patch_replication_callback(data_parallel):
    """
    Monkey-patch an existing `DataParallel` object so that its `replicate`
    also runs the replication callbacks.

    Useful when you have a customized `DataParallel` implementation.

    Examples:
        > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
        > sync_bn = DataParallel(sync_bn, device_ids=[0, 1])
        > patch_replication_callback(sync_bn)
        # this is equivalent to
        > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)
        > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1])
    """
    assert isinstance(data_parallel, DataParallel)

    # Capture the bound method now; the wrapper delegates to it.
    original_replicate = data_parallel.replicate

    @functools.wraps(original_replicate)
    def replicate_with_callbacks(module, device_ids):
        replicas = original_replicate(module, device_ids)
        execute_replication_callbacks(replicas)
        return replicas

    # Instance-level override: only this object is affected.
    data_parallel.replicate = replicate_with_callbacks
class TorchTestCase(unittest.TestCase):
    """`unittest.TestCase` with a tensor-closeness assertion."""

    def assertTensorClose(self, x, y):
        """Assert ``torch.allclose(x, y)``, reporting the error sizes on failure.

        The failure message contains:

        * ``adiff``: the largest absolute difference ``|x - y|``;
        * ``rdiff``: the largest element-wise relative difference
          ``|x - y| / |y|`` over the entries where ``y`` is non-zero, or the
          string ``'NaN'`` when there is no such entry.
        """
        diff = (x - y).abs()
        if diff.numel() == 0:
            # Nothing to compare; `.max()` would raise on an empty tensor.
            adiff, rdiff = 0.0, 'NaN'
        else:
            adiff = float(diff.max())
            # Match the (possibly broadcast) shape of `diff` before masking.
            y_full = y.expand_as(diff)
            nonzero = y_full != 0
            if nonzero.any():
                # Only divide where it is defined, so a single zero in `y`
                # no longer turns the whole report into `inf`.
                rdiff = float((diff[nonzero] / y_full[nonzero].abs()).max())
            else:
                rdiff = 'NaN'

        message = (
            'Tensor close check failed\n'
            'adiff={}\n'
            'rdiff={}\n'
        ).format(adiff, rdiff)
        self.assertTrue(torch.allclose(x, y), message)
if config['resume']: print('Skipping initialization for training resumption...') config['skip_init'] = True config = utils.update_config_roots(config) device = 'cuda' # Seed RNG utils.seed_rng(config['seed']) # Prepare root folders if necessary utils.prepare_root(config) # Setup cudnn.benchmark for free speed torch.backends.cudnn.benchmark = True # Import the model--this line allows us to dynamically select different files. model = __import__(config['model']) experiment_name = (config['experiment_name'] if config['experiment_name'] else utils.name_from_config(config)) print('Experiment name is %s' % experiment_name) # Next, build the model G = model.Generator(**config).to(device) D = model.Discriminator(**config).to(device) # If using EMA, prepare it if config['ema']: print('Preparing EMA for G with decay of {}'.format(config['ema_decay'])) G_ema = model.Generator(**{**config, 'skip_init':True, 'no_optim': True}).to(device) ema = utils.ema(G, G_ema, config['ema_decay'], config['ema_start']) else: G_ema, ema = None, None # FP16? if config['G_fp16']: print('Casting G to float16...') G = G.half() if config['ema']: G_ema = G_ema.half() if config['D_fp16']: print('Casting D to fp16...') D = D.half() # Consider automatically reducing SN_eps? 
GD = model.G_D(G, D) print(G) print(D) print('Number of params in G: {} D: {}'.format( *[sum([p.data.nelement() for p in net.parameters()]) for net in [G,D]])) # Prepare state dict, which holds things like epoch # and itr # state_dict = {'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0, 'best_IS': 0, 'best_FID': 999999, 'config': config} # If loading from a pre-trained model, load weights if config['resume']: print('Loading weights...') utils.load_weights(G, D, state_dict, config['weights_root'], experiment_name, config['load_weights'] if config['load_weights'] else None, G_ema if config['ema'] else None) # If parallel, parallelize the GD module if config['parallel']: GD = nn.DataParallel(GD) if config['cross_replica']: patch_replication_callback(GD) # Prepare loggers for stats; metrics holds test metrics, # lmetrics holds any desired training metrics. test_metrics_fname = '%s/%s_log.jsonl' % (config['logs_root'], experiment_name) train_metrics_fname = '%s/%s' % (config['logs_root'], experiment_name) print('Inception Metrics will be saved to {}'.format(test_metrics_fname)) test_log = utils.MetricsLogger(test_metrics_fname, reinitialize=(not config['resume'])) print('Training Metrics will be saved to {}'.format(train_metrics_fname)) train_log = utils.MyLogger(train_metrics_fname, reinitialize=(not config['resume']), logstyle=config['logstyle']) # Write metadata utils.write_metadata(config['logs_root'], experiment_name, config, state_dict) # Prepare data; the Discriminator's batch size is all that needs to be passed # to the dataloader, as G doesn't require dataloading. 
# Note that at every loader iteration we pass in enough data to complete # a full D iteration (regardless of number of D steps and accumulations) D_batch_size = (config['batch_size'] * config['num_D_steps'] * config['num_D_accumulations']) loaders = utils.get_data_loaders(**{**config, 'batch_size': D_batch_size, 'start_itr': state_dict['itr']}) # Prepare inception metrics: FID and IS get_inception_metrics = inception_utils.prepare_inception_metrics(config['dataset'], config['parallel'], config['no_fid']) # Prepare noise and randomly sampled label arrays # Allow for different batch sizes in G G_batch_size = max(config['G_batch_size'], config['batch_size']) z_, y_ = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) # Prepare a fixed z & y to see individual sample evolution throghout training fixed_z, fixed_y = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'], device=device, fp16=config['G_fp16']) fixed_z.sample_() fixed_y.sample_() # Loaders are loaded, prepare the training function if config['which_train_fn'] == 'GAN': train = train_fns.GAN_training_function(G, D, GD, z_, y_, ema, state_dict, config) # Else, assume debugging and use the dummy train fn else: train = train_fns.dummy_training_function() # Prepare Sample function for use with inception metrics sample = functools.partial(utils.sample, G=(G_ema if config['ema'] and config['use_ema'] else G), z_=z_, y_=y_, config=config) print('Beginning training at epoch %d...' % state_dict['epoch']) # Train for specified number of epochs, although we mostly track G iterations. for epoch in range(state_dict['epoch'], config['num_epochs']): # Which progressbar to use? TQDM or my own? 
def main():
  """Parse the command line into a config dict, echo it, and start training."""
  config = vars(utils.prepare_parser().parse_args())
  print(config)
  run(config)
def GAN_training_function(G, D, GD, z_, y_, ema, state_dict, config):
  """Build the per-iteration training closure for the GAN.

  Args:
    G, D: generator and discriminator; each must expose an ``optim``
      attribute (``zero_grad``/``step`` are called on it), and ``G`` a
      ``shared`` sub-module.
    GD: callable combining G and D. Called with ``train_G=False`` it returns
      five values (D output on fake, D output on real, two quantization
      losses, and ``ppl``); with ``train_G=True`` it returns two (D output on
      fake and a quantization loss).
    z_, y_: noise / label holders that are re-drawn in place via ``sample_()``.
    ema: object with ``update(itr)``; only used when ``config['ema']`` is set.
    state_dict: training state; only ``state_dict['itr']`` is read here.
    config: configuration dict (batch size, step/accumulation counts,
      ortho-reg strengths, ``toggle_grads``, ``split_D``, ``ema``).

  Returns:
    ``train(x, y)``, which runs ``num_D_steps`` discriminator updates followed
    by one generator update and returns a dict of float metrics.
  """
  def train(x, y):
    # NOTE(review): the returned D metrics and 'Perplexity' come from the last
    # D accumulation only; if num_D_steps or num_D_accumulations is 0 those
    # names are never bound and building `out` raises NameError.
    G.optim.zero_grad()
    D.optim.zero_grad()
    # How many chunks to split x and y into?
    # One chunk of size batch_size is consumed per D accumulation (see `counter`).
    x = torch.split(x, config['batch_size'])
    y = torch.split(y, config['batch_size'])
    counter = 0

    # Optionally toggle D and G's "require_grad"
    if config['toggle_grads']:
      utils.toggle_grad(D, True)
      utils.toggle_grad(G, False)

    for step_index in range(config['num_D_steps']):
      # If accumulating gradients, loop multiple times before an optimizer step
      D.optim.zero_grad()
      for accumulation_index in range(config['num_D_accumulations']):
        z_.sample_()
        y_.sample_()
        # Only the first batch_size entries of z_/y_ are used for D; z_ and y_
        # may be larger to serve G's batch size.
        D_fake, D_real, quant_loss_real, quant_loss_fake, ppl = GD(z_[:config['batch_size']], y_[:config[
          'batch_size']], x[counter], y[counter], train_G=False, split_D=config['split_D'])

        # Compute components of D's loss, average them, and divide by
        # the number of gradient accumulations
        D_loss_real, D_loss_fake = losses.discriminator_loss(D_fake, D_real)
        # The quantization losses are folded into the matching D loss terms,
        # so the logged D_loss_real / D_loss_fake include them.
        D_loss_real += quant_loss_real.mean()
        D_loss_fake += quant_loss_fake.mean()
        D_loss = (D_loss_real + D_loss_fake) / float(config['num_D_accumulations'])
        D_loss.backward()
        counter += 1

      # Optionally apply ortho reg in D
      if config['D_ortho'] > 0.0:
        # Debug print to indicate we're using ortho reg in D.
        print('using modified ortho reg in D')
        utils.ortho(D, config['D_ortho'])

      D.optim.step()

    # Optionally toggle "requires_grad"
    if config['toggle_grads']:
      utils.toggle_grad(D, False)
      utils.toggle_grad(G, True)

    # Zero G's gradients by default before training G, for safety
    G.optim.zero_grad()

    # If accumulating gradients, loop multiple times
    for accumulation_index in range(config['num_G_accumulations']):
      z_.sample_()
      y_.sample_()
      D_fake, quant_loss_G = GD(z_, y_, train_G=True, split_D=config['split_D'])
      G_loss = (losses.generator_loss(D_fake) + quant_loss_G.mean()) / float(config['num_G_accumulations'])
      G_loss.backward()

    # Optionally apply modified ortho reg in G
    if config['G_ortho'] > 0.0:
      print('using modified ortho reg in G') # Debug print to indicate we're using ortho reg in G
      # Don't ortho reg shared, it makes no sense. Really we should blacklist any embeddings for this
      utils.ortho(G, config['G_ortho'],
                  blacklist=[param for param in G.shared.parameters()])
    G.optim.step()

    # If we have an ema, update it, regardless of if we test with it or not
    if config['ema']:
      ema.update(state_dict['itr'])

    # G_loss is already divided by num_G_accumulations; the D losses are the
    # undivided values from the last accumulation.
    out = {'G_loss': float(G_loss.item()),
           'D_loss_real': float(D_loss_real.item()),
           'D_loss_fake': float(D_loss_fake.item()),
           'Quant_loss': float(quant_loss_G.mean().item()),
           'Perplexity': float(ppl.mean().item())
           }
    # Return G's loss and the components of D's loss.
    return out
  return train
def save_and_sample(G, D, G_ema, z_, y_, fixed_z, fixed_y,
                    state_dict, config, experiment_name):
  """Save the model weights and write the sample sheets for this iteration.

  Steps, in order:
    1. Save the weights, plus a rotating ``copy%d`` backup when
       ``config['num_save_copies'] > 0`` (``state_dict['save_num']`` is
       advanced modulo that count).
    2. Pick the generator used for sampling: ``G_ema`` when both
       ``config['ema']`` and ``config['use_ema']`` are set, otherwise ``G``.
    3. Optionally accumulate standing statistics for that generator.
    4. Save an image grid generated from the fixed ``fixed_z`` / ``fixed_y``
       to ``<samples_root>/<experiment_name>/fixed_samples<itr>.jpg``.
    5. Save a conditional sample sheet and three interpolation sheets.

  Args:
    G, D, G_ema: networks; ``G_ema`` is only used when ``config['ema']`` is set.
    z_, y_: noise / label holders passed on to the utils helpers.
    fixed_z, fixed_y: fixed noise and labels for the evolution sheet.
    state_dict: training state; ``itr`` is read, ``save_num`` is updated.
    config: configuration dict.
    experiment_name: name used for the weight files and the samples folder.
  """
  ema_or_none = G_ema if config['ema'] else None
  utils.save_weights(G, D, state_dict, config['weights_root'],
                     experiment_name, None, ema_or_none)
  # Save an additional copy to mitigate accidental corruption if process
  # is killed during a save (it's happened to me before -.-)
  if config['num_save_copies'] > 0:
    utils.save_weights(G, D, state_dict, config['weights_root'],
                       experiment_name,
                       'copy%d' % state_dict['save_num'],
                       ema_or_none)
    state_dict['save_num'] = (state_dict['save_num'] + 1) % config['num_save_copies']

  # Use EMA G for samples or non-EMA?
  which_G = G_ema if config['ema'] and config['use_ema'] else G

  # Accumulate standing statistics?
  if config['accumulate_stats']:
    utils.accumulate_standing_stats(which_G,
                                    z_, y_, config['n_classes'],
                                    config['num_standing_accumulations'])

  # Save a random sample sheet with fixed z and y
  with torch.no_grad():
    if config['parallel']:
      fixed_Gz = nn.parallel.data_parallel(which_G, (fixed_z, which_G.shared(fixed_y)))
    else:
      fixed_Gz = which_G(fixed_z, which_G.shared(fixed_y))

  # makedirs(exist_ok=True) also creates a missing samples_root and cannot
  # fail when another process creates the folder between a check and mkdir.
  sample_dir = '%s/%s' % (config['samples_root'], experiment_name)
  os.makedirs(sample_dir, exist_ok=True)
  image_filename = '%s/%s/fixed_samples%d.jpg' % (config['samples_root'],
                                                  experiment_name,
                                                  state_dict['itr'])
  # Near-square grid: the row length is the integer square root of the batch.
  torchvision.utils.save_image(fixed_Gz.float().cpu(), image_filename,
                               nrow=int(fixed_Gz.shape[0] ** 0.5), normalize=True)
  # For now, every time we save, also save sample sheets
  utils.sample_sheet(which_G,
                     classes_per_sheet=utils.classes_per_sheet_dict[config['dataset']],
                     num_classes=config['n_classes'],
                     samples_per_class=10, parallel=config['parallel'],
                     samples_root=config['samples_root'],
                     experiment_name=experiment_name,
                     folder_number=state_dict['itr'],
                     z_=z_)
  # Also save interp sheets
  # (fix_z, fix_y) combinations: neither fixed, y fixed, z fixed.
  for fix_z, fix_y in zip([False, False, True], [False, True, False]):
    utils.interp_sheet(which_G,
                       num_per_sheet=16,
                       num_midpoints=8,
                       num_classes=config['n_classes'],
                       parallel=config['parallel'],
                       samples_root=config['samples_root'],
                       experiment_name=experiment_name,
                       folder_number=state_dict['itr'],
                       sheet_number=0,
                       fix_z=fix_z, fix_y=fix_y, device='cuda')
import os
import tarfile

src_path = '/media/cchen/StorageDisk/yzhao/datasets/images/ImageNet/'


def extract_archives(src_dir, dest_dir='.'):
    """Extract every ``*.tar`` / ``*.tar.gz`` archive found directly in ``src_dir``.

    The previous script looped over ``src_path`` itself, i.e. over the
    characters of the path string, so no archive was ever opened. This version
    lists the directory instead.

    Args:
        src_dir: directory that contains the archives (not searched recursively).
        dest_dir: directory to extract into. Defaults to the current working
            directory, which is where the original ``extractall()`` call wrote.

    Returns:
        The archive file names that were extracted, in sorted order.
    """
    # NOTE(review): extracting archives from an untrusted source is unsafe.
    # Use the stdlib 'data' extraction filter where this Python provides it.
    extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}

    extracted = []
    for fname in sorted(os.listdir(src_dir)):
        if fname.endswith("tar.gz"):
            mode = "r:gz"
        elif fname.endswith("tar"):
            mode = "r:"
        else:
            continue
        # The context manager closes the archive even if extraction fails.
        with tarfile.open(os.path.join(src_dir, fname), mode) as tar:
            tar.extractall(dest_dir, **extract_kwargs)
        extracted.append(fname)
    return extracted


if __name__ == '__main__':
    extract_archives(src_path)
''' from __future__ import print_function import sys import os import numpy as np import time import datetime import json import pickle from argparse import ArgumentParser import animal_hash import torch import torch.nn as nn import torch.nn.functional as F import torchvision import torchvision.transforms as transforms from torch.utils.data import DataLoader import datasets as dset def prepare_parser(): usage = 'Parser for all scripts.' parser = ArgumentParser(description=usage) ### Dataset/Dataloader stuff ### parser.add_argument( '--dataset', type=str, default='I128_hdf5', help='Which Dataset to train on, out of I128, I256, C10, C100;' 'Append "_hdf5" to use the hdf5 version for ISLVRC ' '(default: %(default)s)') parser.add_argument( '--augment', action='store_true', default=False, help='Augment with random crops and flips (default: %(default)s)') parser.add_argument( '--num_workers', type=int, default=8, help='Number of dataloader workers; consider using less for HDF5 ' '(default: %(default)s)') parser.add_argument( '--no_pin_memory', action='store_false', dest='pin_memory', default=True, help='Pin data into memory through dataloader? (default: %(default)s)') parser.add_argument( '--shuffle', action='store_true', default=False, help='Shuffle the data (strongly recommended)? (default: %(default)s)') parser.add_argument( '--load_in_mem', action='store_true', default=False, help='Load all data into memory? (default: %(default)s)') parser.add_argument( '--use_multiepoch_sampler', action='store_true', default=False, help='Use the multi-epoch sampler for dataloader? 
(default: %(default)s)') ### Quantization layer stuff parser.add_argument( '--dict_decay', type=float, default=0.8, help='discrete dict learning decay') parser.add_argument( '--commitment', type=float, default=0.5, help='regularizer coefficient') parser.add_argument( '--discrete_layer', type=str, default='2', help='which layer to add the discretization') parser.add_argument( '--dict_size', type=int, default=10, help='number of keys in dict') ### Model stuff ### parser.add_argument( '--model', type=str, default='BigGAN', help='Name of the model module (default: %(default)s)') parser.add_argument( '--G_param', type=str, default='SN', help='Parameterization style to use for G, spectral norm (SN) or SVD (SVD)' ' or None (default: %(default)s)') parser.add_argument( '--D_param', type=str, default='SN', help='Parameterization style to use for D, spectral norm (SN) or SVD (SVD)' ' or None (default: %(default)s)') parser.add_argument( '--G_ch', type=int, default=64, help='Channel multiplier for G (default: %(default)s)') parser.add_argument( '--D_ch', type=int, default=64, help='Channel multiplier for D (default: %(default)s)') parser.add_argument( '--G_depth', type=int, default=1, help='Number of resblocks per stage in G? (default: %(default)s)') parser.add_argument( '--D_depth', type=int, default=1, help='Number of resblocks per stage in D? (default: %(default)s)') parser.add_argument( '--D_thin', action='store_false', dest='D_wide', default=True, help='Use the SN-GAN channel pattern for D? (default: %(default)s)') parser.add_argument( '--G_shared', action='store_true', default=False, help='Use shared embeddings in G? (default: %(default)s)') parser.add_argument( '--shared_dim', type=int, default=0, help='G''s shared embedding dimensionality; if 0, will be equal to dim_z. 
' '(default: %(default)s)') parser.add_argument( '--dim_z', type=int, default=128, help='Noise dimensionality: %(default)s)') parser.add_argument( '--z_var', type=float, default=1.0, help='Noise variance: %(default)s)') parser.add_argument( '--hier', action='store_true', default=False, help='Use hierarchical z in G? (default: %(default)s)') parser.add_argument( '--cross_replica', action='store_true', default=False, help='Cross_replica batchnorm in G?(default: %(default)s)') parser.add_argument( '--mybn', action='store_true', default=False, help='Use my batchnorm (which supports standing stats?) %(default)s)') parser.add_argument( '--G_nl', type=str, default='relu', help='Activation function for G (default: %(default)s)') parser.add_argument( '--D_nl', type=str, default='relu', help='Activation function for D (default: %(default)s)') parser.add_argument( '--G_attn', type=str, default='64', help='What resolutions to use attention on for G (underscore separated) ' '(default: %(default)s)') parser.add_argument( '--D_attn', type=str, default='64', help='What resolutions to use attention on for D (underscore separated) ' '(default: %(default)s)') parser.add_argument( '--norm_style', type=str, default='bn', help='Normalizer style for G, one of bn [batchnorm], in [instancenorm], ' 'ln [layernorm], gn [groupnorm] (default: %(default)s)') ### Model init stuff ### parser.add_argument( '--seed', type=int, default=0, help='Random seed to use; affects both initialization and ' ' dataloading. 
(default: %(default)s)') parser.add_argument( '--G_init', type=str, default='ortho', help='Init style to use for G (default: %(default)s)') parser.add_argument( '--D_init', type=str, default='ortho', help='Init style to use for D(default: %(default)s)') parser.add_argument( '--skip_init', action='store_true', default=False, help='Skip initialization, ideal for testing when ortho init was used ' '(default: %(default)s)') ### Optimizer stuff ### parser.add_argument( '--G_lr', type=float, default=5e-5, help='Learning rate to use for Generator (default: %(default)s)') parser.add_argument( '--D_lr', type=float, default=2e-4, help='Learning rate to use for Discriminator (default: %(default)s)') parser.add_argument( '--G_B1', type=float, default=0.0, help='Beta1 to use for Generator (default: %(default)s)') parser.add_argument( '--D_B1', type=float, default=0.0, help='Beta1 to use for Discriminator (default: %(default)s)') parser.add_argument( '--G_B2', type=float, default=0.999, help='Beta2 to use for Generator (default: %(default)s)') parser.add_argument( '--D_B2', type=float, default=0.999, help='Beta2 to use for Discriminator (default: %(default)s)') ### Batch size, parallel, and precision stuff ### parser.add_argument( '--batch_size', type=int, default=64, help='Default overall batchsize (default: %(default)s)') parser.add_argument( '--G_batch_size', type=int, default=0, help='Batch size to use for G; if 0, same as D (default: %(default)s)') parser.add_argument( '--num_G_accumulations', type=int, default=1, help='Number of passes to accumulate G''s gradients over ' '(default: %(default)s)') parser.add_argument( '--num_D_steps', type=int, default=2, help='Number of D steps per G step (default: %(default)s)') parser.add_argument( '--num_D_accumulations', type=int, default=1, help='Number of passes to accumulate D''s gradients over ' '(default: %(default)s)') parser.add_argument( '--split_D', action='store_true', default=False, help='Run D twice rather than 
concatenating inputs? (default: %(default)s)') parser.add_argument( '--num_epochs', type=int, default=100, help='Number of epochs to train for (default: %(default)s)') parser.add_argument( '--parallel', action='store_true', default=False, help='Train with multiple GPUs (default: %(default)s)') parser.add_argument( '--G_fp16', action='store_true', default=False, help='Train with half-precision in G? (default: %(default)s)') parser.add_argument( '--D_fp16', action='store_true', default=False, help='Train with half-precision in D? (default: %(default)s)') parser.add_argument( '--D_mixed_precision', action='store_true', default=False, help='Train with half-precision activations but fp32 params in D? ' '(default: %(default)s)') parser.add_argument( '--G_mixed_precision', action='store_true', default=False, help='Train with half-precision activations but fp32 params in G? ' '(default: %(default)s)') parser.add_argument( '--accumulate_stats', action='store_true', default=False, help='Accumulate "standing" batchnorm stats? (default: %(default)s)') parser.add_argument( '--num_standing_accumulations', type=int, default=16, help='Number of forward passes to use in accumulating standing stats? ' '(default: %(default)s)') ### Bookkeping stuff ### parser.add_argument( '--G_eval_mode', action='store_true', default=False, help='Run G in eval mode (running/standing stats?) at sample/test time? 
' '(default: %(default)s)') parser.add_argument( '--save_every', type=int, default=2000, help='Save every X iterations (default: %(default)s)') parser.add_argument( '--num_save_copies', type=int, default=2, help='How many copies to save (default: %(default)s)') parser.add_argument( '--num_best_copies', type=int, default=2, help='How many previous best checkpoints to save (default: %(default)s)') parser.add_argument( '--which_best', type=str, default='FID', help='Which metric to use to determine when to save new "best"' 'checkpoints, one of IS or FID (default: %(default)s)') parser.add_argument( '--no_fid', action='store_true', default=False, help='Calculate IS only, not FID? (default: %(default)s)') parser.add_argument( '--test_every', type=int, default=5000, help='Test every X iterations (default: %(default)s)') parser.add_argument( '--num_inception_images', type=int, default=50000, help='Number of samples to compute inception metrics with ' '(default: %(default)s)') parser.add_argument( '--hashname', action='store_true', default=False, help='Use a hash of the experiment name instead of the full config ' '(default: %(default)s)') parser.add_argument( '--base_root', type=str, default='', help='Default location to store all weights, samples, data, and logs ' ' (default: %(default)s)') parser.add_argument( '--data_root', type=str, default='data', help='Default location where data is stored (default: %(default)s)') parser.add_argument( '--weights_root', type=str, default='weights', help='Default location to store weights (default: %(default)s)') parser.add_argument( '--logs_root', type=str, default='logs', help='Default location to store logs (default: %(default)s)') parser.add_argument( '--samples_root', type=str, default='samples', help='Default location to store samples (default: %(default)s)') parser.add_argument( '--pbar', type=str, default='mine', help='Type of progressbar to use; one of "mine" or "tqdm" ' '(default: %(default)s)') parser.add_argument( 
'--name_suffix', type=str, default='', help='Suffix for experiment name for loading weights for sampling ' '(consider "best0") (default: %(default)s)') parser.add_argument( '--experiment_name', type=str, default='', help='Optionally override the automatic experiment naming with this arg. ' '(default: %(default)s)') parser.add_argument( '--config_from_name', action='store_true', default=False, help='Use a hash of the experiment name instead of the full config ' '(default: %(default)s)') ### EMA Stuff ### parser.add_argument( '--ema', action='store_true', default=False, help='Keep an ema of G''s weights? (default: %(default)s)') parser.add_argument( '--ema_decay', type=float, default=0.9999, help='EMA decay rate (default: %(default)s)') parser.add_argument( '--use_ema', action='store_true', default=False, help='Use the EMA parameters of G for evaluation? (default: %(default)s)') parser.add_argument( '--ema_start', type=int, default=0, help='When to start updating the EMA weights (default: %(default)s)') ### Numerical precision and SV stuff ### parser.add_argument( '--adam_eps', type=float, default=1e-8, help='epsilon value to use for Adam (default: %(default)s)') parser.add_argument( '--BN_eps', type=float, default=1e-5, help='epsilon value to use for BatchNorm (default: %(default)s)') parser.add_argument( '--SN_eps', type=float, default=1e-8, help='epsilon value to use for Spectral Norm(default: %(default)s)') parser.add_argument( '--num_G_SVs', type=int, default=1, help='Number of SVs to track in G (default: %(default)s)') parser.add_argument( '--num_D_SVs', type=int, default=1, help='Number of SVs to track in D (default: %(default)s)') parser.add_argument( '--num_G_SV_itrs', type=int, default=1, help='Number of SV itrs in G (default: %(default)s)') parser.add_argument( '--num_D_SV_itrs', type=int, default=1, help='Number of SV itrs in D (default: %(default)s)') ### Ortho reg stuff ### parser.add_argument( '--G_ortho', type=float, default=0.0, # 1e-4 is default for 
BigGAN help='Modified ortho reg coefficient in G(default: %(default)s)') parser.add_argument( '--D_ortho', type=float, default=0.0, help='Modified ortho reg coefficient in D (default: %(default)s)') parser.add_argument( '--toggle_grads', action='store_true', default=True, help='Toggle D and G''s "requires_grad" settings when not training them? ' ' (default: %(default)s)') ### Which train function ### parser.add_argument( '--which_train_fn', type=str, default='GAN', help='How2trainyourbois (default: %(default)s)') ### Resume training stuff parser.add_argument( '--load_weights', type=str, default='', help='Suffix for which weights to load (e.g. best0, copy0) ' '(default: %(default)s)') parser.add_argument( '--resume', action='store_true', default=False, help='Resume training? (default: %(default)s)') ### Log stuff ### parser.add_argument( '--logstyle', type=str, default='%3.3e', help='What style to use when logging training metrics?' 'One of: %#.#f/ %#.#e (float/exp, text),' 'pickle (python pickle),' 'npz (numpy zip),' 'mat (MATLAB .mat file) (default: %(default)s)') parser.add_argument( '--log_G_spectra', action='store_true', default=False, help='Log the top 3 singular values in each SN layer in G? ' '(default: %(default)s)') parser.add_argument( '--log_D_spectra', action='store_true', default=False, help='Log the top 3 singular values in each SN layer in D? ' '(default: %(default)s)') parser.add_argument( '--sv_log_interval', type=int, default=10, help='Iteration interval for logging singular values ' ' (default: %(default)s)') return parser # Arguments for sample.py; not presently used in train.py def add_sample_parser(parser): parser.add_argument( '--sample_npz', action='store_true', default=False, help='Sample "sample_num_npz" images and save to npz? 
' '(default: %(default)s)') parser.add_argument( '--sample_num_npz', type=int, default=50000, help='Number of images to sample when sampling NPZs ' '(default: %(default)s)') parser.add_argument( '--sample_sheets', action='store_true', default=False, help='Produce class-conditional sample sheets and stick them in ' 'the samples root? (default: %(default)s)') parser.add_argument( '--sample_interps', action='store_true', default=False, help='Produce interpolation sheets and stick them in ' 'the samples root? (default: %(default)s)') parser.add_argument( '--sample_sheet_folder_num', type=int, default=-1, help='Number to use for the folder for these sample sheets ' '(default: %(default)s)') parser.add_argument( '--sample_random', action='store_true', default=False, help='Produce a single random sheet? (default: %(default)s)') parser.add_argument( '--sample_trunc_curves', type=str, default='', help='Get inception metrics with a range of variances?' 'To use this, specify a startpoint, step, and endpoint, e.g. ' '--sample_trunc_curves 0.2_0.1_1.0 for a startpoint of 0.2, ' 'endpoint of 1.0, and stepsize of 1.0. Note that this is ' 'not exactly identical to using tf.truncated_normal, but should ' 'have approximately the same effect. (default: %(default)s)') parser.add_argument( '--sample_inception_metrics', action='store_true', default=False, help='Calculate Inception metrics with sample.py? 
(default: %(default)s)') return parser # Convenience dicts dset_dict = {'I32': dset.ImageFolder, 'I64': dset.ImageFolder, 'I128': dset.ImageFolder, 'I256': dset.ImageFolder, 'I32_hdf5': dset.ILSVRC_HDF5, 'I64_hdf5': dset.ILSVRC_HDF5, 'I128_hdf5': dset.ILSVRC_HDF5, 'I256_hdf5': dset.ILSVRC_HDF5, 'C10': dset.CIFAR10, 'C100': dset.CIFAR100, 'I64ext': dset.ImageFolder, 'I64ext_hdf5': dset.ILSVRC_HDF5, 'I128ext': dset.ImageFolder, 'I128ext_hdf5': dset.ILSVRC_HDF5} imsize_dict = {'I32': 32, 'I32_hdf5': 32, 'I64': 64, 'I64_hdf5': 64, 'I128': 128, 'I128_hdf5': 128, 'I256': 256, 'I256_hdf5': 256, 'C10': 32, 'C100': 32, 'I64ext': 64, 'I64ext_hdf5': 64, 'I128ext': 128, 'I128ext_hdf5': 128} root_dict = {'I32': 'ImageNet', 'I32_hdf5': 'ILSVRC32.hdf5', 'I64': 'ImageNet', 'I64_hdf5': 'ILSVRC64.hdf5', 'I128': 'ImageNet', 'I128_hdf5': 'ILSVRC128.hdf5', 'I256': 'ImageNet', 'I256_hdf5': 'ILSVRC256.hdf5', 'C10': 'cifar', 'C100': 'cifar', 'I64ext': 'Ext', 'I64ext_hdf5': 'I64Ext.hdf5', 'I128ext': 'Ext', 'I128ext_hdf5': 'I128Ext.hdf5',} nclass_dict = {'I32': 1000, 'I32_hdf5': 1000, 'I64': 1000, 'I64_hdf5': 1000, 'I128': 1000, 'I128_hdf5': 1000, 'I256': 1000, 'I256_hdf5': 1000, 'C10': 10, 'C100': 100, 'I64ext': 20, 'I64ext_hdf5': 20, 'I128ext': 10, 'I128ext_hdf5': 10} # Number of classes to put per sample sheet classes_per_sheet_dict = {'I32': 50, 'I32_hdf5': 50, 'I64': 50, 'I64_hdf5': 50, 'I128': 20, 'I128_hdf5': 20, 'I256': 20, 'I256_hdf5': 20, 'C10': 10, 'C100': 100, 'I64ext': 20, 'I64ext_hdf5': 20, 'I128ext': 20, 'I128ext_hdf5': 20} activation_dict = {'inplace_relu': nn.ReLU(inplace=True), 'relu': nn.ReLU(inplace=False), 'ir': nn.ReLU(inplace=True),} class CenterCropLongEdge(object): """Crops the given PIL Image on the long edge. Args: size (sequence or int): Desired output size of the crop. If size is an int instead of sequence like (h, w), a square crop (size, size) is made. """ def __call__(self, img): """ Args: img (PIL Image): Image to be cropped. 
Returns: PIL Image: Cropped image. """ return transforms.functional.center_crop(img, min(img.size)) def __repr__(self): return self.__class__.__name__ class RandomCropLongEdge(object): """Crops the given PIL Image on the long edge with a random start point. Args: size (sequence or int): Desired output size of the crop. If size is an int instead of sequence like (h, w), a square crop (size, size) is made. """ def __call__(self, img): """ Args: img (PIL Image): Image to be cropped. Returns: PIL Image: Cropped image. """ size = (min(img.size), min(img.size)) # Only step forward along this edge if it's the long edge i = (0 if size[0] == img.size[0] else np.random.randint(low=0,high=img.size[0] - size[0])) j = (0 if size[1] == img.size[1] else np.random.randint(low=0,high=img.size[1] - size[1])) return transforms.functional.crop(img, i, j, size[0], size[1]) def __repr__(self): return self.__class__.__name__ # multi-epoch Dataset sampler to avoid memory leakage and enable resumption of # training from the same sample regardless of if we stop mid-epoch class MultiEpochSampler(torch.utils.data.Sampler): r"""Samples elements randomly over multiple epochs Arguments: data_source (Dataset): dataset to sample from num_epochs (int) : Number of times to loop over the dataset start_itr (int) : which iteration to begin from """ def __init__(self, data_source, num_epochs, start_itr=0, batch_size=128): self.data_source = data_source self.num_samples = len(self.data_source) self.num_epochs = num_epochs self.start_itr = start_itr self.batch_size = batch_size if not isinstance(self.num_samples, int) or self.num_samples <= 0: raise ValueError("num_samples should be a positive integeral " "value, but got num_samples={}".format(self.num_samples)) def __iter__(self): n = len(self.data_source) # Determine number of epochs num_epochs = int(np.ceil((n * self.num_epochs - (self.start_itr * self.batch_size)) / float(n))) # Sample all the indices, and then grab the last num_epochs index sets; # 
This ensures if we're starting at epoch 4, we're still grabbing epoch 4's # indices out = [torch.randperm(n) for epoch in range(self.num_epochs)][-num_epochs:] # Ignore the first start_itr % n indices of the first epoch out[0] = out[0][(self.start_itr * self.batch_size % n):] # if self.replacement: # return iter(torch.randint(high=n, size=(self.num_samples,), dtype=torch.int64).tolist()) # return iter(.tolist()) output = torch.cat(out).tolist() print('Length dataset output is %d' % len(output)) return iter(output) def __len__(self): return len(self.data_source) * self.num_epochs - self.start_itr * self.batch_size # Convenience function to centralize all data loaders def get_data_loaders(dataset, data_root=None, augment=False, batch_size=64, num_workers=8, shuffle=True, load_in_mem=False, hdf5=False, pin_memory=True, drop_last=True, start_itr=0, num_epochs=500, use_multiepoch_sampler=False, **kwargs): # Append /FILENAME.hdf5 to root if using hdf5 data_root += '/%s' % root_dict[dataset] print('Using dataset root location %s' % data_root) which_dataset = dset_dict[dataset] norm_mean = [0.5,0.5,0.5] norm_std = [0.5,0.5,0.5] image_size = imsize_dict[dataset] # For image folder datasets, name of the file where we store the precomputed # image locations to avoid having to walk the dirs every time we load. 
dataset_kwargs = {'index_filename': '%s_imgs.npz' % dataset} # HDF5 datasets have their own inbuilt transform, no need to train_transform if 'hdf5' in dataset: train_transform = None else: if augment: print('Data will be augmented...') if dataset in ['C10', 'C100']: train_transform = [transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip()] else: train_transform = [RandomCropLongEdge(), transforms.Resize(image_size), transforms.RandomHorizontalFlip()] else: print('Data will not be augmented...') if dataset in ['C10', 'C100']: train_transform = [] else: train_transform = [CenterCropLongEdge(), transforms.Resize(image_size)] # train_transform = [transforms.Resize(image_size), transforms.CenterCrop] train_transform = transforms.Compose(train_transform + [ transforms.ToTensor(), transforms.Normalize(norm_mean, norm_std)]) train_set = which_dataset(root=data_root, transform=train_transform, load_in_mem=load_in_mem, **dataset_kwargs) # Prepare loader; the loaders list is for forward compatibility with # using validation / test splits. loaders = [] if use_multiepoch_sampler: print('Using multiepoch sampler from start_itr %d...' % start_itr) loader_kwargs = {'num_workers': num_workers, 'pin_memory': pin_memory} sampler = MultiEpochSampler(train_set, num_epochs, start_itr, batch_size) train_loader = DataLoader(train_set, batch_size=batch_size, sampler=sampler, **loader_kwargs) else: loader_kwargs = {'num_workers': num_workers, 'pin_memory': pin_memory, 'drop_last': drop_last} # Default, drop last incomplete batch train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=shuffle, **loader_kwargs) loaders.append(train_loader) return loaders # Utility file to seed rngs def seed_rng(seed): torch.manual_seed(seed) torch.cuda.manual_seed(seed) np.random.seed(seed) # Utility to peg all roots to a base root # If a base root folder is provided, peg all other root folders to it. 
def update_config_roots(config):
    """Peg the data/weights/logs/samples roots to ``config['base_root']``.

    If ``config['base_root']`` is truthy, ``config['data_root']``,
    ``config['weights_root']``, ``config['logs_root']`` and
    ``config['samples_root']`` are overwritten with ``<base_root>/<name>``.
    Otherwise the config is left untouched.

    Args:
        config: dict-like configuration; modified in place.

    Returns:
        The same ``config`` object that was passed in.
    """
    if config['base_root']:
        print('Pegging all root folders to base root %s' % config['base_root'])
        for key in ['data', 'weights', 'logs', 'samples']:
            config['%s_root' % key] = '%s/%s' % (config['base_root'], key)
    return config


# Utility to prepare root folders if they don't exist; parent folder must exist
def prepare_root(config):
    """Create the weights, logs and samples root directories if missing.

    Uses ``os.mkdir``, so the parent of each directory must already exist.

    Args:
        config: dict-like with 'weights_root', 'logs_root' and 'samples_root'.
    """
    for key in ['weights_root', 'logs_root', 'samples_root']:
        if not os.path.exists(config[key]):
            print('Making directory %s for %s...' % (config[key], key))
            os.mkdir(config[key])


# Simple wrapper that applies EMA to a model. Could be better done in 1.0 using
# the parameters() and buffers() module functions, but for now this works
# with state_dicts using .copy_
class ema(object):
    """Keep ``target`` as an exponential moving average of ``source``.

    Both models are accessed through their ``state_dict()``; every entry of
    the target is overwritten in place, so the two state dicts must share
    the same keys.

    Args:
        source: module whose state is being averaged.
        target: module that receives the averaged state.
        decay: EMA decay rate applied once ``start_itr`` has been reached.
        start_itr: iteration before which the target is simply pegged to the
            source (i.e. an effective decay of 0).
    """

    def __init__(self, source, target, decay=0.9999, start_itr=0):
        self.source = source
        self.target = target
        self.decay = decay
        # Optional parameter indicating what iteration to start the decay at
        self.start_itr = start_itr
        # Initialize target's params to be source's
        self.source_dict = self.source.state_dict()
        self.target_dict = self.target.state_dict()
        print('Initializing EMA parameters to be source parameters...')
        with torch.no_grad():
            for key in self.source_dict:
                # Copy in place; rebinding .data on the state_dict entry does
                # not propagate to the module.
                self.target_dict[key].data.copy_(self.source_dict[key].data)

    def update(self, itr=None):
        """Blend the current source state into the target state.

        Args:
            itr: optional iteration counter. When given and smaller than
                ``start_itr`` the target is pegged to the source; otherwise
                the configured decay is used.
        """
        # Compare against None explicitly: iteration 0 is a valid counter
        # value and must still be treated as "before start_itr".
        if itr is not None and itr < self.start_itr:
            decay = 0.0
        else:
            decay = self.decay
        with torch.no_grad():
            for key in self.source_dict:
                self.target_dict[key].data.copy_(
                    self.target_dict[key].data * decay
                    + self.source_dict[key].data * (1 - decay))


# Apply modified ortho reg to a model
# This function is an optimized version that directly computes the gradient,
# instead of computing and then differentiating the loss.
def _ortho_should_skip(param, blacklist):
    """Return True if ortho regularization must not touch ``param``."""
    if len(param.shape) < 2 or param.grad is None:
        return True
    # Identity comparison: `param in blacklist` would fall back to an
    # elementwise tensor `==`, which is ambiguous or raises on shape mismatch.
    return any(param is item for item in blacklist)


def ortho(model, strength=1e-4, blacklist=None):
    """Add the modified orthogonal-regularization gradient to ``model``.

    Each eligible parameter is flattened to ``w`` of shape
    ``(param.shape[0], -1)``, and
    ``strength * 2 * ((w w^T) * (1 - I)) w`` is added to its ``.grad``.

    Args:
        model: module whose parameters are regularized.
        strength: coefficient multiplying the gradient.
        blacklist: optional iterable of parameters (matched by identity) to
            leave untouched.

    Parameters with fewer than 2 axes, parameters in the blacklist and
    parameters that currently have no gradient are skipped.
    """
    blacklist = [] if blacklist is None else blacklist
    with torch.no_grad():
        for param in model.parameters():
            if _ortho_should_skip(param, blacklist):
                continue
            w = param.view(param.shape[0], -1)
            eye = torch.eye(w.shape[0], device=w.device, dtype=w.dtype)
            grad = 2 * torch.mm(torch.mm(w, w.t()) * (1. - eye), w)
            param.grad.data += strength * grad.view(param.shape)


# Default ortho reg
# This function is an optimized version that directly computes the gradient,
# instead of computing and then differentiating the loss.
def default_ortho(model, strength=1e-4, blacklist=None):
    """Add the default orthogonal-regularization gradient to ``model``.

    Same contract as ``ortho``, but the added gradient is
    ``strength * 2 * (w w^T - I) w``.

    Args:
        model: module whose parameters are regularized.
        strength: coefficient multiplying the gradient.
        blacklist: optional iterable of parameters (matched by identity) to
            leave untouched.
    """
    blacklist = [] if blacklist is None else blacklist
    with torch.no_grad():
        for param in model.parameters():
            if _ortho_should_skip(param, blacklist):
                continue
            w = param.view(param.shape[0], -1)
            eye = torch.eye(w.shape[0], device=w.device, dtype=w.dtype)
            grad = 2 * torch.mm(torch.mm(w, w.t()) - eye, w)
            param.grad.data += strength * grad.view(param.shape)


# Convenience utility to switch off requires_grad
def toggle_grad(model, on_or_off):
    """Set ``requires_grad`` to ``on_or_off`` on every parameter of ``model``."""
    for param in model.parameters():
        param.requires_grad = on_or_off


# Function to join strings or ignore them
# Base string is the string to link "strings," while strings
# is a list of strings or Nones.
def join_strings(base_string, strings):
    """Join the truthy entries of ``strings`` with ``base_string``."""
    return base_string.join([item for item in strings if item])


def _checkpoint_path(root, prefix, name_suffix):
    """Build ``<root>/<prefix>[_<name_suffix>].pth``."""
    return '%s/%s.pth' % (root, join_strings('_', [prefix, name_suffix]))


# Save a model's weights, optimizer, and the state_dict
def save_weights(G, D, state_dict, weights_root, experiment_name,
                 name_suffix=None, G_ema=None):
    """Save G, D, their optimizers, the state dict and optionally G_ema.

    Files are written to ``<weights_root>/<experiment_name>/`` as
    ``G``, ``G_optim``, ``D``, ``D_optim``, ``state_dict`` and ``G_ema``
    (each optionally followed by ``_<name_suffix>``) with a ``.pth`` extension.

    Args:
        G, D: modules exposing ``state_dict()`` and an ``optim`` attribute
            that itself exposes ``state_dict()``.
        state_dict: bookkeeping object saved as-is with ``torch.save``.
        weights_root: existing directory holding all experiments.
        experiment_name: sub-directory for this experiment (created if
            missing).
        name_suffix: optional suffix appended to every file name.
        G_ema: optional module whose ``state_dict()`` is saved as well.
    """
    root = '/'.join([weights_root, experiment_name])
    if not os.path.exists(root):
        os.mkdir(root)
    if name_suffix:
        print('Saving weights to %s/%s...' % (root, name_suffix))
    else:
        print('Saving weights to %s...' % root)
    to_save = [('G', G.state_dict()),
               ('G_optim', G.optim.state_dict()),
               ('D', D.state_dict()),
               ('D_optim', D.optim.state_dict()),
               ('state_dict', state_dict)]
    if G_ema is not None:
        to_save.append(('G_ema', G_ema.state_dict()))
    for prefix, obj in to_save:
        torch.save(obj, _checkpoint_path(root, prefix, name_suffix))


# Load a model's weights, optimizer, and the state_dict
def load_weights(G, D, state_dict, weights_root, experiment_name,
                 name_suffix=None, G_ema=None, strict=True, load_optim=True):
    """Load what ``save_weights`` wrote.

    Args:
        G, D: modules to restore, or None to skip the respective model.
        state_dict: dict whose existing keys are overwritten in place with
            the values from the saved state dict.
        weights_root, experiment_name, name_suffix: locate the files, as in
            ``save_weights``.
        G_ema: optional module restored from the ``G_ema`` file.
        strict: forwarded to ``load_state_dict`` of G, D and G_ema.
        load_optim: also restore ``G.optim`` / ``D.optim`` when True.

    Raises:
        KeyError: if a key of ``state_dict`` is missing from the saved file.
    """
    root = '/'.join([weights_root, experiment_name])
    if name_suffix:
        print('Loading %s weights from %s...' % (name_suffix, root))
    else:
        print('Loading weights from %s...' % root)

    def _load(prefix):
        return torch.load(_checkpoint_path(root, prefix, name_suffix))

    if G is not None:
        G.load_state_dict(_load('G'), strict=strict)
        if load_optim:
            G.optim.load_state_dict(_load('G_optim'))
    if D is not None:
        D.load_state_dict(_load('D'), strict=strict)
        if load_optim:
            D.optim.load_state_dict(_load('D_optim'))
    # Load state dict. Read the file once instead of once per key; skip the
    # read entirely when there is nothing to fill in.
    if state_dict:
        saved_state = _load('state_dict')
        for item in state_dict:
            state_dict[item] = saved_state[item]
    if G_ema is not None:
        G_ema.load_state_dict(_load('G_ema'), strict=strict)


class MetricsLogger(object):
    """Append-only JSON-lines logger, used for logging inception metrics.

    MetricsLogger originally stolen from VoxNet source code.

    Args:
        fname: path of the log file.
        reinitialize: if True and ``fname`` already exists, delete it.
    """

    def __init__(self, fname, reinitialize=False):
        self.fname = fname
        self.reinitialize = reinitialize
        if os.path.exists(self.fname):
            if self.reinitialize:
                print('{} exists, deleting...'.format(self.fname))
                os.remove(self.fname)

    def log(self, record=None, **kwargs):
        """Append one JSON record (plus a ``_stamp`` timestamp) to the file.

        Assumption: no newlines in the input.

        Args:
            record: optional dict; it is updated in place with ``kwargs``
                and the ``_stamp`` entry before being written.
            **kwargs: additional fields of the record.
        """
        if record is None:
            record = {}
        record.update(kwargs)
        record['_stamp'] = time.time()
        with open(self.fname, 'a') as f:
            f.write(json.dumps(record, ensure_ascii=True) + '\n')


# Logstyle is either:
# '%#.#f' for floating point representation in text
# '%#.#e' for exponent representation in text
# 'npz' for output to npz # NOT YET SUPPORTED
# 'pickle' for output to a python pickle # NOT YET SUPPORTED
# 'mat' for output to a MATLAB .mat file # NOT YET SUPPORTED
class MyLogger(object):
    """Plain-text logger writing one ``<root>/<metric>.log`` file per metric.

    Args:
        fname: directory that holds the log files (created if missing; its
            parent must exist).
        reinitialize: if True, an existing log of a metric is deleted the
            first time that metric is logged.
        logstyle: %-format applied to each value, e.g. '%3.3f' or '%3.3e'.
    """

    def __init__(self, fname, reinitialize=False, logstyle='%3.3f'):
        self.root = fname
        if not os.path.exists(self.root):
            os.mkdir(self.root)
        self.reinitialize = reinitialize
        self.metrics = []
        self.logstyle = logstyle  # One of '%3.3f' or like '%3.3e'

    # Delete log if re-starting and log already exists
    def reinit(self, item):
        """Delete the existing log of metric ``item`` if reinitializing."""
        log_path = '%s/%s.log' % (self.root, item)
        if os.path.exists(log_path) and self.reinitialize:
            # Only print the removal message once for singular value logs
            if 'sv' in item:
                if not any('sv' in metric for metric in self.metrics):
                    print('Deleting singular value logs...')
            else:
                print('{} exists, deleting...'.format(log_path))
            os.remove(log_path)

    # Log in plaintext; this is designed for being read in MATLAB(sorry not sorry)
    def log(self, itr, **kwargs):
        """Append ``'<itr>: <formatted value>'`` to each metric's log file.

        Args:
            itr: iteration number written in front of every value.
            **kwargs: metric name -> value, formatted with ``self.logstyle``.
        """
        for arg in kwargs:
            if arg not in self.metrics:
                if self.reinitialize:
                    self.reinit(arg)
                self.metrics += [arg]
            if self.logstyle == 'pickle':
                print('Pickle not currently supported...')
            elif self.logstyle == 'mat':
                print('.mat logstyle not currently supported...')
            elif self.logstyle == 'npz':
                # 'npz' is not a %-format, so it cannot go through the text
                # branch below.
                print('.npz logstyle not currently supported...')
            else:
                with open('%s/%s.log' % (self.root, arg), 'a') as f:
                    f.write('%d: %s\n' % (itr, self.logstyle % kwargs[arg]))


# Write some metadata to the logs directory
def write_metadata(logs_root, experiment_name, config, state_dict):
    """Write the current datetime, config and state to ``metalog.txt``.

    The file is ``<logs_root>/<experiment_name>/metalog.txt`` and is
    overwritten on every call.
    """
    with open(('%s/%s/metalog.txt' %
               (logs_root, experiment_name)), 'w') as writefile:
        writefile.write('datetime: %s\n' % str(datetime.datetime.now()))
        writefile.write('config: %s\n' % str(config))
        writefile.write('state: %s\n' % str(state_dict))


def progress(items, desc='', total=None, min_delay=0.1, displaytype='s1k'):
    """Very basic progress indicator to wrap an iterable in.

    Author: Jan Schlüter
    Andy's adds: time elapsed in addition to ETA, makes it possible to add
    estimated time to 1k iters instead of estimated time to completion.

    Returns a generator over `items`, printing the number and percentage of
    items processed and the estimated remaining processing time before
    yielding the next item. `total` gives the total number of items (required
    if `items` has no length), and `min_delay` gives the minimum time in
    seconds between subsequent prints. `desc` gives an optional prefix text
    (end with a space). `displaytype` selects the estimate: 's1k' prints the
    estimated time to the next multiple of 1000 items, anything else prints
    the estimated time to completion.
    """
    total = total or len(items)
    t_start = time.time()
    t_last = 0
    for n, item in enumerate(items):
        t_now = time.time()
        if t_now - t_last > min_delay:
            print("\r%s%d/%d (%6.2f%%)" % (
                desc, n + 1, total, n / float(total) * 100), end=" ")
            if n > 0:
                t_done = t_now - t_start
                if displaytype == 's1k':  # minutes/seconds for 1000 iters
                    next_1000 = n + (1000 - n % 1000)
                    t_1k = t_done / n * next_1000
                    outlist = (list(divmod(t_done, 60))
                               + list(divmod(t_1k - t_done, 60)))
                    print("(TE/ET1k: %d:%02d / %d:%02d)" % tuple(outlist),
                          end=" ")
                else:  # displaytype == 'eta':
                    t_total = t_done / n * total
                    outlist = (list(divmod(t_done, 60))
                               + list(divmod(t_total - t_done, 60)))
                    print("(TE/ETA: %d:%02d / %d:%02d)" % tuple(outlist),
                          end=" ")
            sys.stdout.flush()
            t_last = t_now
        yield item
    t_total = time.time() - t_start
    print("\r%s%d/%d (100.00%%) (took %d:%02d)" % ((desc, total, total) +
                                                   divmod(t_total, 60)))


# Sample function for use with inception metrics
def sample(G, z_, y_, config):
    """Resample ``z_`` and ``y_`` in place and run G on them without grad.

    Args:
        G: generator; called as ``G(z_, G.shared(y_))``.
        z_, y_: objects exposing ``sample_()`` (presumably the repo's
            resampleable noise/label tensors -- confirm against caller).
        config: dict; only ``config['parallel']`` is read here.

    Returns:
        Tuple ``(G_z, y_)`` of generator output and the labels used.
    """
    with torch.no_grad():
        z_.sample_()
        y_.sample_()
        if config['parallel']:
            G_z = nn.parallel.data_parallel(G, (z_, G.shared(y_)))
        else:
            G_z = G(z_, G.shared(y_))
        return G_z, y_


# Sample function for sample sheets
def sample_sheet(G, classes_per_sheet, num_classes, samples_per_class,
                 parallel, samples_root, experiment_name, folder_number,
                 z_=None, device='cuda'):
    """Save class-conditional sample sheets as JPEG images.

    One sheet is written per group of ``classes_per_sheet`` consecutive
    classes to ``<samples_root>/<experiment_name>/<folder_number>/samples<i>.jpg``;
    classes beyond the last full group are not rendered.

    Args:
        G: generator exposing ``dim_z`` and ``shared``.
        classes_per_sheet: number of classes per sheet.
        num_classes: total number of classes.
        samples_per_class: number of samples drawn for each class.
        parallel: run G through ``nn.parallel.data_parallel`` when True.
        samples_root, experiment_name, folder_number: output location
            (``samples_root`` must already exist).
        z_: optional noise object exposing ``sample_()`` with at least
            ``classes_per_sheet`` rows; otherwise fresh Gaussian noise is
            drawn.
        device: device for the class labels and the freshly drawn noise.
    """
    # Prepare sample directory
    if not os.path.isdir('%s/%s' % (samples_root, experiment_name)):
        os.mkdir('%s/%s' % (samples_root, experiment_name))
    if not os.path.isdir('%s/%s/%d' % (samples_root, experiment_name,
                                        folder_number)):
        os.mkdir('%s/%s/%d' % (samples_root, experiment_name, folder_number))
    # loop over total number of sheets
    for i in range(num_classes // classes_per_sheet):
        ims = []
        y = torch.arange(i * classes_per_sheet, (i + 1) * classes_per_sheet,
                         device=device)
        for j in range(samples_per_class):
            if ((z_ is not None) and hasattr(z_, 'sample_')
                    and classes_per_sheet <= z_.size(0)):
                z_.sample_()
            else:
                # NOTE: this rebinds z_ to a plain tensor, so later
                # iterations also end up in this branch.
                z_ = torch.randn(classes_per_sheet, G.dim_z, device=device)
            with torch.no_grad():
                if parallel:
                    o = nn.parallel.data_parallel(
                        G, (z_[:classes_per_sheet], G.shared(y)))
                else:
                    o = G(z_[:classes_per_sheet], G.shared(y))
            ims += [o.data.cpu()]
        # This line should properly unroll the images
        out_ims = torch.stack(ims, 1).view(-1, ims[0].shape[1],
                                           ims[0].shape[2],
                                           ims[0].shape[3]).data.float().cpu()
        # The path for the samples
        image_filename = '%s/%s/%d/samples%d.jpg' % (samples_root,
                                                     experiment_name,
                                                     folder_number, i)
        torchvision.utils.save_image(out_ims, image_filename,
                                     nrow=samples_per_class, normalize=True)


# Interp function; expects x0 and x1 to be of shape (shape0, 1, rest_of_shape..)
def interp(x0, x1, num_midpoints):
    """Linearly interpolate between ``x0`` and ``x1``.

    Args:
        x0, x1: tensors of shape (N, 1, D).
        num_midpoints: number of points strictly between the endpoints.

    Returns:
        Tensor of shape (N, num_midpoints + 2, D) going from ``x0`` to ``x1``
        along axis 1, on the device and in the dtype of ``x0``.
    """
    # Build the weights on x0's device instead of assuming CUDA.
    lerp = torch.linspace(0, 1.0, num_midpoints + 2,
                          device=x0.device).to(x0.dtype)
    return ((x0 * (1 - lerp.view(1, -1, 1))) + (x1 * lerp.view(1, -1, 1)))


# interp sheet function
# Supports full, class-wise and intra-class interpolation
def interp_sheet(G, num_per_sheet, num_midpoints, num_classes, parallel,
                 samples_root, experiment_name, folder_number, sheet_number=0,
                 fix_z=False, fix_y=False, device='cuda'):
    """Save one interpolation sheet as a JPEG image.

    Each of the ``num_per_sheet`` rows shows ``num_midpoints + 2`` images
    interpolating in z, in the class embedding, or in both. The output file
    is ``<samples_root>/<experiment_name>/<folder_number>/interp<style><sheet_number>.jpg``
    where ``<style>`` contains 'Z' and/or 'Y' for the interpolated inputs.

    Args:
        G: generator exposing ``dim_z``, ``shared`` and ``fp16``.
        num_per_sheet: number of rows.
        num_midpoints: interpolation points between the two endpoints.
        num_classes: number of classes to sample labels from.
        parallel: run G through ``nn.parallel.data_parallel`` when True.
        samples_root, experiment_name, folder_number, sheet_number: output
            location; the target directory must already exist.
        fix_z: keep z constant along each row.
        fix_y: keep the class constant along each row.
        device: device on which the noise is sampled.
    """
    steps = num_midpoints + 2
    # Prepare zs and ys
    if fix_z:  # If fix Z, only sample 1 z per row
        zs = torch.randn(num_per_sheet, 1, G.dim_z, device=device)
        zs = zs.repeat(1, steps, 1).view(-1, G.dim_z)
    else:
        zs = interp(torch.randn(num_per_sheet, 1, G.dim_z, device=device),
                    torch.randn(num_per_sheet, 1, G.dim_z, device=device),
                    num_midpoints).view(-1, G.dim_z)
    if fix_y:  # If fix y, only sample 1 y per row
        ys = sample_1hot(num_per_sheet, num_classes)
        ys = G.shared(ys).view(num_per_sheet, 1, -1)
        ys = ys.repeat(1, steps, 1).view(num_per_sheet * steps, -1)
    else:
        ys = interp(
            G.shared(sample_1hot(num_per_sheet, num_classes)).view(
                num_per_sheet, 1, -1),
            G.shared(sample_1hot(num_per_sheet, num_classes)).view(
                num_per_sheet, 1, -1),
            num_midpoints).view(num_per_sheet * steps, -1)
    # Run the net--note that we've already passed y through G.shared.
    if G.fp16:
        zs = zs.half()
    with torch.no_grad():
        if parallel:
            out_ims = nn.parallel.data_parallel(G, (zs, ys)).data.cpu()
        else:
            out_ims = G(zs, ys).data.cpu()
    interp_style = '' + ('Z' if not fix_z else '') + ('Y' if not fix_y else '')
    image_filename = '%s/%s/%d/interp%s%d.jpg' % (samples_root,
                                                  experiment_name,
                                                  folder_number, interp_style,
                                                  sheet_number)
    torchvision.utils.save_image(out_ims, image_filename,
                                 nrow=steps, normalize=True)


# Convenience debugging function to print out gradnorms and shape from each layer
# May need to rewrite this so we can actually see which parameter is which
def print_grad_norms(net):
    """Print gradient norm, parameter norm and shape per parameter.

    Entries are sorted by ascending gradient norm. Parameters that have no
    gradient yet are left out.
    """
    gradsums = [[float(torch.norm(param.grad).item()),
                 float(torch.norm(param).item()), param.shape]
                for param in net.parameters() if param.grad is not None]
    order = np.argsort([item[0] for item in gradsums])
    print(['%3.3e,%3.3e, %s' % (gradsums[item_index][0],
                                gradsums[item_index][1],
                                str(gradsums[item_index][2]))
           for item_index in order])


# Get singular values to log. This will use the state dict to find them
# and substitute underscores for dots.
def get_SVs(net, prefix):
    """Collect the singular-value entries of `net`'s state dict for logging.

    Every state-dict key containing ``'sv'`` is returned as a float under the
    name ``<prefix>_<key>`` with dots replaced by underscores.
    """
    d = net.state_dict()
    return {('%s_%s' % (prefix, key)).replace('.', '_'): float(d[key].item())
            for key in d if 'sv' in key}


# Name an experiment based on its config
def name_from_config(config):
    """Build the experiment name from the relevant entries of `config`.

    Components whose value equals the "off" setting are left out. When
    ``config['hashname']`` is truthy the name is passed through `hashname`.
    """
    name = '_'.join([
        item for item in [
            'Big%s' % config['which_train_fn'],
            config['dataset'],
            config['model'] if config['model'] != 'BigGAN' else None,
            'seed%d' % config['seed'],
            'Gch%d' % config['G_ch'],
            'Dch%d' % config['D_ch'],
            # 'Gd%d' % config['G_depth'] if config['G_depth'] > 1 else None,
            # 'Dd%d' % config['D_depth'] if config['D_depth'] > 1 else None,
            'bs%d' % config['batch_size'],
            # 'Gfp16' if config['G_fp16'] else None,
            # 'Dfp16' if config['D_fp16'] else None,
            # 'nDs%d' % config['num_D_steps'] if config['num_D_steps'] > 1 else None,
            'nDa%d' % config['num_D_accumulations'] if config['num_D_accumulations'] > 1 else None,
            'nGa%d' % config['num_G_accumulations'] if config['num_G_accumulations'] > 1 else None,
            # 'Glr%2.1e' % config['G_lr'],
            # 'Dlr%2.1e' % config['D_lr'],
            # 'GB%3.3f' % config['G_B1'] if config['G_B1'] !=0.0 else None,
            # 'GBB%3.3f' % config['G_B2'] if config['G_B2'] !=0.999 else None,
            # 'DB%3.3f' % config['D_B1'] if config['D_B1'] !=0.0 else None,
            # 'DBB%3.3f' % config['D_B2'] if config['D_B2'] !=0.999 else None,
            # 'Gnl%s' % config['G_nl'],
            # 'Dnl%s' % config['D_nl'],
            # 'Ginit%s' % config['G_init'],
            # 'Dinit%s' % config['D_init'],
            # 'G%s' % config['G_param'] if config['G_param'] != 'SN' else None,
            # 'D%s' % config['D_param'] if config['D_param'] != 'SN' else None,
            'Gattn%s' % config['G_attn'] if config['G_attn'] != '0' else None,
            'Dattn%s' % config['D_attn'] if config['D_attn'] != '0' else None,
            # 'Gortho%2.1e' % config['G_ortho'] if config['G_ortho'] > 0.0 else None,
            # 'Dortho%2.1e' % config['D_ortho'] if config['D_ortho'] > 0.0 else None,
            # config['norm_style'] if config['norm_style'] != 'bn' else None,
            # 'cr' if config['cross_replica'] else None,
            # 'Gshared' if config['G_shared'] else None,
            # 'hier' if config['hier'] else None,
            # 'ema' if config['ema'] else None,
            'Commit%3.2f' % config['commitment'] if config['commitment'] else None,
            'Layer%s' % config['discrete_layer'] if config['discrete_layer'] else None,
            'Dicsz%d' % config['dict_size'] if config['dict_size'] else None,
            'Dicdecay%3.2f' % config['dict_decay'] if config['dict_decay'] else None,
            config['name_suffix'] if config['name_suffix'] else None,
        ]
        if item is not None])
    # dogball
    if config['hashname']:
        return hashname(name)
    else:
        return name


# A simple function to produce a unique experiment name from the animal hashes.
def hashname(name):
    """Map `name` to a reproducible three-word name from the animal lists.

    The digest of `name` is decomposed in mixed radix over the lengths of
    ``animal_hash.a``, ``animal_hash.b`` and ``animal_hash.c``. A hashlib
    digest is used rather than the builtin ``hash()`` because string hashes
    are salted per interpreter run, which would give the same config a
    different name each time the program starts.
    """
    import hashlib  # local import: only this helper needs it

    h = int(hashlib.md5(name.encode('utf-8')).hexdigest(), 16)
    a = h % len(animal_hash.a)
    h = h // len(animal_hash.a)
    b = h % len(animal_hash.b)
    # Strip the radix that was just consumed (the length of list b).
    h = h // len(animal_hash.b)
    c = h % len(animal_hash.c)
    return animal_hash.a[a] + animal_hash.b[b] + animal_hash.c[c]


# Get GPU memory, -i is the index
def query_gpu(indices):
    """Print the free memory of the given GPU(s) via ``nvidia-smi``.

    `indices` may be a single index or a list/tuple of indices; ``None``
    falls back to GPU 0.
    """
    if indices is None:
        ids = '0'
    elif isinstance(indices, (list, tuple)):
        ids = ','.join(str(int(i)) for i in indices)
    else:
        ids = str(int(indices))
    os.system('nvidia-smi -i %s --query-gpu=memory.free --format=csv' % ids)


# Convenience function to count the number of parameters in a module
def count_parameters(module):
    """Print the total number of elements over all parameters of `module`."""
    print('Number of parameters: {}'.format(
        sum([p.data.nelement() for p in module.parameters()])))


# Convenience function to sample an index, not actually a 1-hot
def sample_1hot(batch_size, num_classes, device='cuda'):
    """Return `batch_size` int64 class indices drawn uniformly from [0, num_classes)."""
    return torch.randint(low=0, high=num_classes, size=(batch_size,),
                         device=device, dtype=torch.int64, requires_grad=False)


# A highly simplified convenience class for sampling from distributions
# One could also use PyTorch's inbuilt distributions package.
# Note that this class requires initialization to proceed as
# x = Distribution(torch.randn(size))
# x.init_distribution(dist_type, **dist_kwargs)
# x = x.to(device,dtype)
# This is partially based on https://discuss.pytorch.org/t/subclassing-torch-tensor/23754/2
class Distribution(torch.Tensor):
    """Tensor that remembers a distribution and can resample itself in place."""

    # Init the params of the distribution
    def init_distribution(self, dist_type, **kwargs):
        """Store the distribution type ('normal' or 'categorical') and its kwargs."""
        self.dist_type = dist_type
        self.dist_kwargs = kwargs
        if self.dist_type == 'normal':
            self.mean, self.var = kwargs['mean'], kwargs['var']
        elif self.dist_type == 'categorical':
            self.num_categories = kwargs['num_categories']

    def sample_(self):
        """Overwrite the tensor's values with a fresh draw."""
        if self.dist_type == 'normal':
            # NOTE: `var` is handed to normal_ in the std position.
            self.normal_(self.mean, self.var)
        elif self.dist_type == 'categorical':
            self.random_(0, self.num_categories)
        # return self.variable

    # Silly hack: overwrite the to() method to wrap the new object
    # in a distribution as well
    def to(self, *args, **kwargs):
        """Like ``Tensor.to`` but returns a Distribution with the same settings."""
        new_obj = Distribution(self)
        new_obj.init_distribution(self.dist_type, **self.dist_kwargs)
        new_obj.data = super().to(*args, **kwargs)
        return new_obj


# Convenience function to prepare a z and y vector
def prepare_z_y(G_batch_size, dim_z, nclasses, device='cuda',
                fp16=False, z_var=1.0):
    """Create resamplable noise ``z_`` and class-index ``y_`` tensors.

    ``z_`` has shape ``(G_batch_size, dim_z)`` (float16 if `fp16`, else
    float32); ``y_`` has shape ``(G_batch_size,)`` and dtype int64. Both are
    `Distribution` objects placed on `device`.
    """
    z_ = Distribution(torch.randn(G_batch_size, dim_z, requires_grad=False))
    z_.init_distribution('normal', mean=0, var=z_var)
    # The dtype passed here already covers the fp16 case.
    z_ = z_.to(device, torch.float16 if fp16 else torch.float32)

    y_ = Distribution(torch.zeros(G_batch_size, requires_grad=False))
    y_.init_distribution('categorical', num_categories=nclasses)
    y_ = y_.to(device, torch.int64)
    return z_, y_


def initiate_standing_stats(net):
    """Reset and enable accumulation on every submodule that supports it.

    A submodule supports it when it has an ``accumulate_standing`` attribute.
    """
    for module in net.modules():
        if hasattr(module, 'accumulate_standing'):
            module.reset_stats()
            module.accumulate_standing = True


def accumulate_standing_stats(net, z, y, nclasses, num_accumulations=16):
    """Run `num_accumulations` forward passes in train mode, then switch to eval.

    `z` and `y` are resampled in place before every pass; the outputs are
    discarded, the passes exist only for their effect on the modules.
    """
    initiate_standing_stats(net)
    net.train()
    for i in range(num_accumulations):
        with torch.no_grad():
            z.normal_()
            y.random_(0, nclasses)
            net(z, net.shared(y))  # No need to parallelize here unless using syncbn
    # Set to eval mode
    net.eval()


# This version of Adam keeps an fp32 copy of the parameters and
# does all of the parameter updates in fp32, while still doing the
# forwards and backwards passes using fp16 (i.e. fp16 copies of the
# parameters and fp16 activations).
#
# Note that this calls .float() on the params and gradients.
import math
from torch.optim.optimizer import Optimizer


class Adam16(Optimizer):
    """Adam whose moments and master weights are kept in fp32.

    After every step the parameter is overwritten with the fp16 cast of its
    fp32 master copy.
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        params = list(params)
        super(Adam16, self).__init__(params, defaults)

    # Safety modification to make sure we floatify our state
    def load_state_dict(self, state_dict):
        """Load `state_dict`, then cast the stored tensors back to fp32.

        Parameters that have no optimizer state yet are left alone.
        """
        super(Adam16, self).load_state_dict(state_dict)
        for state in self.state.values():
            for key in ('exp_avg', 'exp_avg_sq', 'fp32_p'):
                if key in state:
                    state[key] = state[key].float()

    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue

                grad = p.grad.data.float()
                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    # Exponential moving average of gradient values
                    state['exp_avg'] = torch.zeros_like(grad)
                    # Exponential moving average of squared gradient values
                    state['exp_avg_sq'] = torch.zeros_like(grad)
                    # Fp32 copy of the weights
                    state['fp32_p'] = p.data.float()

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                if group['weight_decay'] != 0:
                    grad = grad.add(state['fp32_p'], alpha=group['weight_decay'])

                # Decay the first and second moment running average coefficient
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)

                denom = exp_avg_sq.sqrt().add_(group['eps'])

                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']
                step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1

                # Update the fp32 master copy, then publish it as fp16.
                state['fp32_p'].addcdiv_(exp_avg, denom, value=-step_size)
                p.data = state['fp32_p'].half()

        return loss


import torch
import torch.nn as nn
import torch.nn.functional as F


class Quantize(nn.Module):
    """Vector-quantization layer whose codebook is updated by moving averages.

    The codebook ``embed`` has shape ``(dim, n_embed)`` and is stored as a
    buffer; it is updated inside ``forward`` while the module is in training
    mode.
    """

    def __init__(self, dim, n_embed, commitment=1.0, decay=0.8, eps=1e-5):
        super().__init__()

        self.dim = dim
        self.n_embed = n_embed
        self.decay = decay
        self.eps = eps
        self.commitment = commitment

        embed = torch.randn(dim, n_embed)
        self.register_buffer('embed', embed)
        self.register_buffer('cluster_size', torch.zeros(n_embed))
        self.register_buffer('embed_avg', embed.clone())

    def forward(self, x, y=None):
        """Quantize a channels-first 4-D input.

        Returns ``(quantized, diff, perplexity)``: the quantized tensor in
        the input layout, the commitment loss per sample with shape
        ``(batch, 1)``, and the perplexity of the code usage. `y` is unused.
        """
        # Move channels last so each position is a `dim`-sized vector.
        x = x.permute(0, 2, 3, 1).contiguous()
        input_shape = x.shape
        flatten = x.reshape(-1, self.dim)
        # Squared distance of every vector to every codebook column.
        dist = (
            flatten.pow(2).sum(1, keepdim=True)
            - 2 * flatten @ self.embed
            + self.embed.pow(2).sum(0, keepdim=True)
        )
        _, embed_ind = (-dist).max(1)
        embed_onehot = F.one_hot(embed_ind, self.n_embed).type(flatten.dtype)
        embed_ind = embed_ind.view(*x.shape[:-1])
        quantize = self.embed_code(embed_ind).view(input_shape)

        if self.training:
            self.cluster_size.data.mul_(self.decay).add_(
                embed_onehot.sum(0), alpha=1 - self.decay
            )
            embed_sum = flatten.transpose(0, 1) @ embed_onehot
            self.embed_avg.data.mul_(self.decay).add_(embed_sum, alpha=1 - self.decay)
            n = self.cluster_size.sum()
            # Smooth the counts with eps so unused codes do not divide by zero.
            cluster_size = (
                (self.cluster_size + self.eps) / (n + self.n_embed * self.eps) * n
            )
            embed_normalized = self.embed_avg / cluster_size.unsqueeze(0)
            self.embed.data.copy_(embed_normalized)

        diff = self.commitment * torch.mean(
            torch.mean((quantize.detach() - x).pow(2), dim=(1, 2)),
            dim=(1,), keepdim=True)
        # Straight-through: forward value is the code, gradient flows to x.
        quantize = x + (quantize - x).detach()
        avg_probs = torch.mean(embed_onehot, 0)
        perplexity = torch.exp(- torch.sum(avg_probs * torch.log(avg_probs + 1e-10)))

        return quantize.permute(0, 3, 1, 2).contiguous(), diff, perplexity

    def embed_code(self, embed_id):
        """Look up the codebook vectors for the indices in `embed_id`."""
        return F.embedding(embed_id, self.embed.transpose(0, 1))
(torch.sum(flat_input ** 2, dim=1, keepdim=True) # + torch.sum(self._embedding.weight ** 2, dim=1) # - 2 * torch.matmul(flat_input, self._embedding.weight.t())) # # Encoding # encoding_indices = torch.argmin(distances, dim=1).unsqueeze(1) # encodings = torch.zeros(encoding_indices.shape[0], self._num_embeddings, device=inputs.device) # encodings.scatter_(1, encoding_indices, 1) # # Quantize and unflatten # quantized = torch.matmul(encodings, self._embedding.weight).view(input_shape) # # Use EMA to update the embedding vectors # if self.training: # self._ema_cluster_size = self._ema_cluster_size * self._decay + \ # (1 - self._decay) * torch.sum(encodings, 0) # # Laplace smoothing of the cluster size # n = torch.sum(self._ema_cluster_size.data) # self._ema_cluster_size = ( # (self._ema_cluster_size + self._epsilon) # / (n + self._num_embeddings * self._epsilon) * n) # dw = torch.matmul(encodings.t(), flat_input) # self._ema_w = nn.Parameter(self._ema_w * self._decay + (1 - self._decay) * dw) # self._embedding.weight = nn.Parameter(self._ema_w / self._ema_cluster_size.unsqueeze(1)) # # Loss # e_latent_loss = F.mse_loss(quantized.detach(), inputs) # loss = self._commitment_cost * e_latent_loss # # Straight Through Estimator # quantized = inputs + (quantized - inputs).detach() # avg_probs = torch.mean(encodings, dim=0) # perplexity = torch.exp(-torch.sum(avg_probs * torch.log(avg_probs + 1e-10))) # # convert quantized from BHWC -> BCHW # return loss, quantized.permute(0, 3, 1, 2).contiguous(), perplexity, encodings ================================================ FILE: FQ-StyleGAN/LICENSE.txt ================================================ Copyright (c) 2019, NVIDIA Corporation. All rights reserved. Nvidia Source Code License-NC ======================================================================= 1. Definitions "Licensor" means any person or entity that distributes its Work. "Software" means the original work of authorship made available under this License. 
"Work" means the Software and any additions to or derivative works of the Software that are made available under this License. "Nvidia Processors" means any central processing unit (CPU), graphics processing unit (GPU), field-programmable gate array (FPGA), application-specific integrated circuit (ASIC) or any combination thereof designed, made, sold, or provided by Nvidia or its affiliates. The terms "reproduce," "reproduction," "derivative works," and "distribution" have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work. Works, including the Software, are "made available" under this License by including in or with the Work either (a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License. 2. License Grants 2.1 Copyright Grant. Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form. 3. Limitations 3.1 Redistribution. You may reproduce or distribute the Work only if (a) you do so under this License, (b) you include a complete copy of this License with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work. 3.2 Derivative Works. You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work ("Your Terms") only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. 
Notwithstanding Your Terms, this License (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself. 3.3 Use Limitation. The Work and any derivative works thereof only may be used or intended for use non-commercially. The Work or derivative works thereof may be used or intended for use by Nvidia or its affiliates commercially or non-commercially. As used herein, "non-commercially" means for research or evaluation purposes only. 3.4 Patent Claims. If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this License from such Licensor (including the grants in Sections 2.1 and 2.2) will terminate immediately. 3.5 Trademarks. This License does not grant any rights to use any Licensor's or its affiliates' names, logos, or trademarks, except as necessary to reproduce the notices described in this License. 3.6 Termination. If you violate any term of this License, then your rights under this License (including the grants in Sections 2.1 and 2.2) will terminate immediately. 4. Disclaimer of Warranty. THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE. 5. Limitation of Liability. 
#----------------------------------------------------------------------------

def error(msg):
    """Print `msg` prefixed with 'Error: ' and terminate with exit status 1."""
    print('Error: ' + msg)
    sys.exit(1)

#----------------------------------------------------------------------------

class TFRecordExporter:
    """Writes images to one .tfrecords file per resolution level.

    Usable as a context manager; leaving the context closes all writers.
    """

    def __init__(self, tfrecord_dir, expected_images, print_progress=True, progress_interval=10):
        self.tfrecord_dir = tfrecord_dir
        # Output files are named after the directory itself.
        self.tfr_prefix = os.path.join(self.tfrecord_dir, os.path.basename(self.tfrecord_dir))
        self.expected_images = expected_images
        self.cur_images = 0
        self.shape = None            # fixed by the first image added
        self.resolution_log2 = None
        self.tfr_writers = []        # created lazily by add_image
        self.print_progress = print_progress
        self.progress_interval = progress_interval

        if self.print_progress:
            print('Creating dataset "%s"' % tfrecord_dir)
        if not os.path.isdir(self.tfrecord_dir):
            os.makedirs(self.tfrecord_dir)
        assert os.path.isdir(self.tfrecord_dir)

    def close(self):
        """Close every writer and report how many images were added."""
        if self.print_progress:
            print('%-40s\r' % 'Flushing data...', end='', flush=True)
        for tfr_writer in self.tfr_writers:
            tfr_writer.close()
        self.tfr_writers = []
        if self.print_progress:
            print('%-40s\r' % '', end='', flush=True)
            print('Added %d images.' % self.cur_images)

    def choose_shuffled_order(self): # Note: Images and labels must be added in shuffled order.
        """Return a fixed-seed permutation of ``range(expected_images)``."""
        order = np.arange(self.expected_images)
        np.random.RandomState(123).shuffle(order)
        return order

    def add_image(self, img):
        """Append one CHW image to every resolution level.

        The first image fixes the expected shape (1 or 3 channels, square,
        power-of-two side) and opens the writers. Each further level stores
        the previous level averaged over 2x2 blocks.
        """
        if self.print_progress and self.cur_images % self.progress_interval == 0:
            print('%d / %d\r' % (self.cur_images, self.expected_images), end='', flush=True)
        if self.shape is None:
            self.shape = img.shape
            self.resolution_log2 = int(np.log2(self.shape[1]))
            assert self.shape[0] in [1, 3]
            assert self.shape[1] == self.shape[2]
            assert self.shape[1] == 2**self.resolution_log2
            tfr_opt = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.NONE)
            for lod in range(self.resolution_log2 - 1):
                tfr_file = self.tfr_prefix + '-r%02d.tfrecords' % (self.resolution_log2 - lod)
                self.tfr_writers.append(tf.python_io.TFRecordWriter(tfr_file, tfr_opt))
        assert img.shape == self.shape
        for lod, tfr_writer in enumerate(self.tfr_writers):
            if lod:
                img = img.astype(np.float32)
                img = (img[:, 0::2, 0::2] + img[:, 0::2, 1::2] + img[:, 1::2, 0::2] + img[:, 1::2, 1::2]) * 0.25
            quant = np.rint(img).clip(0, 255).astype(np.uint8)
            ex = tf.train.Example(features=tf.train.Features(feature={
                'shape': tf.train.Feature(int64_list=tf.train.Int64List(value=quant.shape)),
                'data': tf.train.Feature(bytes_list=tf.train.BytesList(value=[quant.tobytes()]))}))
            tfr_writer.write(ex.SerializeToString())
        self.cur_images += 1

    def add_labels(self, labels):
        """Save `labels` as float32 to ``<prefix>-rxx.labels``.

        Requires one label row per image added so far.
        """
        if self.print_progress:
            print('%-40s\r' % 'Saving labels...', end='', flush=True)
        assert labels.shape[0] == self.cur_images
        with open(self.tfr_prefix + '-rxx.labels', 'wb') as f:
            np.save(f, labels.astype(np.float32))

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

#----------------------------------------------------------------------------

class ExceptionInfo(object):
    """Snapshot of the exception currently being handled (value + traceback text)."""

    def __init__(self):
        self.value = sys.exc_info()[1]
        self.traceback = traceback.format_exc()

#----------------------------------------------------------------------------

class WorkerThread(threading.Thread):
    """Thread that executes ``(func, args, result_queue)`` tasks from a queue.

    A task whose func is None stops the thread.
    """

    def __init__(self, task_queue):
        threading.Thread.__init__(self)
        self.task_queue = task_queue

    def run(self):
        while True:
            func, args, result_queue = self.task_queue.get()
            if func is None:
                break
            try:
                result = func(*args)
            except BaseException:
                # Deliberately broad: whatever the task raised is handed to
                # the consumer, which re-raises it in ThreadPool.get_result.
                result = ExceptionInfo()
            result_queue.put((result, args))

#----------------------------------------------------------------------------

class ThreadPool(object):
    """Fixed-size pool of daemon worker threads with one result queue per function."""

    def __init__(self, num_threads):
        assert num_threads >= 1
        self.task_queue = Queue.Queue()
        self.result_queues = dict()
        self.num_threads = num_threads
        for _idx in range(self.num_threads):
            thread = WorkerThread(self.task_queue)
            thread.daemon = True
            thread.start()

    def add_task(self, func, args=()):
        """Schedule ``func(*args)``; fetch its outcome with ``get_result(func)``."""
        assert hasattr(func, '__call__') # must be a function
        if func not in self.result_queues:
            self.result_queues[func] = Queue.Queue()
        self.task_queue.put((func, args, self.result_queues[func]))

    def get_result(self, func): # returns (result, args)
        """Block for the next finished task of `func`.

        Re-raises the exception if the task failed.
        """
        result, args = self.result_queues[func].get()
        if isinstance(result, ExceptionInfo):
            print('\n\nWorker thread caught an exception:\n' + result.traceback)
            raise result.value
        return result, args

    def finish(self):
        """Ask every worker thread to stop once the queued tasks are done."""
        for _idx in range(self.num_threads):
            self.task_queue.put((None, (), None))

    def __enter__(self): # for 'with' statement
        return self

    def __exit__(self, *excinfo):
        self.finish()

    def process_items_concurrently(self, item_iterator, process_func=lambda x: x, pre_func=lambda x: x, post_func=lambda x: x, max_items_in_flight=None):
        """Yield ``post_func(process_func(pre_func(item)))`` in input order.

        `pre_func` and `post_func` run in the calling thread, `process_func`
        in the workers. At most `max_items_in_flight` items (default: four
        per thread) are pending at any time.
        """
        if max_items_in_flight is None:
            max_items_in_flight = self.num_threads * 4
        assert max_items_in_flight >= 1
        # Private marker for "not finished yet", so that a task which
        # legitimately returns None is still recognised as completed.
        pending = object()
        results = []
        retire_idx = [0]

        def task_func(prepared, _idx):
            return process_func(prepared)

        def retire_result():
            processed, (_prepared, idx) = self.get_result(task_func)
            results[idx] = processed
            # Emit the finished prefix in order, releasing each slot.
            while retire_idx[0] < len(results) and results[retire_idx[0]] is not pending:
                yield post_func(results[retire_idx[0]])
                results[retire_idx[0]] = None
                retire_idx[0] += 1

        for idx, item in enumerate(item_iterator):
            prepared = pre_func(item)
            results.append(pending)
            self.add_task(func=task_func, args=(prepared, idx))
            while retire_idx[0] < idx - max_items_in_flight + 2:
                for res in retire_result():
                    yield res
        while retire_idx[0] < len(results):
            for res in retire_result():
                yield res

#----------------------------------------------------------------------------

def display(tfrecord_dir):
    """Show the images of a dataset one by one in an OpenCV window."""
    print('Loading dataset "%s"' % tfrecord_dir)
    tflib.init_tf({'gpu_options.allow_growth': True})
    dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size='full', repeat=False, shuffle_mb=0)
    tflib.init_uninitialized_vars()
    import cv2  # pip install opencv-python

    idx = 0
    while True:
        try:
            images, labels = dset.get_minibatch_np(1)
        except tf.errors.OutOfRangeError:
            break
        if idx == 0:
            print('Displaying images')
            cv2.namedWindow('dataset_tool')
            print('Press SPACE or ENTER to advance, ESC to exit')
        print('\nidx = %-8d\nlabel = %s' % (idx, labels[0].tolist()))
        cv2.imshow('dataset_tool', images[0].transpose(1, 2, 0)[:, :, ::-1]) # CHW => HWC, RGB => BGR
        idx += 1
        if cv2.waitKey() == 27:
            break
    print('\nDisplayed %d images.' % idx)

#----------------------------------------------------------------------------

def extract(tfrecord_dir, output_dir):
    """Write every image of a dataset to ``<output_dir>/img<idx>.png``."""
    print('Loading dataset "%s"' % tfrecord_dir)
    tflib.init_tf({'gpu_options.allow_growth': True})
    dset = dataset.TFRecordDataset(tfrecord_dir, max_label_size=0, repeat=False, shuffle_mb=0)
    tflib.init_uninitialized_vars()

    print('Extracting images to "%s"' % output_dir)
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)
    idx = 0
    while True:
        if idx % 10 == 0:
            print('%d\r' % idx, end='', flush=True)
        try:
            images, _labels = dset.get_minibatch_np(1)
        except tf.errors.OutOfRangeError:
            break
        if images.shape[1] == 1:
            img = PIL.Image.fromarray(images[0][0], 'L')
        else:
            img = PIL.Image.fromarray(images[0].transpose(1, 2, 0), 'RGB')
        img.save(os.path.join(output_dir, 'img%08d.png' % idx))
        idx += 1
    print('Extracted %d images.' % idx)

#----------------------------------------------------------------------------

def compare(tfrecord_dir_a, tfrecord_dir_b, ignore_labels):
    """Compare two datasets item by item and print how many images/labels match."""
    max_label_size = 0 if ignore_labels else 'full'
    print('Loading dataset "%s"' % tfrecord_dir_a)
    tflib.init_tf({'gpu_options.allow_growth': True})
    dset_a = dataset.TFRecordDataset(tfrecord_dir_a, max_label_size=max_label_size, repeat=False, shuffle_mb=0)
    print('Loading dataset "%s"' % tfrecord_dir_b)
    dset_b = dataset.TFRecordDataset(tfrecord_dir_b, max_label_size=max_label_size, repeat=False, shuffle_mb=0)
    tflib.init_uninitialized_vars()

    print('Comparing datasets')
    idx = 0
    identical_images = 0
    identical_labels = 0
    while True:
        if idx % 100 == 0:
            print('%d\r' % idx, end='', flush=True)
        try:
            images_a, labels_a = dset_a.get_minibatch_np(1)
        except tf.errors.OutOfRangeError:
            images_a, labels_a = None, None
        try:
            images_b, labels_b = dset_b.get_minibatch_np(1)
        except tf.errors.OutOfRangeError:
            images_b, labels_b = None, None
        if images_a is None or images_b is None:
            # One side ran out; report only if the other one did not.
            if images_a is not None or images_b is not None:
                print('Datasets contain different number of images')
            break
        if images_a.shape == images_b.shape and np.all(images_a == images_b):
            identical_images += 1
        else:
            print('Image %d is different' % idx)
        if labels_a.shape == labels_b.shape and np.all(labels_a == labels_b):
            identical_labels += 1
        else:
            print('Label %d is different' % idx)
        idx += 1
    print('Identical images: %d / %d' % (identical_images, idx))
    if not ignore_labels:
        print('Identical labels: %d / %d' % (identical_labels, idx))

#----------------------------------------------------------------------------

def _export_labeled_images(tfrecord_dir, images, labels):
    """Export `images` and one-hot encoded `labels` in the exporter's shuffled order."""
    onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32)
    onehot[np.arange(labels.size), labels] = 1.0

    with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr:
        order = tfr.choose_shuffled_order()
        for idx in range(order.size):
            tfr.add_image(images[order[idx]])
        tfr.add_labels(onehot[order])

#----------------------------------------------------------------------------

def create_mnist(tfrecord_dir, mnist_dir):
    """Create a dataset from the gzipped MNIST training files, padded to 32x32."""
    print('Loading MNIST from "%s"' % mnist_dir)
    import gzip
    with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file:
        images = np.frombuffer(file.read(), np.uint8, offset=16)
    with gzip.open(os.path.join(mnist_dir, 'train-labels-idx1-ubyte.gz'), 'rb') as file:
        labels = np.frombuffer(file.read(), np.uint8, offset=8)
    images = images.reshape(-1, 1, 28, 28)
    images = np.pad(images, [(0,0), (0,0), (2,2), (2,2)], 'constant', constant_values=0)
    assert images.shape == (60000, 1, 32, 32) and images.dtype == np.uint8
    assert labels.shape == (60000,) and labels.dtype == np.uint8
    assert np.min(images) == 0 and np.max(images) == 255
    assert np.min(labels) == 0 and np.max(labels) == 9
    _export_labeled_images(tfrecord_dir, images, labels)

#----------------------------------------------------------------------------

def create_mnistrgb(tfrecord_dir, mnist_dir, num_images=1000000, random_seed=123):
    """Create an unlabeled RGB dataset; each channel is a randomly drawn MNIST digit."""
    print('Loading MNIST from "%s"' % mnist_dir)
    import gzip
    with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file:
        images = np.frombuffer(file.read(), np.uint8, offset=16)
    images = images.reshape(-1, 28, 28)
    images = np.pad(images, [(0,0), (2,2), (2,2)], 'constant', constant_values=0)
    assert images.shape == (60000, 32, 32) and images.dtype == np.uint8
    assert np.min(images) == 0 and np.max(images) == 255

    with TFRecordExporter(tfrecord_dir, num_images) as tfr:
        rnd = np.random.RandomState(random_seed)
        for _idx in range(num_images):
            tfr.add_image(images[rnd.randint(images.shape[0], size=3)])

#----------------------------------------------------------------------------

def create_cifar10(tfrecord_dir, cifar10_dir):
    """Create a labeled dataset from the five pickled CIFAR-10 training batches."""
    print('Loading CIFAR-10 from "%s"' % cifar10_dir)
    import pickle
    images = []
    labels = []
    for batch in range(1, 6):
        with open(os.path.join(cifar10_dir, 'data_batch_%d' % batch), 'rb') as file:
            data = pickle.load(file, encoding='latin1')
        images.append(data['data'].reshape(-1, 3, 32, 32))
        labels.append(data['labels'])
    images = np.concatenate(images)
    labels = np.concatenate(labels)
    assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8
    # The default integer dtype is platform dependent, so accept both widths.
    assert labels.shape == (50000,) and labels.dtype in [np.int32, np.int64]
    assert np.min(images) == 0 and np.max(images) == 255
    assert np.min(labels) == 0 and np.max(labels) == 9
    _export_labeled_images(tfrecord_dir, images, labels)

#----------------------------------------------------------------------------

def create_cifar100(tfrecord_dir, cifar100_dir):
    """Create a dataset from the pickled CIFAR-100 train file, using the fine labels."""
    print('Loading CIFAR-100 from "%s"' % cifar100_dir)
    import pickle
    with open(os.path.join(cifar100_dir, 'train'), 'rb') as file:
        data = pickle.load(file, encoding='latin1')
    images = data['data'].reshape(-1, 3, 32, 32)
    labels = np.array(data['fine_labels'])
    assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8
    # The default integer dtype is platform dependent, so accept both widths.
    assert labels.shape == (50000,) and labels.dtype in [np.int32, np.int64]
    assert np.min(images) == 0 and np.max(images) == 255
    assert np.min(labels) == 0 and np.max(labels) == 99
    _export_labeled_images(tfrecord_dir, images, labels)

#----------------------------------------------------------------------------

def create_svhn(tfrecord_dir, svhn_dir):
    """Create a labeled dataset from the three pickled SVHN training files."""
    print('Loading SVHN from "%s"' % svhn_dir)
    import pickle
    images = []
    labels = []
    for batch in range(1, 4):
        with open(os.path.join(svhn_dir, 'train_%d.pkl' % batch), 'rb') as file:
            data = pickle.load(file, encoding='latin1')
        images.append(data[0])
        labels.append(data[1])
    images = np.concatenate(images)
    labels = np.concatenate(labels)
    assert images.shape == (73257, 3, 32, 32) and images.dtype == np.uint8
    assert labels.shape == (73257,) and labels.dtype == np.uint8
    assert np.min(images) == 0 and np.max(images) == 255
    assert np.min(labels) == 0 and np.max(labels) == 9
    _export_labeled_images(tfrecord_dir, images, labels)
None: raise IOError('cv2.imdecode failed') img = img[:, :, ::-1] # BGR => RGB except IOError: img = np.asarray(PIL.Image.open(io.BytesIO(value))) crop = np.min(img.shape[:2]) img = img[(img.shape[0] - crop) // 2 : (img.shape[0] + crop) // 2, (img.shape[1] - crop) // 2 : (img.shape[1] + crop) // 2] img = PIL.Image.fromarray(img, 'RGB') img = img.resize((resolution, resolution), PIL.Image.ANTIALIAS) img = np.asarray(img) img = img.transpose([2, 0, 1]) # HWC => CHW tfr.add_image(img) except: print(sys.exc_info()[1]) if tfr.cur_images == max_images: break #---------------------------------------------------------------------------- def create_lsun_wide(tfrecord_dir, lmdb_dir, width=512, height=384, max_images=None): assert width == 2 ** int(np.round(np.log2(width))) assert height <= width print('Loading LSUN dataset from "%s"' % lmdb_dir) import lmdb # pip install lmdb # pylint: disable=import-error import cv2 # pip install opencv-python import io with lmdb.open(lmdb_dir, readonly=True).begin(write=False) as txn: total_images = txn.stat()['entries'] # pylint: disable=no-value-for-parameter if max_images is None: max_images = total_images with TFRecordExporter(tfrecord_dir, max_images, print_progress=False) as tfr: for idx, (_key, value) in enumerate(txn.cursor()): try: try: img = cv2.imdecode(np.fromstring(value, dtype=np.uint8), 1) if img is None: raise IOError('cv2.imdecode failed') img = img[:, :, ::-1] # BGR => RGB except IOError: img = np.asarray(PIL.Image.open(io.BytesIO(value))) ch = int(np.round(width * img.shape[0] / img.shape[1])) if img.shape[1] < width or ch < height: continue img = img[(img.shape[0] - ch) // 2 : (img.shape[0] + ch) // 2] img = PIL.Image.fromarray(img, 'RGB') img = img.resize((width, height), PIL.Image.ANTIALIAS) img = np.asarray(img) img = img.transpose([2, 0, 1]) # HWC => CHW canvas = np.zeros([3, width, width], dtype=np.uint8) canvas[:, (width - height) // 2 : (width + height) // 2] = img tfr.add_image(canvas) print('\r%d / %d => %d ' % 
(idx + 1, total_images, tfr.cur_images), end='') except: print(sys.exc_info()[1]) if tfr.cur_images == max_images: break print() #---------------------------------------------------------------------------- def create_celeba(tfrecord_dir, celeba_dir, cx=89, cy=121): print('Loading CelebA from "%s"' % celeba_dir) glob_pattern = os.path.join(celeba_dir, 'img_align_celeba_png', '*.png') image_filenames = sorted(glob.glob(glob_pattern)) expected_images = 202599 if len(image_filenames) != expected_images: error('Expected to find %d images' % expected_images) with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr: order = tfr.choose_shuffled_order() for idx in range(order.size): img = np.asarray(PIL.Image.open(image_filenames[order[idx]])) assert img.shape == (218, 178, 3) img = img[cy - 64 : cy + 64, cx - 64 : cx + 64] img = img.transpose(2, 0, 1) # HWC => CHW tfr.add_image(img) #---------------------------------------------------------------------------- def create_from_images(tfrecord_dir, image_dir, shuffle): print('Loading images from "%s"' % image_dir) image_filenames = sorted(glob.glob(os.path.join(image_dir, '*'))) if len(image_filenames) == 0: error('No input images found') img = np.asarray(PIL.Image.open(image_filenames[0])) resolution = img.shape[0] channels = img.shape[2] if img.ndim == 3 else 1 if img.shape[1] != resolution: error('Input images must have the same width and height') if resolution != 2 ** int(np.floor(np.log2(resolution))): error('Input image resolution must be a power-of-two') if channels not in [1, 3]: error('Input images must be stored as RGB or grayscale') with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr: order = tfr.choose_shuffled_order() if shuffle else np.arange(len(image_filenames)) for idx in range(order.size): img = np.asarray(PIL.Image.open(image_filenames[order[idx]])) if channels == 1: img = img[np.newaxis, :, :] # HW => CHW else: img = img.transpose([2, 0, 1]) # HWC => CHW tfr.add_image(img) 
#----------------------------------------------------------------------------

def create_from_hdf5(tfrecord_dir, hdf5_filename, shuffle):
    """Export the images of a legacy HDF5 archive through `TFRecordExporter`.

    Args:
        tfrecord_dir: Passed to `TFRecordExporter` as the dataset directory.
        hdf5_filename: Path of the HDF5 archive to read.
        shuffle: If truthy, images are added in the order returned by
            `tfr.choose_shuffled_order()`; otherwise in archive order.

    If a file named `<archive path without extension>-labels.npy` exists, its
    contents are indexed with the same order and passed to `tfr.add_labels`.
    """
    print('Loading HDF5 archive from "%s"' % hdf5_filename)
    import h5py # conda install h5py
    with h5py.File(hdf5_filename, 'r') as hdf5_file:
        # Among the entries whose key starts with 'data', keep the one with
        # the largest shape[3] (presumably the highest-resolution level, as
        # the lambda argument is named `lod` -- TODO confirm).
        hdf5_data = max([value for key, value in hdf5_file.items() if key.startswith('data')], key=lambda lod: lod.shape[3])
        # The exporter is used while the archive is still open because
        # hdf5_data is read from it item by item below.
        with TFRecordExporter(tfrecord_dir, hdf5_data.shape[0]) as tfr:
            order = tfr.choose_shuffled_order() if shuffle else np.arange(hdf5_data.shape[0])
            for idx in range(order.size):
                tfr.add_image(hdf5_data[order[idx]])
            # Optional labels live next to the archive and follow the image order.
            npy_filename = os.path.splitext(hdf5_filename)[0] + '-labels.npy'
            if os.path.isfile(npy_filename):
                tfr.add_labels(np.load(npy_filename)[order])

#----------------------------------------------------------------------------

def execute_cmdline(argv):
    """Parse `argv` and run the dataset command it names.

    Args:
        argv: Full argument vector. `argv[0]` is used as the program name and
            the remaining entries are parsed; when there are none, `-h` is
            parsed instead.

    The selected sub-command name is looked up in this module's `globals()`
    and the function found there is called with the remaining parsed
    arguments as keyword arguments. Every sub-command and argument name
    registered below therefore has to match a module-level function and its
    parameter names.
    """
    prog = argv[0]
    parser = argparse.ArgumentParser(
        prog = prog,
        description = 'Tool for creating multi-resolution TFRecords datasets for StyleGAN and ProGAN.',
        epilog = 'Type "%s -h" for more information.' % prog)

    subparsers = parser.add_subparsers(dest='command')
    subparsers.required = True
    def add_command(cmd, desc, example=None):
        """Register sub-command `cmd`; `desc` is used as description and help, `example` (if given) as epilog."""
        epilog = 'Example: %s %s' % (prog, example) if example is not None else None
        return subparsers.add_parser(cmd, description=desc, help=desc, epilog=epilog)

    # Dataset inspection commands.
    p = add_command('display', 'Display images in dataset.',
                    'display datasets/mnist')
    p.add_argument('tfrecord_dir', help='Directory containing dataset')

    p = add_command('extract', 'Extract images from dataset.',
                    'extract datasets/mnist mnist-images')
    p.add_argument('tfrecord_dir', help='Directory containing dataset')
    p.add_argument('output_dir', help='Directory to extract the images into')

    p = add_command('compare', 'Compare two datasets.',
                    'compare datasets/mydataset datasets/mnist')
    p.add_argument('tfrecord_dir_a', help='Directory containing first dataset')
    p.add_argument('tfrecord_dir_b', help='Directory containing second dataset')
    p.add_argument('--ignore_labels', help='Ignore labels (default: 0)', type=int, default=0)

    # Dataset creation commands.
    p = add_command('create_mnist', 'Create dataset for MNIST.',
                    'create_mnist datasets/mnist ~/downloads/mnist')
    p.add_argument('tfrecord_dir', help='New dataset directory to be created')
    p.add_argument('mnist_dir', help='Directory containing MNIST')

    p = add_command('create_mnistrgb', 'Create dataset for MNIST-RGB.',
                    'create_mnistrgb datasets/mnistrgb ~/downloads/mnist')
    p.add_argument('tfrecord_dir', help='New dataset directory to be created')
    p.add_argument('mnist_dir', help='Directory containing MNIST')
    p.add_argument('--num_images', help='Number of composite images to create (default: 1000000)', type=int, default=1000000)
    p.add_argument('--random_seed', help='Random seed (default: 123)', type=int, default=123)

    p = add_command('create_cifar10', 'Create dataset for CIFAR-10.',
                    'create_cifar10 datasets/cifar10 ~/downloads/cifar10')
    p.add_argument('tfrecord_dir', help='New dataset directory to be created')
    p.add_argument('cifar10_dir', help='Directory containing CIFAR-10')

    p = add_command('create_cifar100', 'Create dataset for CIFAR-100.',
                    'create_cifar100 datasets/cifar100 ~/downloads/cifar100')
    p.add_argument('tfrecord_dir', help='New dataset directory to be created')
    p.add_argument('cifar100_dir', help='Directory containing CIFAR-100')

    p = add_command('create_svhn', 'Create dataset for SVHN.',
                    'create_svhn datasets/svhn ~/downloads/svhn')
    p.add_argument('tfrecord_dir', help='New dataset directory to be created')
    p.add_argument('svhn_dir', help='Directory containing SVHN')

    p = add_command('create_lsun', 'Create dataset for single LSUN category.',
                    'create_lsun datasets/lsun-car-100k ~/downloads/lsun/car_lmdb --resolution 256 --max_images 100000')
    p.add_argument('tfrecord_dir', help='New dataset directory to be created')
    p.add_argument('lmdb_dir', help='Directory containing LMDB database')
    p.add_argument('--resolution', help='Output resolution (default: 256)', type=int, default=256)
    p.add_argument('--max_images', help='Maximum number of images (default: none)', type=int, default=None)

    p = add_command('create_lsun_wide', 'Create LSUN dataset with non-square aspect ratio.',
                    'create_lsun_wide datasets/lsun-car-512x384 ~/downloads/lsun/car_lmdb --width 512 --height 384')
    p.add_argument('tfrecord_dir', help='New dataset directory to be created')
    p.add_argument('lmdb_dir', help='Directory containing LMDB database')
    p.add_argument('--width', help='Output width (default: 512)', type=int, default=512)
    p.add_argument('--height', help='Output height (default: 384)', type=int, default=384)
    p.add_argument('--max_images', help='Maximum number of images (default: none)', type=int, default=None)

    p = add_command('create_celeba', 'Create dataset for CelebA.',
                    'create_celeba datasets/celeba ~/downloads/celeba')
    p.add_argument('tfrecord_dir', help='New dataset directory to be created')
    p.add_argument('celeba_dir', help='Directory containing CelebA')
    p.add_argument('--cx', help='Center X coordinate (default: 89)', type=int, default=89)
    p.add_argument('--cy', help='Center Y coordinate (default: 121)', type=int, default=121)

    p = add_command('create_from_images', 'Create dataset from a directory full of images.',
                    'create_from_images datasets/mydataset myimagedir')
    p.add_argument('tfrecord_dir', help='New dataset directory to be created')
    p.add_argument('image_dir', help='Directory containing the images')
    p.add_argument('--shuffle', help='Randomize image order (default: 1)', type=int, default=1)

    p = add_command('create_from_hdf5', 'Create dataset from legacy HDF5 archive.',
                    'create_from_hdf5 datasets/celebahq ~/downloads/celeba-hq-1024x1024.h5')
    p.add_argument('tfrecord_dir', help='New dataset directory to be created')
    p.add_argument('hdf5_filename', help='HDF5 archive containing the images')
    p.add_argument('--shuffle', help='Randomize image order (default: 1)', type=int, default=1)

    # With no arguments at all, show the help text instead of failing on the
    # required sub-command.
    args = parser.parse_args(argv[1:] if len(argv) > 1 else ['-h'])
    func = globals()[args.command]
    # 'command' is not a parameter of the target function, so drop it before
    # forwarding the namespace as keyword arguments.
    del args.command
    func(**vars(args))

#----------------------------------------------------------------------------

if __name__ == "__main__":
    execute_cmdline(sys.argv)

#----------------------------------------------------------------------------

================================================
FILE: FQ-StyleGAN/dnnlib/__init__.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html

from . import submission

from .submission.run_context import RunContext

# Re-export the submission API at package level.
from .submission.submit import SubmitTarget
from .submission.submit import PathType
from .submission.submit import SubmitConfig
from .submission.submit import submit_run
from .submission.submit import get_path_from_template
from .submission.submit import convert_path
from .submission.submit import make_run_dir_path

from .util import EasyDict

submit_config: SubmitConfig = None # Package level variable for SubmitConfig which is only valid when inside the run function.

================================================
FILE: FQ-StyleGAN/dnnlib/submission/__init__.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html

from . import run_context
from . import submit

================================================
FILE: FQ-StyleGAN/dnnlib/submission/internal/__init__.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html

from . import local

================================================
FILE: FQ-StyleGAN/dnnlib/submission/internal/local.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit # https://nvlabs.github.io/stylegan2/license.html class TargetOptions(): def __init__(self): self.do_not_copy_source_files = False class Target(): def __init__(self): pass def finalize_submit_config(self, submit_config, host_run_dir): print ('Local submit ', end='', flush=True) submit_config.run_dir = host_run_dir def submit(self, submit_config, host_run_dir): from ..submit import run_wrapper, convert_path print('- run_dir: %s' % convert_path(submit_config.run_dir), flush=True) return run_wrapper(submit_config) ================================================ FILE: FQ-StyleGAN/dnnlib/submission/run_context.py ================================================ # Copyright (c) 2019, NVIDIA Corporation. All rights reserved. # # This work is made available under the Nvidia Source Code License-NC. # To view a copy of this license, visit # https://nvlabs.github.io/stylegan2/license.html """Helpers for managing the run/training loop.""" import datetime import json import os import pprint import time import types from typing import Any from . import submit # Singleton RunContext _run_context = None class RunContext(object): """Helper class for managing the run/training loop. The context will hide the implementation details of a basic run/training loop. It will set things up properly, tell if run should be stopped, and then cleans up. User should call update periodically and use should_stop to determine if run should be stopped. Args: submit_config: The SubmitConfig that is used for the current run. config_module: (deprecated) The whole config module that is used for the current run. 
""" def __init__(self, submit_config: submit.SubmitConfig, config_module: types.ModuleType = None): global _run_context # Only a single RunContext can be alive assert _run_context is None _run_context = self self.submit_config = submit_config self.should_stop_flag = False self.has_closed = False self.start_time = time.time() self.last_update_time = time.time() self.last_update_interval = 0.0 self.progress_monitor_file_path = None # vestigial config_module support just prints a warning if config_module is not None: print("RunContext.config_module parameter support has been removed.") # write out details about the run to a text file self.run_txt_data = {"task_name": submit_config.task_name, "host_name": submit_config.host_name, "start_time": datetime.datetime.now().isoformat(sep=" ")} with open(os.path.join(submit_config.run_dir, "run.txt"), "w") as f: pprint.pprint(self.run_txt_data, stream=f, indent=4, width=200, compact=False) def __enter__(self) -> "RunContext": return self def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: self.close() def update(self, loss: Any = 0, cur_epoch: Any = 0, max_epoch: Any = None) -> None: """Do general housekeeping and keep the state of the context up-to-date. 
Should be called often enough but not in a tight loop.""" assert not self.has_closed self.last_update_interval = time.time() - self.last_update_time self.last_update_time = time.time() if os.path.exists(os.path.join(self.submit_config.run_dir, "abort.txt")): self.should_stop_flag = True def should_stop(self) -> bool: """Tell whether a stopping condition has been triggered one way or another.""" return self.should_stop_flag def get_time_since_start(self) -> float: """How much time has passed since the creation of the context.""" return time.time() - self.start_time def get_time_since_last_update(self) -> float: """How much time has passed since the last call to update.""" return time.time() - self.last_update_time def get_last_update_interval(self) -> float: """How much time passed between the previous two calls to update.""" return self.last_update_interval def close(self) -> None: """Close the context and clean up. Should only be called once.""" if not self.has_closed: # update the run.txt with stopping time self.run_txt_data["stop_time"] = datetime.datetime.now().isoformat(sep=" ") with open(os.path.join(self.submit_config.run_dir, "run.txt"), "w") as f: pprint.pprint(self.run_txt_data, stream=f, indent=4, width=200, compact=False) self.has_closed = True # detach the global singleton global _run_context if _run_context is self: _run_context = None @staticmethod def get(): import dnnlib if _run_context is not None: return _run_context return RunContext(dnnlib.submit_config) ================================================ FILE: FQ-StyleGAN/dnnlib/submission/submit.py ================================================ # Copyright (c) 2019, NVIDIA Corporation. All rights reserved. # # This work is made available under the Nvidia Source Code License-NC. 
# To view a copy of this license, visit # https://nvlabs.github.io/stylegan2/license.html """Submit a function to be run either locally or in a computing cluster.""" import copy import inspect import os import pathlib import pickle import platform import pprint import re import shutil import sys import time import traceback from enum import Enum from .. import util from ..util import EasyDict from . import internal class SubmitTarget(Enum): """The target where the function should be run. LOCAL: Run it locally. """ LOCAL = 1 class PathType(Enum): """Determines in which format should a path be formatted. WINDOWS: Format with Windows style. LINUX: Format with Linux/Posix style. AUTO: Use current OS type to select either WINDOWS or LINUX. """ WINDOWS = 1 LINUX = 2 AUTO = 3 class PlatformExtras: """A mixed bag of values used by dnnlib heuristics. Attributes: data_reader_buffer_size: Used by DataReader to size internal shared memory buffers. data_reader_process_count: Number of worker processes to spawn (zero for single thread operation) """ def __init__(self): self.data_reader_buffer_size = 1<<30 # 1 GB self.data_reader_process_count = 0 # single threaded default _user_name_override = None class SubmitConfig(util.EasyDict): """Strongly typed config dict needed to submit runs. Attributes: run_dir_root: Path to the run dir root. Can be optionally templated with tags. Needs to always be run through get_path_from_template. run_desc: Description of the run. Will be used in the run dir and task name. run_dir_ignore: List of file patterns used to ignore files when copying files to the run dir. run_dir_extra_files: List of (abs_path, rel_path) tuples of file paths. rel_path root will be the src directory inside the run dir. submit_target: Submit target enum value. Used to select where the run is actually launched. num_gpus: Number of GPUs used/requested for the run. print_info: Whether to print debug information when submitting. 
local.do_not_copy_source_files: Do not copy source files from the working directory to the run dir. run_id: Automatically populated value during submit. run_name: Automatically populated value during submit. run_dir: Automatically populated value during submit. run_func_name: Automatically populated value during submit. run_func_kwargs: Automatically populated value during submit. user_name: Automatically populated value during submit. Can be set by the user which will then override the automatic value. task_name: Automatically populated value during submit. host_name: Automatically populated value during submit. platform_extras: Automatically populated values during submit. Used by various dnnlib libraries such as the DataReader class. """ def __init__(self): super().__init__() # run (set these) self.run_dir_root = "" # should always be passed through get_path_from_template self.run_desc = "" self.run_dir_ignore = ["__pycache__", "*.pyproj", "*.sln", "*.suo", ".cache", ".idea", ".vs", ".vscode", "_cudacache"] self.run_dir_extra_files = [] # submit (set these) self.submit_target = SubmitTarget.LOCAL self.num_gpus = 1 self.print_info = False self.nvprof = False self.local = internal.local.TargetOptions() self.datasets = [] # (automatically populated) self.run_id = None self.run_name = None self.run_dir = None self.run_func_name = None self.run_func_kwargs = None self.user_name = None self.task_name = None self.host_name = "localhost" self.platform_extras = PlatformExtras() def get_path_from_template(path_template: str, path_type: PathType = PathType.AUTO) -> str: """Replace tags in the given path template and return either Windows or Linux formatted path.""" # automatically select path type depending on running OS if path_type == PathType.AUTO: if platform.system() == "Windows": path_type = PathType.WINDOWS elif platform.system() == "Linux": path_type = PathType.LINUX else: raise RuntimeError("Unknown platform") path_template = path_template.replace("", 
get_user_name()) # return correctly formatted path if path_type == PathType.WINDOWS: return str(pathlib.PureWindowsPath(path_template)) elif path_type == PathType.LINUX: return str(pathlib.PurePosixPath(path_template)) else: raise RuntimeError("Unknown platform") def get_template_from_path(path: str) -> str: """Convert a normal path back to its template representation.""" path = path.replace("\\", "/") return path def convert_path(path: str, path_type: PathType = PathType.AUTO) -> str: """Convert a normal path to template and the convert it back to a normal path with given path type.""" path_template = get_template_from_path(path) path = get_path_from_template(path_template, path_type) return path def set_user_name_override(name: str) -> None: """Set the global username override value.""" global _user_name_override _user_name_override = name def get_user_name(): """Get the current user name.""" if _user_name_override is not None: return _user_name_override elif platform.system() == "Windows": return os.getlogin() elif platform.system() == "Linux": try: import pwd return pwd.getpwuid(os.geteuid()).pw_name except: return "unknown" else: raise RuntimeError("Unknown platform") def make_run_dir_path(*paths): """Make a path/filename that resides under the current submit run_dir. Args: *paths: Path components to be passed to os.path.join Returns: A file/dirname rooted at submit_config.run_dir. If there's no submit_config or run_dir, the base directory is the current working directory. 
E.g., `os.path.join(dnnlib.submit_config.run_dir, "output.txt"))` """ import dnnlib if (dnnlib.submit_config is None) or (dnnlib.submit_config.run_dir is None): return os.path.join(os.getcwd(), *paths) return os.path.join(dnnlib.submit_config.run_dir, *paths) def _create_run_dir_local(submit_config: SubmitConfig) -> str: """Create a new run dir with increasing ID number at the start.""" run_dir_root = get_path_from_template(submit_config.run_dir_root, PathType.AUTO) if not os.path.exists(run_dir_root): os.makedirs(run_dir_root) submit_config.run_id = _get_next_run_id_local(run_dir_root) submit_config.run_name = "{0:05d}-{1}".format(submit_config.run_id, submit_config.run_desc) run_dir = os.path.join(run_dir_root, submit_config.run_name) if os.path.exists(run_dir): raise RuntimeError("The run dir already exists! ({0})".format(run_dir)) os.makedirs(run_dir) return run_dir def _get_next_run_id_local(run_dir_root: str) -> int: """Reads all directory names in a given directory (non-recursive) and returns the next (increasing) run id. Assumes IDs are numbers at the start of the directory names.""" dir_names = [d for d in os.listdir(run_dir_root) if os.path.isdir(os.path.join(run_dir_root, d))] r = re.compile("^\\d+") # match one or more digits at the start of the string run_id = 0 for dir_name in dir_names: m = r.match(dir_name) if m is not None: i = int(m.group()) run_id = max(run_id, i + 1) return run_id def _populate_run_dir(submit_config: SubmitConfig, run_dir: str) -> None: """Copy all necessary files into the run dir. 
Assumes that the dir exists, is local, and is writable.""" pickle.dump(submit_config, open(os.path.join(run_dir, "submit_config.pkl"), "wb")) with open(os.path.join(run_dir, "submit_config.txt"), "w") as f: pprint.pprint(submit_config, stream=f, indent=4, width=200, compact=False) if (submit_config.submit_target == SubmitTarget.LOCAL) and submit_config.local.do_not_copy_source_files: return files = [] run_func_module_dir_path = util.get_module_dir_by_obj_name(submit_config.run_func_name) assert '.' in submit_config.run_func_name for _idx in range(submit_config.run_func_name.count('.') - 1): run_func_module_dir_path = os.path.dirname(run_func_module_dir_path) files += util.list_dir_recursively_with_ignore(run_func_module_dir_path, ignores=submit_config.run_dir_ignore, add_base_to_relative=False) dnnlib_module_dir_path = util.get_module_dir_by_obj_name("dnnlib") files += util.list_dir_recursively_with_ignore(dnnlib_module_dir_path, ignores=submit_config.run_dir_ignore, add_base_to_relative=True) files += submit_config.run_dir_extra_files files = [(f[0], os.path.join(run_dir, "src", f[1])) for f in files] files += [(os.path.join(dnnlib_module_dir_path, "submission", "internal", "run.py"), os.path.join(run_dir, "run.py"))] util.copy_files_and_create_dirs(files) def run_wrapper(submit_config: SubmitConfig) -> None: """Wrap the actual run function call for handling logging, exceptions, typing, etc.""" is_local = submit_config.submit_target == SubmitTarget.LOCAL # when running locally, redirect stderr to stdout, log stdout to a file, and force flushing if is_local: logger = util.Logger(file_name=os.path.join(submit_config.run_dir, "log.txt"), file_mode="w", should_flush=True) else: # when running in a cluster, redirect stderr to stdout, and just force flushing (log writing is handled by run.sh) logger = util.Logger(file_name=None, should_flush=True) import dnnlib dnnlib.submit_config = submit_config exit_with_errcode = False try: print("dnnlib: Running {0}() on 
{1}...".format(submit_config.run_func_name, submit_config.host_name)) start_time = time.time() run_func_obj = util.get_obj_by_name(submit_config.run_func_name) assert callable(run_func_obj) sig = inspect.signature(run_func_obj) if 'submit_config' in sig.parameters: run_func_obj(submit_config=submit_config, **submit_config.run_func_kwargs) else: run_func_obj(**submit_config.run_func_kwargs) print("dnnlib: Finished {0}() in {1}.".format(submit_config.run_func_name, util.format_time(time.time() - start_time))) except: if is_local: raise else: traceback.print_exc() log_src = os.path.join(submit_config.run_dir, "log.txt") log_dst = os.path.join(get_path_from_template(submit_config.run_dir_root), "{0}-error.txt".format(submit_config.run_name)) shutil.copyfile(log_src, log_dst) # Defer sys.exit(1) to happen after we close the logs and create a _finished.txt exit_with_errcode = True finally: open(os.path.join(submit_config.run_dir, "_finished.txt"), "w").close() dnnlib.RunContext.get().close() dnnlib.submit_config = None logger.close() # If we hit an error, get out of the script now and signal the error # to whatever process that started this script. if exit_with_errcode: sys.exit(1) return submit_config def submit_run(submit_config: SubmitConfig, run_func_name: str, **run_func_kwargs) -> None: """Create a run dir, gather files related to the run, copy files to the run dir, and launch the run in appropriate place.""" submit_config = copy.deepcopy(submit_config) submit_target = submit_config.submit_target farm = None if submit_target == SubmitTarget.LOCAL: farm = internal.local.Target() assert farm is not None # unknown target # Disallow submitting jobs with zero num_gpus. 
if (submit_config.num_gpus is None) or (submit_config.num_gpus == 0): raise RuntimeError("submit_config.num_gpus must be set to a non-zero value") if submit_config.user_name is None: submit_config.user_name = get_user_name() submit_config.run_func_name = run_func_name submit_config.run_func_kwargs = run_func_kwargs #-------------------------------------------------------------------- # Prepare submission by populating the run dir #-------------------------------------------------------------------- host_run_dir = _create_run_dir_local(submit_config) submit_config.task_name = "{0}-{1:05d}-{2}".format(submit_config.user_name, submit_config.run_id, submit_config.run_desc) docker_valid_name_regex = "^[a-zA-Z0-9][a-zA-Z0-9_.-]+$" if not re.match(docker_valid_name_regex, submit_config.task_name): raise RuntimeError("Invalid task name. Probable reason: unacceptable characters in your submit_config.run_desc. Task name must be accepted by the following regex: " + docker_valid_name_regex + ", got " + submit_config.task_name) # Farm specific preparations for a submit farm.finalize_submit_config(submit_config, host_run_dir) _populate_run_dir(submit_config, host_run_dir) return farm.submit(submit_config, host_run_dir) ================================================ FILE: FQ-StyleGAN/dnnlib/tflib/__init__.py ================================================ # Copyright (c) 2019, NVIDIA Corporation. All rights reserved. # # This work is made available under the Nvidia Source Code License-NC. # To view a copy of this license, visit # https://nvlabs.github.io/stylegan2/license.html from . import autosummary from . import network from . import optimizer from . import tfutil from . 
import custom_ops from .tfutil import * from .network import Network from .optimizer import Optimizer from .custom_ops import get_plugin ================================================ FILE: FQ-StyleGAN/dnnlib/tflib/autosummary.py ================================================ # Copyright (c) 2019, NVIDIA Corporation. All rights reserved. # # This work is made available under the Nvidia Source Code License-NC. # To view a copy of this license, visit # https://nvlabs.github.io/stylegan2/license.html """Helper for adding automatically tracked values to Tensorboard. Autosummary creates an identity op that internally keeps track of the input values and automatically shows up in TensorBoard. The reported value represents an average over input components. The average is accumulated constantly over time and flushed when save_summaries() is called. Notes: - The output tensor must be used as an input for something else in the graph. Otherwise, the autosummary op will not get executed, and the average value will not get accumulated. - It is perfectly fine to include autosummaries with the same name in several places throughout the graph, even if they are executed concurrently. - It is ok to also pass in a python scalar or numpy array. In this case, it is added to the average immediately. """ from collections import OrderedDict import numpy as np import tensorflow as tf from tensorboard import summary as summary_lib from tensorboard.plugins.custom_scalar import layout_pb2 from . import tfutil from .tfutil import TfExpression from .tfutil import TfExpressionEx # Enable "Custom scalars" tab in TensorBoard for advanced formatting. # Disabled by default to reduce tfevents file size. enable_custom_scalars = False _dtype = tf.float64 _vars = OrderedDict() # name => [var, ...] 
_immediate = OrderedDict() # name => update_op, update_value
_finalized = False # Set to True by finalize_autosummaries(); _create_var() asserts it is still False.
_merge_op = None # Merged summary op, created lazily on the first save_summaries() call.


def _create_var(name: str, value_expr: TfExpression) -> TfExpression:
    """Internal helper for creating autosummary accumulators.

    Creates a non-trainable 3-element variable holding
    [sum(1), sum(x), sum(x**2)] for the given value, registers it in
    `_vars[name]`, and returns the op that accumulates `value_expr` into it.

    Args:
        name:       Autosummary name; "/" is replaced by "_" for the name scope.
        value_expr: Value to accumulate; it is cast to `_dtype`.

    Returns:
        The update op (assign on first use, assign_add afterwards).
    """
    assert not _finalized
    name_id = name.replace("/", "_")
    v = tf.cast(value_expr, _dtype)

    # Number of elements: a Python constant when the static shape is known,
    # otherwise computed in-graph from the dynamic shape.
    if v.shape.is_fully_defined():
        size = np.prod(v.shape.as_list())
        size_expr = tf.constant(size, dtype=_dtype)
    else:
        size = None
        size_expr = tf.reduce_prod(tf.cast(tf.shape(v), _dtype))

    # Build the [count, sum, sum of squares] triple.
    if size == 1:
        if v.shape.ndims != 0:
            v = tf.reshape(v, [])
        v = [size_expr, v, tf.square(v)]
    else:
        v = [size_expr, tf.reduce_sum(v), tf.reduce_sum(tf.square(v))]
    # Contribute zeros instead of the triple when the sum is not finite.
    v = tf.cond(tf.is_finite(v[1]), lambda: tf.stack(v), lambda: tf.zeros(3, dtype=_dtype))

    with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.control_dependencies(None):
        var = tf.Variable(tf.zeros(3, dtype=_dtype), trainable=False) # [sum(1), sum(x), sum(x**2)]
    # Assign on the first update (variable not yet initialized), accumulate afterwards.
    update_op = tf.cond(tf.is_variable_initialized(var), lambda: tf.assign_add(var, v), lambda: tf.assign(var, v))

    if name in _vars:
        _vars[name].append(var)
    else:
        _vars[name] = [var]
    return update_op


def autosummary(name: str, value: TfExpressionEx, passthru: TfExpressionEx = None, condition: TfExpressionEx = True) -> TfExpressionEx:
    """Create a new autosummary.

    Args:
        name:      Name to use in TensorBoard
        value:     TensorFlow expression or python value to track
        passthru:  Optionally return this TF node without modifications but tack an autosummary update side-effect to this node.
        condition: Gates the update. For a TensorFlow `value` it is converted to a
                   tensor and used as the predicate of a tf.cond; for a python
                   `value` it must not be a TensorFlow expression and is tested
                   with a plain `if`.

    Returns:
        For a TensorFlow `value`: an identity of `value` (or of `passthru` if
        given) that depends on the update op. For a python `value`: `value`
        (or `passthru` if given), after the update has been run immediately.

    Example use of the passthru mechanism:

    n = autosummary('l2loss', loss, passthru=n)

    This is a shorthand for the following code:

    with tf.control_dependencies([autosummary('l2loss', loss)]):
        n = tf.identity(n)
    """
    tfutil.assert_tf_initialized()
    name_id = name.replace("/", "_")

    if tfutil.is_tf_expression(value):
        with tf.name_scope("summary_" + name_id), tf.device(value.device):
            condition = tf.convert_to_tensor(condition, name='condition')
            update_op = tf.cond(condition, lambda: tf.group(_create_var(name, value)), tf.no_op)
            with tf.control_dependencies([update_op]):
                return tf.identity(value if passthru is None else passthru)

    else: # python scalar or numpy array
        assert not tfutil.is_tf_expression(passthru)
        assert not tfutil.is_tf_expression(condition)
        if condition:
            # One placeholder/update-op pair is created per name and reused afterwards.
            if name not in _immediate:
                with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.device(None), tf.control_dependencies(None):
                    update_value = tf.placeholder(_dtype)
                    update_op = _create_var(name, update_value)
                    _immediate[name] = update_op, update_value

            update_op, update_value = _immediate[name]
            tfutil.run(update_op, {update_value: value})
        return value if passthru is None else passthru


def finalize_autosummaries() -> None:
    """Create the necessary ops to include autosummaries in TensorBoard report.
    Note: This should be done only once per graph.

    Returns:
        None if autosummaries were already finalized or if
        `enable_custom_scalars` is False; otherwise the custom-scalar layout
        produced by `summary_lib.custom_scalar_pb`.
        NOTE(review): the `-> None` annotation does not reflect the layout
        return value that save_summaries() relies on.
    """
    global _finalized
    tfutil.assert_tf_initialized()

    if _finalized:
        return None

    _finalized = True
    tfutil.init_uninitialized_vars([var for vars_list in _vars.values() for var in vars_list])

    # Create summary ops.
    with tf.device(None), tf.control_dependencies(None):
        for name, vars_list in _vars.items():
            name_id = name.replace("/", "_")
            with tfutil.absolute_name_scope("Autosummary/" + name_id):
                # Sum all accumulators for this name, then divide by the count
                # so that moments == [1, mean(x), mean(x**2)].
                moments = tf.add_n(vars_list)
                moments /= moments[0]
                with tf.control_dependencies([moments]): # read before resetting
                    reset_ops = [tf.assign(var, tf.zeros(3, dtype=_dtype)) for var in vars_list]
                    with tf.name_scope(None), tf.control_dependencies(reset_ops): # reset before reporting
                        mean = moments[1]
                        std = tf.sqrt(moments[2] - tf.square(moments[1])) # sqrt(E[x**2] - E[x]**2)
                        tf.summary.scalar(name, mean)
                        if enable_custom_scalars:
                            tf.summary.scalar("xCustomScalars/" + name + "/margin_lo", mean - std)
                            tf.summary.scalar("xCustomScalars/" + name + "/margin_hi", mean + std)

    # Setup layout for custom scalars.
    layout = None
    if enable_custom_scalars:
        # Group series as category => chart => [series names], derived from
        # the "/"-separated components of each autosummary name.
        cat_dict = OrderedDict()
        for series_name in sorted(_vars.keys()):
            p = series_name.split("/")
            cat = p[0] if len(p) >= 2 else ""
            chart = "/".join(p[1:-1]) if len(p) >= 3 else p[-1]
            if cat not in cat_dict:
                cat_dict[cat] = OrderedDict()
            if chart not in cat_dict[cat]:
                cat_dict[cat][chart] = []
            cat_dict[cat][chart].append(series_name)
        categories = []
        for cat_name, chart_dict in cat_dict.items():
            charts = []
            for chart_name, series_names in chart_dict.items():
                series = []
                for series_name in series_names:
                    series.append(layout_pb2.MarginChartContent.Series(
                        value=series_name,
                        lower="xCustomScalars/" + series_name + "/margin_lo",
                        upper="xCustomScalars/" + series_name + "/margin_hi"))
                margin = layout_pb2.MarginChartContent(series=series)
                charts.append(layout_pb2.Chart(title=chart_name, margin=margin))
            categories.append(layout_pb2.Category(title=cat_name, chart=charts))
        layout = summary_lib.custom_scalar_pb(layout_pb2.Layout(category=categories))
    return layout


def save_summaries(file_writer, global_step=None):
    """Call FileWriter.add_summary() with all summaries in the default graph,
    automatically finalizing and merging them on the first call.

    Args:
        file_writer: Object providing add_summary(); receives the custom-scalar
                     layout (first call only, if any) and the evaluated merged summary.
        global_step: Passed through to add_summary() for the merged summary.
    """
    global _merge_op
    tfutil.assert_tf_initialized()

    # First call: finalize autosummaries, emit the layout, and build the merge op once.
    if _merge_op is None:
        layout = finalize_autosummaries()
        if layout is not None:
            file_writer.add_summary(layout)
        with tf.device(None), tf.control_dependencies(None):
            _merge_op = tf.summary.merge_all()

    file_writer.add_summary(_merge_op.eval(), global_step)


================================================
FILE: FQ-StyleGAN/dnnlib/tflib/custom_ops.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html

"""TensorFlow custom ops builder.
"""

import os
import re
import uuid
import hashlib
import tempfile
import shutil
import tensorflow as tf
from tensorflow.python.client import device_lib # pylint: disable=no-name-in-module

#----------------------------------------------------------------------------
# Global options.

cuda_cache_path = os.path.join(os.path.dirname(__file__), '_cudacache')
cuda_cache_version_tag = 'v1'
do_not_hash_included_headers = False # Speed up compilation by assuming that headers included by the CUDA code never change. Unsafe!
verbose = True # Print status messages to stdout.

# Candidate host-compiler directories; the first one that exists is passed to nvcc.
compiler_bindir_search_path = [
    'C:/Program Files (x86)/Microsoft Visual Studio/2017/Community/VC/Tools/MSVC/14.14.26428/bin/Hostx64/x64',
    'C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.23.28105/bin/Hostx64/x64',
    'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin',
]

#----------------------------------------------------------------------------
# Internal helper funcs.

def _find_compiler_bindir():
    """Return the first existing directory in compiler_bindir_search_path, or None."""
    for compiler_path in compiler_bindir_search_path:
        if os.path.isdir(compiler_path):
            return compiler_path
    return None


def _get_compute_cap(device):
    """Extract the CUDA compute capability from a device description.

    Args:
        device: Object whose `physical_device_desc` string contains
                "compute capability: <major>.<minor>".

    Returns:
        Tuple (major, minor) of digit strings.

    Raises:
        RuntimeError: If the description does not contain a compute capability.
    """
    caps_str = device.physical_device_desc
    # The dot is escaped so that only a literal "." separates major and minor.
    m = re.search(r'compute capability: (\d+)\.(\d+)', caps_str)
    if m is None:
        raise RuntimeError('Could not parse compute capability from device description: %r' % caps_str)
    return (m.group(1), m.group(2))


def _get_cuda_gpu_arch_string():
    """Return the nvcc architecture string (e.g. 'sm_70') for the first local GPU.

    Raises:
        RuntimeError: If no GPU device is listed by TensorFlow.
    """
    gpus = [x for x in device_lib.list_local_devices() if x.device_type == 'GPU']
    if not gpus:
        raise RuntimeError('No GPU devices found')
    (major, minor) = _get_compute_cap(gpus[0])
    return 'sm_%s%s' % (major, minor)


def _run_cmd(cmd):
    """Run a shell command and raise RuntimeError with its output if it fails.

    The command's stdout is captured; callers append ' 2>&1' to the command
    (see _prepare_nvcc_cli) so that stderr ends up in the same log.
    """
    with os.popen(cmd) as pipe:
        output = pipe.read()
        status = pipe.close() # None on success, otherwise the exit status
    if status is not None:
        raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output))


def _prepare_nvcc_cli(opts):
    """Build the full nvcc command line for the given extra options.

    Adds the TensorFlow include paths, an explicit host compiler directory
    when one is found, and redirects stderr to stdout.

    Raises:
        RuntimeError: On Windows, if no host compiler directory is found.
    """
    include_dir = tf.sysconfig.get_include()
    cmd = 'nvcc --std=c++11 -DNDEBUG ' + opts.strip()
    cmd += ' --disable-warnings'
    cmd += ' --include-path "%s"' % include_dir
    cmd += ' --include-path "%s"' % os.path.join(include_dir, 'external', 'protobuf_archive', 'src')
    cmd += ' --include-path "%s"' % os.path.join(include_dir, 'external', 'com_google_absl')
    cmd += ' --include-path "%s"' % os.path.join(include_dir, 'external', 'eigen_archive')

    compiler_bindir = _find_compiler_bindir()
    if compiler_bindir is None:
        # Require that _find_compiler_bindir succeeds on Windows.  Allow
        # nvcc to use whatever is the default on Linux.
        if os.name == 'nt':
            raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__)
    else:
        cmd += ' --compiler-bindir "%s"' % compiler_bindir
    cmd += ' 2>&1'
    return cmd

#----------------------------------------------------------------------------
# Main entry point.

_plugin_cache = dict() def get_plugin(cuda_file): cuda_file_base = os.path.basename(cuda_file) cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base) # Already in cache? if cuda_file in _plugin_cache: return _plugin_cache[cuda_file] # Setup plugin. if verbose: print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True) try: # Hash CUDA source. md5 = hashlib.md5() with open(cuda_file, 'rb') as f: md5.update(f.read()) md5.update(b'\n') # Hash headers included by the CUDA code by running it through the preprocessor. if not do_not_hash_included_headers: if verbose: print('Preprocessing... ', end='', flush=True) with tempfile.TemporaryDirectory() as tmp_dir: tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext) _run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))) with open(tmp_file, 'rb') as f: bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8') # __FILE__ in error check macros good_file_str = ('"' + cuda_file_base + '"').encode('utf-8') for ln in f: if not ln.startswith(b'# ') and not ln.startswith(b'#line '): # ignore line number pragmas ln = ln.replace(bad_file_str, good_file_str) md5.update(ln) md5.update(b'\n') # Select compiler options. compile_opts = '' if os.name == 'nt': compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib') elif os.name == 'posix': compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.so') compile_opts += ' --compiler-options \'-fPIC -D_GLIBCXX_USE_CXX11_ABI=1\'' else: assert False # not Windows or Linux, w00t? compile_opts += ' --gpu-architecture=%s' % _get_cuda_gpu_arch_string() compile_opts += ' --use_fast_math' nvcc_cmd = _prepare_nvcc_cli(compile_opts) # Hash build configuration. 
md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n') md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n') md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n') # Compile if not already compiled. bin_file_ext = '.dll' if os.name == 'nt' else '.so' bin_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + md5.hexdigest() + bin_file_ext) if not os.path.isfile(bin_file): if verbose: print('Compiling... ', end='', flush=True) with tempfile.TemporaryDirectory() as tmp_dir: tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext) _run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)) os.makedirs(cuda_cache_path, exist_ok=True) intermediate_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext) shutil.copyfile(tmp_file, intermediate_file) os.rename(intermediate_file, bin_file) # atomic # Load. if verbose: print('Loading... ', end='', flush=True) plugin = tf.load_op_library(bin_file) # Add to cache. _plugin_cache[cuda_file] = plugin if verbose: print('Done.', flush=True) return plugin except: if verbose: print('Failed!', flush=True) raise #---------------------------------------------------------------------------- ================================================ FILE: FQ-StyleGAN/dnnlib/tflib/network.py ================================================ # Copyright (c) 2019, NVIDIA Corporation. All rights reserved. # # This work is made available under the Nvidia Source Code License-NC. # To view a copy of this license, visit # https://nvlabs.github.io/stylegan2/license.html """Helper for managing networks.""" import types import inspect import re import uuid import sys import numpy as np import tensorflow as tf from collections import OrderedDict from typing import Any, List, Tuple, Union from . import tfutil from .. 
import util from .tfutil import TfExpression, TfExpressionEx _import_handlers = [] # Custom import handlers for dealing with legacy data in pickle import. _import_module_src = dict() # Source code for temporary modules created during pickle import. def import_handler(handler_func): """Function decorator for declaring custom import handlers.""" _import_handlers.append(handler_func) return handler_func class Network: """Generic network abstraction. Acts as a convenience wrapper for a parameterized network construction function, providing several utility methods and convenient access to the inputs/outputs/weights. Network objects can be safely pickled and unpickled for long-term archival purposes. The pickling works reliably as long as the underlying network construction function is defined in a standalone Python module that has no side effects or application-specific imports. Args: name: Network name. Used to select TensorFlow name and variable scopes. func_name: Fully qualified name of the underlying network construction function, or a top-level function object. static_kwargs: Keyword arguments to be passed in to the network construction function. Attributes: name: User-specified name, defaults to build func name if None. scope: Unique TensorFlow scope containing template graph and variables, derived from the user-specified name. static_kwargs: Arguments passed to the user-supplied build func. components: Container for sub-networks. Passed to the build func, and retained between calls. num_inputs: Number of input tensors. num_outputs: Number of output tensors. input_shapes: Input tensor shapes (NC or NCHW), including minibatch dimension. output_shapes: Output tensor shapes (NC or NCHW), including minibatch dimension. input_shape: Short-hand for input_shapes[0]. output_shape: Short-hand for output_shapes[0]. input_templates: Input placeholders in the template graph. output_templates: Output tensors in the template graph. input_names: Name string for each input. 
output_names: Name string for each output. own_vars: Variables defined by this network (local_name => var), excluding sub-networks. vars: All variables (local_name => var). trainables: All trainable variables (local_name => var). var_global_to_local: Mapping from variable global names to local names. """ def __init__(self, name: str = None, func_name: Any = None, **static_kwargs): tfutil.assert_tf_initialized() assert isinstance(name, str) or name is None assert func_name is not None assert isinstance(func_name, str) or util.is_top_level_function(func_name) assert util.is_pickleable(static_kwargs) self._init_fields() self.name = name self.static_kwargs = util.EasyDict(static_kwargs) # Locate the user-specified network build function. if util.is_top_level_function(func_name): func_name = util.get_top_level_function_name(func_name) module, self._build_func_name = util.get_module_from_obj_name(func_name) self._build_func = util.get_obj_from_module(module, self._build_func_name) assert callable(self._build_func) # Dig up source code for the module containing the build function. self._build_module_src = _import_module_src.get(module, None) if self._build_module_src is None: self._build_module_src = inspect.getsource(module) # Init TensorFlow graph. self._init_graph() self.reset_own_vars() def _init_fields(self) -> None: self.name = None self.scope = None self.static_kwargs = util.EasyDict() self.components = util.EasyDict() self.num_inputs = 0 self.num_outputs = 0 self.input_shapes = [[]] self.output_shapes = [[]] self.input_shape = [] self.output_shape = [] self.input_templates = [] self.output_templates = [] self.input_names = [] self.output_names = [] self.own_vars = OrderedDict() self.vars = OrderedDict() self.trainables = OrderedDict() self.var_global_to_local = OrderedDict() self._build_func = None # User-supplied build function that constructs the network. self._build_func_name = None # Name of the build function. 
self._build_module_src = None # Full source code of the module containing the build function. self._run_cache = dict() # Cached graph data for Network.run(). def _init_graph(self) -> None: # Collect inputs. self.input_names = [] for param in inspect.signature(self._build_func).parameters.values(): if param.kind == param.POSITIONAL_OR_KEYWORD and param.default is param.empty: self.input_names.append(param.name) self.num_inputs = len(self.input_names) assert self.num_inputs >= 1 # Choose name and scope. if self.name is None: self.name = self._build_func_name assert re.match("^[A-Za-z0-9_.\\-]*$", self.name) with tf.name_scope(None): self.scope = tf.get_default_graph().unique_name(self.name, mark_as_used=True) # Finalize build func kwargs. build_kwargs = dict(self.static_kwargs) build_kwargs["is_template_graph"] = True build_kwargs["components"] = self.components # Build template graph. with tfutil.absolute_variable_scope(self.scope, reuse=False), tfutil.absolute_name_scope(self.scope): # ignore surrounding scopes assert tf.get_variable_scope().name == self.scope assert tf.get_default_graph().get_name_scope() == self.scope with tf.control_dependencies(None): # ignore surrounding control dependencies self.input_templates = [tf.placeholder(tf.float32, name=name) for name in self.input_names] out_expr = self._build_func(*self.input_templates, **build_kwargs) # Collect outputs. assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple) self.output_templates = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr) self.num_outputs = len(self.output_templates) assert self.num_outputs >= 1 assert all(tfutil.is_tf_expression(t) for t in self.output_templates) # Perform sanity checks. if any(t.shape.ndims is None for t in self.input_templates): raise ValueError("Network input shapes not defined. Please call x.set_shape() for each input.") if any(t.shape.ndims is None for t in self.output_templates): raise ValueError("Network output shapes not defined. 
Please call x.set_shape() where applicable.") if any(not isinstance(comp, Network) for comp in self.components.values()): raise ValueError("Components of a Network must be Networks themselves.") if len(self.components) != len(set(comp.name for comp in self.components.values())): raise ValueError("Components of a Network must have unique names.") # List inputs and outputs. self.input_shapes = [t.shape.as_list() for t in self.input_templates] self.output_shapes = [t.shape.as_list() for t in self.output_templates] self.input_shape = self.input_shapes[0] self.output_shape = self.output_shapes[0] self.output_names = [t.name.split("/")[-1].split(":")[0] for t in self.output_templates] # List variables. self.own_vars = OrderedDict((var.name[len(self.scope) + 1:].split(":")[0], var) for var in tf.global_variables(self.scope + "/")) self.vars = OrderedDict(self.own_vars) self.vars.update((comp.name + "/" + name, var) for comp in self.components.values() for name, var in comp.vars.items()) self.trainables = OrderedDict((name, var) for name, var in self.vars.items() if var.trainable) self.var_global_to_local = OrderedDict((var.name.split(":")[0], name) for name, var in self.vars.items()) def reset_own_vars(self) -> None: """Re-initialize all variables of this network, excluding sub-networks.""" tfutil.run([var.initializer for var in self.own_vars.values()]) def reset_vars(self) -> None: """Re-initialize all variables of this network, including sub-networks.""" tfutil.run([var.initializer for var in self.vars.values()]) def reset_trainables(self) -> None: """Re-initialize all trainable variables of this network, including sub-networks.""" tfutil.run([var.initializer for var in self.trainables.values()]) def get_output_for(self, *in_expr: TfExpression, return_as_list: bool = False, **dynamic_kwargs) -> Union[TfExpression, List[TfExpression]]: """Construct TensorFlow expression(s) for the output(s) of this network, given the input expression(s).""" assert len(in_expr) == 
self.num_inputs assert not all(expr is None for expr in in_expr) # Finalize build func kwargs. build_kwargs = dict(self.static_kwargs) build_kwargs.update(dynamic_kwargs) build_kwargs["is_template_graph"] = False build_kwargs["components"] = self.components # Build TensorFlow graph to evaluate the network. with tfutil.absolute_variable_scope(self.scope, reuse=True), tf.name_scope(self.name): assert tf.get_variable_scope().name == self.scope valid_inputs = [expr for expr in in_expr if expr is not None] final_inputs = [] for expr, name, shape in zip(in_expr, self.input_names, self.input_shapes): if expr is not None: expr = tf.identity(expr, name=name) else: expr = tf.zeros([tf.shape(valid_inputs[0])[0]] + shape[1:], name=name) final_inputs.append(expr) out_expr = self._build_func(*final_inputs, **build_kwargs) # Propagate input shapes back to the user-specified expressions. for expr, final in zip(in_expr, final_inputs): if isinstance(expr, tf.Tensor): expr.set_shape(final.shape) # Express outputs in the desired format. 
assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple) if return_as_list: out_expr = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr) return out_expr def get_var_local_name(self, var_or_global_name: Union[TfExpression, str]) -> str: """Get the local name of a given variable, without any surrounding name scopes.""" assert tfutil.is_tf_expression(var_or_global_name) or isinstance(var_or_global_name, str) global_name = var_or_global_name if isinstance(var_or_global_name, str) else var_or_global_name.name return self.var_global_to_local[global_name] def find_var(self, var_or_local_name: Union[TfExpression, str]) -> TfExpression: """Find variable by local or global name.""" assert tfutil.is_tf_expression(var_or_local_name) or isinstance(var_or_local_name, str) return self.vars[var_or_local_name] if isinstance(var_or_local_name, str) else var_or_local_name def get_var(self, var_or_local_name: Union[TfExpression, str]) -> np.ndarray: """Get the value of a given variable as NumPy array. Note: This method is very inefficient -- prefer to use tflib.run(list_of_vars) whenever possible.""" return self.find_var(var_or_local_name).eval() def set_var(self, var_or_local_name: Union[TfExpression, str], new_value: Union[int, float, np.ndarray]) -> None: """Set the value of a given variable based on the given NumPy array. 
Note: This method is very inefficient -- prefer to use tflib.set_vars() whenever possible.""" tfutil.set_vars({self.find_var(var_or_local_name): new_value}) def __getstate__(self) -> dict: """Pickle export.""" state = dict() state["version"] = 4 state["name"] = self.name state["static_kwargs"] = dict(self.static_kwargs) state["components"] = dict(self.components) state["build_module_src"] = self._build_module_src state["build_func_name"] = self._build_func_name state["variables"] = list(zip(self.own_vars.keys(), tfutil.run(list(self.own_vars.values())))) return state def __setstate__(self, state: dict) -> None: """Pickle import.""" # pylint: disable=attribute-defined-outside-init tfutil.assert_tf_initialized() self._init_fields() # Execute custom import handlers. for handler in _import_handlers: state = handler(state) # Set basic fields. assert state["version"] in [2, 3, 4] self.name = state["name"] self.static_kwargs = util.EasyDict(state["static_kwargs"]) self.components = util.EasyDict(state.get("components", {})) self._build_module_src = state["build_module_src"] self._build_func_name = state["build_func_name"] # Create temporary module from the imported source code. module_name = "_tflib_network_import_" + uuid.uuid4().hex module = types.ModuleType(module_name) sys.modules[module_name] = module _import_module_src[module] = self._build_module_src exec(self._build_module_src, module.__dict__) # pylint: disable=exec-used # Locate network build function in the temporary module. self._build_func = util.get_obj_from_module(module, self._build_func_name) assert callable(self._build_func) # Init TensorFlow graph. 
self._init_graph() self.reset_own_vars() tfutil.set_vars({self.find_var(name): value for name, value in state["variables"]}) def clone(self, name: str = None, **new_static_kwargs) -> "Network": """Create a clone of this network with its own copy of the variables.""" # pylint: disable=protected-access net = object.__new__(Network) net._init_fields() net.name = name if name is not None else self.name net.static_kwargs = util.EasyDict(self.static_kwargs) net.static_kwargs.update(new_static_kwargs) net._build_module_src = self._build_module_src net._build_func_name = self._build_func_name net._build_func = self._build_func net._init_graph() net.copy_vars_from(self) return net def copy_own_vars_from(self, src_net: "Network") -> None: """Copy the values of all variables from the given network, excluding sub-networks.""" names = [name for name in self.own_vars.keys() if name in src_net.own_vars] tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names})) def copy_vars_from(self, src_net: "Network") -> None: """Copy the values of all variables from the given network, including sub-networks.""" names = [name for name in self.vars.keys() if name in src_net.vars] tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names})) def copy_trainables_from(self, src_net: "Network") -> None: """Copy the values of all trainable variables from the given network, including sub-networks.""" names = [name for name in self.trainables.keys() if name in src_net.trainables] tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names})) def convert(self, new_func_name: str, new_name: str = None, **new_static_kwargs) -> "Network": """Create new network with the given parameters, and copy all variables from this network.""" if new_name is None: new_name = self.name static_kwargs = dict(self.static_kwargs) static_kwargs.update(new_static_kwargs) net = Network(name=new_name, func_name=new_func_name, **static_kwargs) 
net.copy_vars_from(self) return net def setup_as_moving_average_of(self, src_net: "Network", beta: TfExpressionEx = 0.99, beta_nontrainable: TfExpressionEx = 0.0) -> tf.Operation: """Construct a TensorFlow op that updates the variables of this network to be slightly closer to those of the given network.""" with tfutil.absolute_name_scope(self.scope + "/_MovingAvg"): ops = [] for name, var in self.vars.items(): if name in src_net.vars: cur_beta = beta if name in self.trainables else beta_nontrainable new_value = tfutil.lerp(src_net.vars[name], var, cur_beta) ops.append(var.assign(new_value)) return tf.group(*ops) def run(self, *in_arrays: Tuple[Union[np.ndarray, None], ...], input_transform: dict = None, output_transform: dict = None, return_as_list: bool = False, print_progress: bool = False, minibatch_size: int = None, num_gpus: int = 1, assume_frozen: bool = False, **dynamic_kwargs) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]: """Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s). Args: input_transform: A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network. The dict must contain a 'func' field that points to a top-level function. The function is called with the input TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs. output_transform: A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network. The dict must contain a 'func' field that points to a top-level function. The function is called with the output TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs. return_as_list: True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs. print_progress: Print progress to the console? Useful for very large input arrays. 
minibatch_size: Maximum minibatch size to use, None = disable batching. num_gpus: Number of GPUs to use. assume_frozen: Improve multi-GPU performance by assuming that the trainable parameters will remain changed between calls. dynamic_kwargs: Additional keyword arguments to be passed into the network build function. """ assert len(in_arrays) == self.num_inputs assert not all(arr is None for arr in in_arrays) assert input_transform is None or util.is_top_level_function(input_transform["func"]) assert output_transform is None or util.is_top_level_function(output_transform["func"]) output_transform, dynamic_kwargs = _handle_legacy_output_transforms(output_transform, dynamic_kwargs) num_items = in_arrays[0].shape[0] if minibatch_size is None: minibatch_size = num_items # Construct unique hash key from all arguments that affect the TensorFlow graph. key = dict(input_transform=input_transform, output_transform=output_transform, num_gpus=num_gpus, assume_frozen=assume_frozen, dynamic_kwargs=dynamic_kwargs) def unwind_key(obj): if isinstance(obj, dict): return [(key, unwind_key(value)) for key, value in sorted(obj.items())] if callable(obj): return util.get_top_level_function_name(obj) return obj key = repr(unwind_key(key)) # Build graph. 
if key not in self._run_cache: with tfutil.absolute_name_scope(self.scope + "/_Run"), tf.control_dependencies(None): with tf.device("/cpu:0"): in_expr = [tf.placeholder(tf.float32, name=name) for name in self.input_names] in_split = list(zip(*[tf.split(x, num_gpus) for x in in_expr])) out_split = [] for gpu in range(num_gpus): with tf.device("/gpu:%d" % gpu): net_gpu = self.clone() if assume_frozen else self in_gpu = in_split[gpu] if input_transform is not None: in_kwargs = dict(input_transform) in_gpu = in_kwargs.pop("func")(*in_gpu, **in_kwargs) in_gpu = [in_gpu] if tfutil.is_tf_expression(in_gpu) else list(in_gpu) assert len(in_gpu) == self.num_inputs out_gpu = net_gpu.get_output_for(*in_gpu, return_as_list=True, **dynamic_kwargs) if output_transform is not None: out_kwargs = dict(output_transform) out_gpu = out_kwargs.pop("func")(*out_gpu, **out_kwargs) out_gpu = [out_gpu] if tfutil.is_tf_expression(out_gpu) else list(out_gpu) assert len(out_gpu) == self.num_outputs out_split.append(out_gpu) with tf.device("/cpu:0"): out_expr = [tf.concat(outputs, axis=0) for outputs in zip(*out_split)] self._run_cache[key] = in_expr, out_expr # Run minibatches. in_expr, out_expr = self._run_cache[key] out_arrays = [np.empty([num_items] + expr.shape.as_list()[1:], expr.dtype.name) for expr in out_expr] for mb_begin in range(0, num_items, minibatch_size): if print_progress: print("\r%d / %d" % (mb_begin, num_items), end="") mb_end = min(mb_begin + minibatch_size, num_items) mb_num = mb_end - mb_begin mb_in = [src[mb_begin : mb_end] if src is not None else np.zeros([mb_num] + shape[1:]) for src, shape in zip(in_arrays, self.input_shapes)] mb_out = tf.get_default_session().run(out_expr, dict(zip(in_expr, mb_in))) for dst, src in zip(out_arrays, mb_out): dst[mb_begin: mb_end] = src # Done. 
if print_progress: print("\r%d / %d" % (num_items, num_items)) if not return_as_list: out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(out_arrays) return out_arrays def list_ops(self) -> List[TfExpression]: include_prefix = self.scope + "/" exclude_prefix = include_prefix + "_" ops = tf.get_default_graph().get_operations() ops = [op for op in ops if op.name.startswith(include_prefix)] ops = [op for op in ops if not op.name.startswith(exclude_prefix)] return ops def list_layers(self) -> List[Tuple[str, TfExpression, List[TfExpression]]]: """Returns a list of (layer_name, output_expr, trainable_vars) tuples corresponding to individual layers of the network. Mainly intended to be used for reporting.""" layers = [] def recurse(scope, parent_ops, parent_vars, level): # Ignore specific patterns. if any(p in scope for p in ["/Shape", "/strided_slice", "/Cast", "/concat", "/Assign"]): return # Filter ops and vars by scope. global_prefix = scope + "/" local_prefix = global_prefix[len(self.scope) + 1:] cur_ops = [op for op in parent_ops if op.name.startswith(global_prefix) or op.name == global_prefix[:-1]] cur_vars = [(name, var) for name, var in parent_vars if name.startswith(local_prefix) or name == local_prefix[:-1]] if not cur_ops and not cur_vars: return # Filter out all ops related to variables. for var in [op for op in cur_ops if op.type.startswith("Variable")]: var_prefix = var.name + "/" cur_ops = [op for op in cur_ops if not op.name.startswith(var_prefix)] # Scope does not contain ops as immediate children => recurse deeper. 
contains_direct_ops = any("/" not in op.name[len(global_prefix):] and op.type not in ["Identity", "Cast", "Transpose"] for op in cur_ops) if (level == 0 or not contains_direct_ops) and (len(cur_ops) + len(cur_vars)) > 1: visited = set() for rel_name in [op.name[len(global_prefix):] for op in cur_ops] + [name[len(local_prefix):] for name, _var in cur_vars]: token = rel_name.split("/")[0] if token not in visited: recurse(global_prefix + token, cur_ops, cur_vars, level + 1) visited.add(token) return # Report layer. layer_name = scope[len(self.scope) + 1:] layer_output = cur_ops[-1].outputs[0] if cur_ops else cur_vars[-1][1] layer_trainables = [var for _name, var in cur_vars if var.trainable] layers.append((layer_name, layer_output, layer_trainables)) recurse(self.scope, self.list_ops(), list(self.vars.items()), 0) return layers def print_layers(self, title: str = None, hide_layers_with_no_params: bool = False) -> None: """Print a summary table of the network structure.""" rows = [[title if title is not None else self.name, "Params", "OutputShape", "WeightShape"]] rows += [["---"] * 4] total_params = 0 for layer_name, layer_output, layer_trainables in self.list_layers(): num_params = sum(int(np.prod(var.shape.as_list())) for var in layer_trainables) weights = [var for var in layer_trainables if var.name.endswith("/weight:0")] weights.sort(key=lambda x: len(x.name)) if len(weights) == 0 and len(layer_trainables) == 1: weights = layer_trainables total_params += num_params if not hide_layers_with_no_params or num_params != 0: num_params_str = str(num_params) if num_params > 0 else "-" output_shape_str = str(layer_output.shape) weight_shape_str = str(weights[0].shape) if len(weights) >= 1 else "-" rows += [[layer_name, num_params_str, output_shape_str, weight_shape_str]] rows += [["---"] * 4] rows += [["Total", str(total_params), "", ""]] widths = [max(len(cell) for cell in column) for column in zip(*rows)] print() for row in rows: print(" ".join(cell + " " * (width - 
len(cell)) for cell, width in zip(row, widths))) print() def setup_weight_histograms(self, title: str = None) -> None: """Construct summary ops to include histograms of all trainable parameters in TensorBoard.""" if title is None: title = self.name with tf.name_scope(None), tf.device(None), tf.control_dependencies(None): for local_name, var in self.trainables.items(): if "/" in local_name: p = local_name.split("/") name = title + "_" + p[-1] + "/" + "_".join(p[:-1]) else: name = title + "_toplevel/" + local_name tf.summary.histogram(name, var) #---------------------------------------------------------------------------- # Backwards-compatible emulation of legacy output transformation in Network.run(). _print_legacy_warning = True def _handle_legacy_output_transforms(output_transform, dynamic_kwargs): global _print_legacy_warning legacy_kwargs = ["out_mul", "out_add", "out_shrink", "out_dtype"] if not any(kwarg in dynamic_kwargs for kwarg in legacy_kwargs): return output_transform, dynamic_kwargs if _print_legacy_warning: _print_legacy_warning = False print() print("WARNING: Old-style output transformations in Network.run() are deprecated.") print("Consider using 'output_transform=dict(func=tflib.convert_images_to_uint8)'") print("instead of 'out_mul=127.5, out_add=127.5, out_dtype=np.uint8'.") print() assert output_transform is None new_kwargs = dict(dynamic_kwargs) new_transform = {kwarg: new_kwargs.pop(kwarg) for kwarg in legacy_kwargs if kwarg in dynamic_kwargs} new_transform["func"] = _legacy_output_transform_func return new_transform, new_kwargs def _legacy_output_transform_func(*expr, out_mul=1.0, out_add=0.0, out_shrink=1, out_dtype=None): if out_mul != 1.0: expr = [x * out_mul for x in expr] if out_add != 0.0: expr = [x + out_add for x in expr] if out_shrink > 1: ksize = [1, 1, out_shrink, out_shrink] expr = [tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") for x in expr] if out_dtype is not None: if 
tf.as_dtype(out_dtype).is_integer: expr = [tf.round(x) for x in expr] expr = [tf.saturate_cast(x, out_dtype) for x in expr] return expr ================================================ FILE: FQ-StyleGAN/dnnlib/tflib/ops/__init__.py ================================================ # Copyright (c) 2019, NVIDIA Corporation. All rights reserved. # # This work is made available under the Nvidia Source Code License-NC. # To view a copy of this license, visit # https://nvlabs.github.io/stylegan2/license.html # empty ================================================ FILE: FQ-StyleGAN/dnnlib/tflib/ops/fused_bias_act.cu ================================================ // Copyright (c) 2019, NVIDIA Corporation. All rights reserved. // // This work is made available under the Nvidia Source Code License-NC. // To view a copy of this license, visit // https://nvlabs.github.io/stylegan2/license.html #define EIGEN_USE_GPU #define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/shape_inference.h" #include using namespace tensorflow; using namespace tensorflow::shape_inference; #define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false) //------------------------------------------------------------------------ // CUDA kernel. 
// Parameter block handed by value to FusedBiasActKernel.
// NOTE(review): `T` is used below but never declared in the visible text; the
// template parameter list after `template` (and template arguments elsewhere
// in this file) look like they were stripped by extraction -- confirm against
// the upstream source before compiling.
template struct FusedBiasActKernelParams
{
    const T*    x;      // [sizeX]
    const T*    b;      // [sizeB] or NULL
    const T*    ref;    // [sizeX] or NULL
    T*          y;      // [sizeX]

    int         grad;   // Combined with `act` as act * 10 + grad to pick the formula in the kernel.
    int         axis;   // Dimension of x that b is applied along (used on the host to derive stepB).
    int         act;    // Activation index; see the switch in the kernel.
    float       alpha;  // Negative-side slope used by the lrelu cases.
    float       gain;   // Output scale applied to every result.

    int         sizeX;  // Number of elements in x.
    int         sizeB;  // Number of elements in b.
    int         stepB;  // Product of the x dimensions after `axis`.
    int         loopX;  // Elements processed per thread.
};

// Element-wise kernel: y = gain * f(x + b), where f is selected by act * 10 + grad.
// Cases N0 read x only; cases N1 and N2 additionally use `ref`.
template static __global__ void FusedBiasActKernel(const FusedBiasActKernelParams p)
{
    const float expRange        = 80.0f;
    const float halfExpRange    = 40.0f;
    const float seluScale       = 1.0507009873554804934193349852946f;
    const float seluAlpha       = 1.6732632423543772848170429916717f;

    // Loop over elements.
    // Each block covers loopX * blockDim.x consecutive elements; a thread
    // visits up to loopX of them, blockDim.x apart.
    int xi = blockIdx.x * p.loopX * blockDim.x + threadIdx.x;
    for (int loopIdx = 0; loopIdx < p.loopX && xi < p.sizeX; loopIdx++, xi += blockDim.x)
    {
        // Load and apply bias.
        float x = (float)p.x[xi];
        if (p.b) x += (float)p.b[(xi / p.stepB) % p.sizeB];
        float ref = (p.ref) ? (float)p.ref[xi] : 0.0f;
        // Divide the gain back out of ref, except for act 9 (swish) or a zero gain.
        if (p.gain != 0.0f & p.act != 9) ref /= p.gain;

        // Evaluate activation func.
        float y;
        switch (p.act * 10 + p.grad)
        {
            // linear
            default:
            case 10: y = x; break;
            case 11: y = x; break;
            case 12: y = 0.0f; break;

            // relu
            case 20: y = (x > 0.0f) ? x : 0.0f; break;
            case 21: y = (ref > 0.0f) ? x : 0.0f; break;
            case 22: y = 0.0f; break;

            // lrelu
            case 30: y = (x > 0.0f) ? x : x * p.alpha; break;
            case 31: y = (ref > 0.0f) ? x : x * p.alpha; break;
            case 32: y = 0.0f; break;

            // tanh
            case 40: { float c = expf(x); float d = 1.0f / c; y = (x < -expRange) ? -1.0f : (x > expRange) ? 1.0f : (c - d) / (c + d); } break;
            case 41: y = x * (1.0f - ref * ref); break;
            case 42: y = x * (1.0f - ref * ref) * (-2.0f * ref); break;

            // sigmoid
            case 50: y = (x < -expRange) ? 0.0f : 1.0f / (expf(-x) + 1.0f); break;
            case 51: y = x * ref * (1.0f - ref); break;
            case 52: y = x * ref * (1.0f - ref) * (1.0f - 2.0f * ref); break;

            // elu
            case 60: y = (x >= 0.0f) ? x : expf(x) - 1.0f; break;
            case 61: y = (ref >= 0.0f) ? x : x * (ref + 1.0f); break;
            case 62: y = (ref >= 0.0f) ? 0.0f : x * (ref + 1.0f); break;

            // selu
            case 70: y = (x >= 0.0f) ? seluScale * x : (seluScale * seluAlpha) * (expf(x) - 1.0f); break;
            case 71: y = (ref >= 0.0f) ? x * seluScale : x * (ref + seluScale * seluAlpha); break;
            case 72: y = (ref >= 0.0f) ? 0.0f : x * (ref + seluScale * seluAlpha); break;

            // softplus
            case 80: y = (x > expRange) ? x : logf(expf(x) + 1.0f); break;
            case 81: y = x * (1.0f - expf(-ref)); break;
            case 82: { float c = expf(-ref); y = x * c * (1.0f - c); } break;

            // swish
            case 90: y = (x < -expRange) ? 0.0f : x / (expf(-x) + 1.0f); break;
            case 91: { float c = expf(ref); float d = c + 1.0f; y = (ref > halfExpRange) ? x : x * c * (ref + d) / (d * d); } break;
            case 92: { float c = expf(ref); float d = c + 1.0f; y = (ref > halfExpRange) ? 0.0f : x * c * (ref * (2.0f - d) + 2.0f * d) / (d * d * d); } break;
        }

        // Apply gain and store.
        p.y[xi] = (T)(y * p.gain);
    }
}

//------------------------------------------------------------------------
// TensorFlow op.

// OpKernel wrapper: reads the op attributes once at construction, then per
// call validates the inputs, allocates the output and launches the kernel.
template struct FusedBiasActOp : public OpKernel
{
    // Attribute values cached at construction; pointer and size fields stay zero here.
    FusedBiasActKernelParams m_attribs;

    FusedBiasActOp(OpKernelConstruction* ctx) : OpKernel(ctx)
    {
        memset(&m_attribs, 0, sizeof(m_attribs));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("grad", &m_attribs.grad));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("axis", &m_attribs.axis));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("act", &m_attribs.act));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &m_attribs.alpha));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("gain", &m_attribs.gain));
        OP_REQUIRES(ctx, m_attribs.grad >= 0, errors::InvalidArgument("grad must be non-negative"));
        OP_REQUIRES(ctx, m_attribs.axis >= 0, errors::InvalidArgument("axis must be non-negative"));
        OP_REQUIRES(ctx, m_attribs.act >= 0, errors::InvalidArgument("act must be non-negative"));
    }

    void Compute(OpKernelContext* ctx)
    {
        // Start from the cached attributes and fill in the per-call fields.
        FusedBiasActKernelParams p = m_attribs;
        cudaStream_t stream = ctx->eigen_device().stream();

        const Tensor& x     = ctx->input(0); // [...]
        const Tensor& b     = ctx->input(1); // [sizeB] or [0]
        const Tensor& ref   = ctx->input(2); // x.shape or [0]
        // Empty b / ref tensors are passed to the kernel as NULL pointers.
        p.x = x.flat().data();
        p.b = (b.NumElements()) ? b.flat().data() : NULL;
        p.ref = (ref.NumElements()) ? ref.flat().data() : NULL;
        OP_REQUIRES(ctx, b.NumElements() == 0 || m_attribs.axis < x.dims(), errors::InvalidArgument("axis out of bounds"));
        OP_REQUIRES(ctx, b.dims() == 1, errors::InvalidArgument("b must have rank 1"));
        OP_REQUIRES(ctx, b.NumElements() == 0 || b.NumElements() == x.dim_size(m_attribs.axis), errors::InvalidArgument("b has wrong number of elements"));
        // ref must be empty when grad == 0 and match x in size otherwise.
        OP_REQUIRES(ctx, ref.NumElements() == ((p.grad == 0) ? 0 : x.NumElements()), errors::InvalidArgument("ref has wrong number of elements"));
        // The kernel indexes with int, so the element count must fit in 32 bits.
        OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("x is too large"));

        p.sizeX = (int)x.NumElements();
        p.sizeB = (int)b.NumElements();
        // stepB = number of x elements spanned by one index step along `axis`.
        p.stepB = 1;
        for (int i = m_attribs.axis + 1; i < x.dims(); i++)
            p.stepB *= (int)x.dim_size(i);

        Tensor* y = NULL; // x.shape
        OP_REQUIRES_OK(ctx, ctx->allocate_output(0, x.shape(), &y));
        p.y = y->flat().data();

        // 128 threads per block, 4 elements per thread; round the grid up to cover sizeX.
        p.loopX = 4;
        int blockSize = 4 * 32;
        int gridSize = (p.sizeX - 1) / (p.loopX * blockSize) + 1;
        void* args[] = {&p};
        OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel((void*)FusedBiasActKernel, gridSize, blockSize, args, 0, stream));
    }
};

REGISTER_OP("FusedBiasAct")
    .Input      ("x: T")
    .Input      ("b: T")
    .Input      ("ref: T")
    .Output     ("y: T")
    .Attr       ("T: {float, half}")
    .Attr       ("grad: int = 0")
    .Attr       ("axis: int = 1")
    .Attr       ("act: int = 0")
    .Attr       ("alpha: float = 0.0")
    .Attr       ("gain: float = 1.0");
// NOTE(review): the two registrations below are textually identical; presumably
// each carried a different template argument (one per type in the "T" attr list)
// before extraction -- confirm against upstream.
REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint("T"), FusedBiasActOp);
REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint("T"), FusedBiasActOp);

//------------------------------------------------------------------------

================================================
FILE: FQ-StyleGAN/dnnlib/tflib/ops/fused_bias_act.py
================================================
#
Copyright (c) 2019, NVIDIA Corporation. All rights reserved. # # This work is made available under the Nvidia Source Code License-NC. # To view a copy of this license, visit # https://nvlabs.github.io/stylegan2/license.html """Custom TensorFlow ops for efficient bias and activation.""" import os import numpy as np import tensorflow as tf from .. import custom_ops from ...util import EasyDict def _get_plugin(): return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu') #---------------------------------------------------------------------------- activation_funcs = { 'linear': EasyDict(func=lambda x, **_: x, def_alpha=None, def_gain=1.0, cuda_idx=1, ref='y', zero_2nd_grad=True), 'relu': EasyDict(func=lambda x, **_: tf.nn.relu(x), def_alpha=None, def_gain=np.sqrt(2), cuda_idx=2, ref='y', zero_2nd_grad=True), 'lrelu': EasyDict(func=lambda x, alpha, **_: tf.nn.leaky_relu(x, alpha), def_alpha=0.2, def_gain=np.sqrt(2), cuda_idx=3, ref='y', zero_2nd_grad=True), 'tanh': EasyDict(func=lambda x, **_: tf.nn.tanh(x), def_alpha=None, def_gain=1.0, cuda_idx=4, ref='y', zero_2nd_grad=False), 'sigmoid': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x), def_alpha=None, def_gain=1.0, cuda_idx=5, ref='y', zero_2nd_grad=False), 'elu': EasyDict(func=lambda x, **_: tf.nn.elu(x), def_alpha=None, def_gain=1.0, cuda_idx=6, ref='y', zero_2nd_grad=False), 'selu': EasyDict(func=lambda x, **_: tf.nn.selu(x), def_alpha=None, def_gain=1.0, cuda_idx=7, ref='y', zero_2nd_grad=False), 'softplus': EasyDict(func=lambda x, **_: tf.nn.softplus(x), def_alpha=None, def_gain=1.0, cuda_idx=8, ref='y', zero_2nd_grad=False), 'swish': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x) * x, def_alpha=None, def_gain=np.sqrt(2), cuda_idx=9, ref='x', zero_2nd_grad=False), } #---------------------------------------------------------------------------- def fused_bias_act(x, b=None, axis=1, act='linear', alpha=None, gain=None, impl='cuda'): r"""Fused bias and activation function. 
Adds bias `b` to activation tensor `x`, evaluates activation function `act`, and scales the result by `gain`. Each of the steps is optional. In most cases, the fused op is considerably more efficient than performing the same calculation using standard TensorFlow ops. It supports first and second order gradients, but not third order gradients. Args: x: Input activation tensor. Can have any shape, but if `b` is defined, the dimension corresponding to `axis`, as well as the rank, must be known. b: Bias vector, or `None` to disable. Must be a 1D tensor of the same type as `x`. The shape must be known, and it must match the dimension of `x` corresponding to `axis`. axis: The dimension in `x` corresponding to the elements of `b`. The value of `axis` is ignored if `b` is not specified. act: Name of the activation function to evaluate, or `"linear"` to disable. Can be e.g. `"relu"`, `"lrelu"`, `"tanh"`, `"sigmoid"`, `"swish"`, etc. See `activation_funcs` for a full list. `None` is not allowed. alpha: Shape parameter for the activation function, or `None` to use the default. gain: Scaling factor for the output tensor, or `None` to use default. See `activation_funcs` for the default scaling of each activation function. If unsure, consider specifying `1.0`. impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). Returns: Tensor of the same shape and datatype as `x`. """ impl_dict = { 'ref': _fused_bias_act_ref, 'cuda': _fused_bias_act_cuda, } return impl_dict[impl](x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain) #---------------------------------------------------------------------------- def _fused_bias_act_ref(x, b, axis, act, alpha, gain): """Slow reference implementation of `fused_bias_act()` using standard TensorFlow ops.""" # Validate arguments. 
x = tf.convert_to_tensor(x) b = tf.convert_to_tensor(b) if b is not None else tf.constant([], dtype=x.dtype) act_spec = activation_funcs[act] assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis]) assert b.shape[0] == 0 or 0 <= axis < x.shape.rank if alpha is None: alpha = act_spec.def_alpha if gain is None: gain = act_spec.def_gain # Add bias. if b.shape[0] != 0: x += tf.reshape(b, [-1 if i == axis else 1 for i in range(x.shape.rank)]) # Evaluate activation function. x = act_spec.func(x, alpha=alpha) # Scale by gain. if gain != 1: x *= gain return x #---------------------------------------------------------------------------- def _fused_bias_act_cuda(x, b, axis, act, alpha, gain): """Fast CUDA implementation of `fused_bias_act()` using custom ops.""" # Validate arguments. x = tf.convert_to_tensor(x) empty_tensor = tf.constant([], dtype=x.dtype) b = tf.convert_to_tensor(b) if b is not None else empty_tensor act_spec = activation_funcs[act] assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis]) assert b.shape[0] == 0 or 0 <= axis < x.shape.rank if alpha is None: alpha = act_spec.def_alpha if gain is None: gain = act_spec.def_gain # Special cases. if act == 'linear' and b is None and gain == 1.0: return x if act_spec.cuda_idx is None: return _fused_bias_act_ref(x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain) # CUDA kernel. cuda_kernel = _get_plugin().fused_bias_act cuda_kwargs = dict(axis=axis, act=act_spec.cuda_idx, alpha=alpha, gain=gain) # Forward pass: y = func(x, b). 
def func_y(x, b): y = cuda_kernel(x=x, b=b, ref=empty_tensor, grad=0, **cuda_kwargs) y.set_shape(x.shape) return y # Backward pass: dx, db = grad(dy, x, y) def grad_dx(dy, x, y): ref = {'x': x, 'y': y}[act_spec.ref] dx = cuda_kernel(x=dy, b=empty_tensor, ref=ref, grad=1, **cuda_kwargs) dx.set_shape(x.shape) return dx def grad_db(dx): if b.shape[0] == 0: return empty_tensor db = dx if axis < x.shape.rank - 1: db = tf.reduce_sum(db, list(range(axis + 1, x.shape.rank))) if axis > 0: db = tf.reduce_sum(db, list(range(axis))) db.set_shape(b.shape) return db # Second order gradients: d_dy, d_x = grad2(d_dx, d_db, x, y) def grad2_d_dy(d_dx, d_db, x, y): ref = {'x': x, 'y': y}[act_spec.ref] d_dy = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=1, **cuda_kwargs) d_dy.set_shape(x.shape) return d_dy def grad2_d_x(d_dx, d_db, x, y): ref = {'x': x, 'y': y}[act_spec.ref] d_x = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=2, **cuda_kwargs) d_x.set_shape(x.shape) return d_x # Fast version for piecewise-linear activation funcs. @tf.custom_gradient def func_zero_2nd_grad(x, b): y = func_y(x, b) @tf.custom_gradient def grad(dy): dx = grad_dx(dy, x, y) db = grad_db(dx) def grad2(d_dx, d_db): d_dy = grad2_d_dy(d_dx, d_db, x, y) return d_dy return (dx, db), grad2 return y, grad # Slow version for general activation funcs. @tf.custom_gradient def func_nonzero_2nd_grad(x, b): y = func_y(x, b) def grad_wrap(dy): @tf.custom_gradient def grad_impl(dy, x): dx = grad_dx(dy, x, y) db = grad_db(dx) def grad2(d_dx, d_db): d_dy = grad2_d_dy(d_dx, d_db, x, y) d_x = grad2_d_x(d_dx, d_db, x, y) return d_dy, d_x return (dx, db), grad2 return grad_impl(dy, x) return y, grad_wrap # Which version to use? 
if act_spec.zero_2nd_grad: return func_zero_2nd_grad(x, b) return func_nonzero_2nd_grad(x, b) #---------------------------------------------------------------------------- ================================================ FILE: FQ-StyleGAN/dnnlib/tflib/ops/upfirdn_2d.cu ================================================ // Copyright (c) 2019, NVIDIA Corporation. All rights reserved. // // This work is made available under the Nvidia Source Code License-NC. // To view a copy of this license, visit // https://nvlabs.github.io/stylegan2/license.html #define EIGEN_USE_GPU #define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/shape_inference.h" #include using namespace tensorflow; using namespace tensorflow::shape_inference; //------------------------------------------------------------------------ // Helpers. #define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false) static __host__ __device__ __forceinline__ int floorDiv(int a, int b) { int c = a / b; if (c * b > a) c--; return c; } //------------------------------------------------------------------------ // CUDA kernel params. template struct UpFirDn2DKernelParams { const T* x; // [majorDim, inH, inW, minorDim] const T* k; // [kernelH, kernelW] T* y; // [majorDim, outH, outW, minorDim] int upx; int upy; int downx; int downy; int padx0; int padx1; int pady0; int pady1; int majorDim; int inH; int inW; int minorDim; int kernelH; int kernelW; int outH; int outW; int loopMajor; int loopX; }; //------------------------------------------------------------------------ // General CUDA implementation for large filter kernels. template static __global__ void UpFirDn2DKernel_large(const UpFirDn2DKernelParams p) { // Calculate thread index. 
int minorIdx = blockIdx.x * blockDim.x + threadIdx.x; int outY = minorIdx / p.minorDim; minorIdx -= outY * p.minorDim; int outXBase = blockIdx.y * p.loopX * blockDim.y + threadIdx.y; int majorIdxBase = blockIdx.z * p.loopMajor; if (outXBase >= p.outW || outY >= p.outH || majorIdxBase >= p.majorDim) return; // Setup Y receptive field. int midY = outY * p.downy + p.upy - 1 - p.pady0; int inY = min(max(floorDiv(midY, p.upy), 0), p.inH); int h = min(max(floorDiv(midY + p.kernelH, p.upy), 0), p.inH) - inY; int kernelY = midY + p.kernelH - (inY + 1) * p.upy; // Loop over majorDim and outX. for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor && majorIdx < p.majorDim; loopMajor++, majorIdx++) for (int loopX = 0, outX = outXBase; loopX < p.loopX && outX < p.outW; loopX++, outX += blockDim.y) { // Setup X receptive field. int midX = outX * p.downx + p.upx - 1 - p.padx0; int inX = min(max(floorDiv(midX, p.upx), 0), p.inW); int w = min(max(floorDiv(midX + p.kernelW, p.upx), 0), p.inW) - inX; int kernelX = midX + p.kernelW - (inX + 1) * p.upx; // Initialize pointers. const T* xp = &p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx]; const T* kp = &p.k[kernelY * p.kernelW + kernelX]; int xpx = p.minorDim; int kpx = -p.upx; int xpy = p.inW * p.minorDim; int kpy = -p.upy * p.kernelW; // Inner loop. float v = 0.0f; for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { v += (float)(*xp) * (float)(*kp); xp += xpx; kp += kpx; } xp += xpy - w * xpx; kp += kpy - w * kpx; } // Store result. p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v; } } //------------------------------------------------------------------------ // Specialized CUDA implementation for small filter kernels. 
// Tiled kernel: each block stages the filter and one input tile in shared
// memory, then computes a tileOutH x tileOutW patch of outputs from them.
// NOTE(review): the body uses T, upx, upy, downx, downy, kernelW, kernelH,
// tileOutW and tileOutH as compile-time names (they size the shared arrays),
// but the template parameter list after `template` is missing -- presumably
// stripped by extraction. Restore it from upstream before compiling.
template static __global__ void UpFirDn2DKernel_small(const UpFirDn2DKernelParams p)
{
    //assert(kernelW % upx == 0);
    //assert(kernelH % upy == 0);
    // Input tile extent needed to produce one output tile.
    const int tileInW = ((tileOutW - 1) * downx + kernelW - 1) / upx + 1;
    const int tileInH = ((tileOutH - 1) * downy + kernelH - 1) / upy + 1;
    __shared__ volatile float sk[kernelH][kernelW];
    __shared__ volatile float sx[tileInH][tileInW];

    // Calculate tile index.
    // blockIdx.x packs (tile row, minorIdx); blockIdx.y selects the first tile
    // column; blockIdx.z selects the first major index.
    int minorIdx = blockIdx.x;
    int tileOutY = minorIdx / p.minorDim;
    minorIdx -= tileOutY * p.minorDim;
    tileOutY *= tileOutH;
    int tileOutXBase = blockIdx.y * p.loopX * tileOutW;
    int majorIdxBase = blockIdx.z * p.loopMajor;
    if (tileOutXBase >= p.outW | tileOutY >= p.outH | majorIdxBase >= p.majorDim)
        return;

    // Load filter kernel (flipped).
    // Taps beyond the runtime filter size (p.kernelW x p.kernelH) are zero-filled.
    for (int tapIdx = threadIdx.x; tapIdx < kernelH * kernelW; tapIdx += blockDim.x)
    {
        int ky = tapIdx / kernelW;
        int kx = tapIdx - ky * kernelW;
        float v = 0.0f;
        if (kx < p.kernelW & ky < p.kernelH)
            v = (float)p.k[(p.kernelH - 1 - ky) * p.kernelW + (p.kernelW - 1 - kx)];
        sk[ky][kx] = v;
    }

    // Loop over majorDim and outX.
    for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor & majorIdx < p.majorDim; loopMajor++, majorIdx++)
    for (int loopX = 0, tileOutX = tileOutXBase; loopX < p.loopX & tileOutX < p.outW; loopX++, tileOutX += tileOutW)
    {
        // Load input pixels.
        int tileMidX = tileOutX * downx + upx - 1 - p.padx0;
        int tileMidY = tileOutY * downy + upy - 1 - p.pady0;
        int tileInX = floorDiv(tileMidX, upx);
        int tileInY = floorDiv(tileMidY, upy);

        // Barrier before sx is overwritten for this tile.
        __syncthreads();
        for (int inIdx = threadIdx.x; inIdx < tileInH * tileInW; inIdx += blockDim.x)
        {
            int relInY = inIdx / tileInW;
            int relInX = inIdx - relInY * tileInW;
            int inX = relInX + tileInX;
            int inY = relInY + tileInY;

            // Positions outside the input image contribute zero.
            float v = 0.0f;
            if (inX >= 0 & inY >= 0 & inX < p.inW & inY < p.inH)
                v = (float)p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx];
            sx[relInY][relInX] = v;
        }

        // Loop over output pixels.
        // Barrier so the whole tile (and the filter) is written before it is read.
        __syncthreads();
        for (int outIdx = threadIdx.x; outIdx < tileOutH * tileOutW; outIdx += blockDim.x)
        {
            int relOutY = outIdx / tileOutW;
            int relOutX = outIdx - relOutY * tileOutW;
            int outX = relOutX + tileOutX;
            int outY = relOutY + tileOutY;

            // Setup receptive field.
            int midX = tileMidX + relOutX * downx;
            int midY = tileMidY + relOutY * downy;
            int inX = floorDiv(midX, upx);
            int inY = floorDiv(midY, upy);
            int relInX = inX - tileInX;
            int relInY = inY - tileInY;
            int kernelX = (inX + 1) * upx - midX - 1; // flipped
            int kernelY = (inY + 1) * upy - midY - 1; // flipped

            // Inner loop.
            // Visits every upy-th / upx-th filter tap starting at (kernelY, kernelX).
            float v = 0.0f;
            #pragma unroll
            for (int y = 0; y < kernelH / upy; y++)
                #pragma unroll
                for (int x = 0; x < kernelW / upx; x++)
                    v += sx[relInY + y][relInX + x] * sk[kernelY + y * upy][kernelX + x * upx];

            // Store result.
            // Tiles may overhang the output; only in-range pixels are written.
            if (outX < p.outW & outY < p.outH)
                p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v;
        }
    }
}

//------------------------------------------------------------------------
// TensorFlow op.
template struct UpFirDn2DOp : public OpKernel { UpFirDn2DKernelParams m_attribs; UpFirDn2DOp(OpKernelConstruction* ctx) : OpKernel(ctx) { memset(&m_attribs, 0, sizeof(m_attribs)); OP_REQUIRES_OK(ctx, ctx->GetAttr("upx", &m_attribs.upx)); OP_REQUIRES_OK(ctx, ctx->GetAttr("upy", &m_attribs.upy)); OP_REQUIRES_OK(ctx, ctx->GetAttr("downx", &m_attribs.downx)); OP_REQUIRES_OK(ctx, ctx->GetAttr("downy", &m_attribs.downy)); OP_REQUIRES_OK(ctx, ctx->GetAttr("padx0", &m_attribs.padx0)); OP_REQUIRES_OK(ctx, ctx->GetAttr("padx1", &m_attribs.padx1)); OP_REQUIRES_OK(ctx, ctx->GetAttr("pady0", &m_attribs.pady0)); OP_REQUIRES_OK(ctx, ctx->GetAttr("pady1", &m_attribs.pady1)); OP_REQUIRES(ctx, m_attribs.upx >= 1 && m_attribs.upy >= 1, errors::InvalidArgument("upx and upy must be at least 1x1")); OP_REQUIRES(ctx, m_attribs.downx >= 1 && m_attribs.downy >= 1, errors::InvalidArgument("downx and downy must be at least 1x1")); } void Compute(OpKernelContext* ctx) { UpFirDn2DKernelParams p = m_attribs; cudaStream_t stream = ctx->eigen_device().stream(); const Tensor& x = ctx->input(0); // [majorDim, inH, inW, minorDim] const Tensor& k = ctx->input(1); // [kernelH, kernelW] p.x = x.flat().data(); p.k = k.flat().data(); OP_REQUIRES(ctx, x.dims() == 4, errors::InvalidArgument("input must have rank 4")); OP_REQUIRES(ctx, k.dims() == 2, errors::InvalidArgument("kernel must have rank 2")); OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("input too large")); OP_REQUIRES(ctx, k.NumElements() <= kint32max, errors::InvalidArgument("kernel too large")); p.majorDim = (int)x.dim_size(0); p.inH = (int)x.dim_size(1); p.inW = (int)x.dim_size(2); p.minorDim = (int)x.dim_size(3); p.kernelH = (int)k.dim_size(0); p.kernelW = (int)k.dim_size(1); OP_REQUIRES(ctx, p.kernelW >= 1 && p.kernelH >= 1, errors::InvalidArgument("kernel must be at least 1x1")); p.outW = (p.inW * p.upx + p.padx0 + p.padx1 - p.kernelW + p.downx) / p.downx; p.outH = (p.inH * p.upy + p.pady0 + p.pady1 - p.kernelH + 
p.downy) / p.downy; OP_REQUIRES(ctx, p.outW >= 1 && p.outH >= 1, errors::InvalidArgument("output must be at least 1x1")); Tensor* y = NULL; // [majorDim, outH, outW, minorDim] TensorShape ys; ys.AddDim(p.majorDim); ys.AddDim(p.outH); ys.AddDim(p.outW); ys.AddDim(p.minorDim); OP_REQUIRES_OK(ctx, ctx->allocate_output(0, ys, &y)); p.y = y->flat().data(); OP_REQUIRES(ctx, y->NumElements() <= kint32max, errors::InvalidArgument("output too large")); // Choose CUDA kernel to use. void* cudaKernel = (void*)UpFirDn2DKernel_large; int tileOutW = -1; int tileOutH = -1; if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 7 && p.kernelH <= 7) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 5 && p.kernelH <= 5) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 3 && p.kernelH <= 3) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } if (p.upx == 2 && p.upy == 2 && 
p.downx == 1 && p.downy == 1 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 64; tileOutH = 16; } if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small; tileOutW = 32; tileOutH = 8; } // Choose launch params. dim3 blockSize; dim3 gridSize; if (tileOutW > 0 && tileOutH > 0) // small { p.loopMajor = (p.majorDim - 1) / 16384 + 1; p.loopX = 1; blockSize = dim3(32 * 8, 1, 1); gridSize = dim3(((p.outH - 1) / tileOutH + 1) * p.minorDim, (p.outW - 1) / (p.loopX * tileOutW) + 1, (p.majorDim - 1) / p.loopMajor + 1); } else // large { p.loopMajor = (p.majorDim - 1) / 16384 + 1; p.loopX = 4; blockSize = dim3(4, 32, 1); gridSize = dim3((p.outH * p.minorDim - 1) / blockSize.x + 1, (p.outW - 1) / (p.loopX * blockSize.y) + 1, (p.majorDim - 1) / p.loopMajor + 1); } // Launch CUDA kernel. 
def upfirdn_2d(x, k, upx=1, upy=1, downx=1, downy=1, padx0=0, padx1=0, pady0=0, pady1=0, impl='cuda'):
    r"""Upsample, pad, FIR-filter and downsample a batch of 2D images.

    The input is laid out as `[majorDim, inH, inW, minorDim]`; every
    `(majorDim, minorDim)` slice is processed as an independent image.
    Following the reference implementation (`_upfirdn_2d_ref`), the stages are:

    1. Upsample by inserting zeros after each pixel (`upx`, `upy`).
    2. Pad with zeros on each side (`padx0`, `padx1`, `pady0`, `pady1`).
       Padding is measured in pixels of the upsampled image; a negative
       value crops instead.
    3. Convolve with the 2D FIR filter `k`, keeping only output pixels whose
       footprint lies fully inside the padded image.
    4. Downsample by keeping every `downx`-th / `downy`-th pixel.

    The sequence closely resembles scipy.signal.upfirdn(). The fused op is
    considerably more efficient than the same calculation with standard
    TensorFlow ops, and supports gradients of arbitrary order.

    Args:
        x:      Input tensor of the shape `[majorDim, inH, inW, minorDim]`.
        k:      2D FIR filter of the shape `[firH, firW]`.
        upx:    Integer upsampling factor along the X-axis (default: 1).
        upy:    Integer upsampling factor along the Y-axis (default: 1).
        downx:  Integer downsampling factor along the X-axis (default: 1).
        downy:  Integer downsampling factor along the Y-axis (default: 1).
        padx0:  Number of pixels to pad on the left side (default: 0).
        padx1:  Number of pixels to pad on the right side (default: 0).
        pady0:  Number of pixels to pad on the top side (default: 0).
        pady1:  Number of pixels to pad on the bottom side (default: 0).
        impl:   Implementation to dispatch to: `"ref"` or `"cuda"` (default).

    Returns:
        Tensor of the shape `[majorDim, outH, outW, minorDim]` with the same
        datatype as `x`, where
        `outW = (inW * upx + padx0 + padx1 - firW) // downx + 1` and
        `outH = (inH * upy + pady0 + pady1 - firH) // downy + 1`.

    Raises:
        KeyError: If `impl` names no known implementation.
    """
    backends = dict(ref=_upfirdn_2d_ref, cuda=_upfirdn_2d_cuda)
    backend = backends[impl]  # unknown names surface as KeyError
    return backend(
        x=x, k=k,
        upx=upx, upy=upy, downx=downx, downy=downy,
        padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1)
def _upfirdn_2d_cuda(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1):
    """CUDA-backed `upfirdn_2d()` built on the `up_fir_dn2d` plugin op.

    The gradient is expressed with the same op: the filter is flipped along
    both axes, the up/down factors are swapped and the paddings are adjusted.
    The backward function in turn names the forward function as its own
    gradient, so the two reference each other.

    Args:
        x: Input of the shape `[majorDim, inH, inW, minorDim]`; `inH` and
           `inW` are compared against integers, so they must be statically known.
        k: 2D FIR filter (array-like), converted to float32.
        upx, upy, downx, downy: Integer resampling factors.
        padx0, padx1, pady0, pady1: Integer paddings (negative values crop).

    Returns:
        Tensor with static shape `[majorDim, outH, outW, minorDim]`.
    """
    x = tf.convert_to_tensor(x)
    k = np.asarray(k, dtype=np.float32)
    majorDim, inH, inW, minorDim = x.shape.as_list()
    kernelH, kernelW = k.shape
    assert inW >= 1 and inH >= 1
    assert kernelW >= 1 and kernelH >= 1
    for setting in (upx, upy, downx, downy, padx0, padx1, pady0, pady1):
        assert isinstance(setting, int)

    # Output extent after upsampling, padding, filtering and decimation.
    outW = (inW * upx + padx0 + padx1 - kernelW) // downx + 1
    outH = (inH * upy + pady0 + pady1 - kernelH) // downy + 1
    assert outW >= 1 and outH >= 1

    # Forward pass uses the filter as given; backward pass uses it flipped.
    fwd_filter = tf.constant(k, dtype=x.dtype)
    bwd_filter = tf.constant(k[::-1, ::-1], dtype=x.dtype)

    fwd_args = dict(
        upx=upx, upy=upy, downx=downx, downy=downy,
        padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1)
    bwd_args = dict(
        upx=downx, upy=downy, downx=upx, downy=upy,
        padx0=kernelW - padx0 - 1,
        padx1=inW * upx - outW * downx + padx0 - upx + 1,
        pady0=kernelH - pady0 - 1,
        pady1=inH * upy - outH * downy + pady0 - upy + 1)

    @tf.custom_gradient
    def func(x):
        y = _get_plugin().up_fir_dn2d(x=x, k=fwd_filter, **fwd_args)
        y.set_shape([majorDim, outH, outW, minorDim])

        @tf.custom_gradient
        def grad(dy):
            dx = _get_plugin().up_fir_dn2d(x=dy, k=bwd_filter, **bwd_args)
            dx.set_shape([majorDim, inH, inW, minorDim])
            return dx, func  # gradient of the backward op is the forward op

        return y, grad

    return func(x)
def upsample_2d(x, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'):
    r"""Upsample a batch of 2D images by an integer factor using an FIR filter.

    Works on `[N, C, H, W]` or `[N, H, W, C]` batches. The filter is
    normalized so that a constant input is scaled by `gain`. Pixels outside
    the image are treated as zero, and the filter is padded with zeros so
    that its shape is a multiple of the upsampling factor.

    Args:
        x:            Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
        k:            FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
                      Defaults to `[1] * factor`, i.e. nearest-neighbor upsampling.
        factor:       Integer upsampling factor (default: 2).
        gain:         Scaling factor for signal magnitude (default: 1.0).
        data_format:  `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
        impl:         Implementation to use: `"ref"` or `"cuda"` (default).

    Returns:
        Tensor of the shape `[N, C, H * factor, W * factor]` or
        `[N, H * factor, W * factor, C]`, with the same datatype as `x`.
    """
    assert isinstance(factor, int) and factor >= 1

    taps = [1] * factor if k is None else k
    # The extra factor**2 presumably compensates for the zeros inserted
    # during upsampling -- confirm against the normalization contract above.
    kernel = _setup_kernel(taps) * (gain * (factor ** 2))

    # Split the filter's excess width between the two sides of the image.
    excess = kernel.shape[0] - factor
    pad_before = (excess + 1) // 2 + factor - 1
    pad_after = excess // 2
    return _simple_upfirdn_2d(
        x, kernel, up=factor, pad0=pad_before, pad1=pad_after,
        data_format=data_format, impl=impl)
The fused op is considerably more efficient than performing the same calculation using standard TensorFlow ops. It supports gradients of arbitrary order. Args: x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`. Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). The default is `[1] * factor`, which corresponds to nearest-neighbor upsampling. factor: Integer upsampling factor (default: 2). gain: Scaling factor for signal magnitude (default: 1.0). data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). Returns: Tensor of the shape `[N, C, H * factor, W * factor]` or `[N, H * factor, W * factor, C]`, and same datatype as `x`. """ assert isinstance(factor, int) and factor >= 1 # Check weight shape. w = tf.convert_to_tensor(w) assert w.shape.rank == 4 convH = w.shape[0].value convW = w.shape[1].value inC = _shape(w, 2) outC = _shape(w, 3) assert convW == convH # Setup filter kernel. if k is None: k = [1] * factor k = _setup_kernel(k) * (gain * (factor ** 2)) p = (k.shape[0] - factor) - (convW - 1) # Determine data dimensions. if data_format == 'NCHW': stride = [1, 1, factor, factor] output_shape = [_shape(x, 0), outC, (_shape(x, 2) - 1) * factor + convH, (_shape(x, 3) - 1) * factor + convW] num_groups = _shape(x, 1) // inC else: stride = [1, factor, factor, 1] output_shape = [_shape(x, 0), (_shape(x, 1) - 1) * factor + convH, (_shape(x, 2) - 1) * factor + convW, outC] num_groups = _shape(x, 3) // inC # Transpose weights. w = tf.reshape(w, [convH, convW, inC, num_groups, -1]) w = tf.transpose(w[::-1, ::-1], [0, 1, 4, 3, 2]) w = tf.reshape(w, [convH, convW, -1, num_groups * inC]) # Execute. 
def conv_downsample_2d(x, w, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'):
    r"""Fused `tf.nn.conv2d()` followed by `downsample_2d()`.

    The input is first padded and FIR-filtered with `_simple_upfirdn_2d()`;
    the downsampling is then performed by the stride of a `'VALID'`
    convolution with `w`. Padding is performed only once at the beginning,
    not between the operations.

    Args:
        x:            Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
        w:            Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`
                      with `filterH == filterW`.
                      NOTE(review): the previous docstring described grouped
                      convolution via `inChannels = x.shape[0] // numGroups`;
                      axis 0 is the batch axis in both layouts, so the channel
                      axis is presumably meant -- confirm before relying on it.
        k:            FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
                      Defaults to `[1] * factor`, i.e. average pooling.
        factor:       Integer downsampling factor (default: 2).
        gain:         Scaling factor for signal magnitude (default: 1.0).
        data_format:  `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
        impl:         Implementation to use: `"ref"` or `"cuda"` (default).

    Returns:
        Tensor of the shape `[N, C, H // factor, W // factor]` or
        `[N, H // factor, W // factor, C]`, with the same datatype as `x`.
    """
    assert isinstance(factor, int) and factor >= 1
    w = tf.convert_to_tensor(w)
    convH, convW, _inC, _outC = w.shape.as_list()
    assert convW == convH
    if k is None:
        k = [1] * factor
    k = _setup_kernel(k) * gain
    # Total padding covers the FIR excess plus the footprint of the conv filter.
    p = (k.shape[0] - factor) + (convW - 1)
    if data_format == 'NCHW':
        s = [1, 1, factor, factor]
    else:
        s = [1, factor, factor, 1]
    x = _simple_upfirdn_2d(x, k, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl)
    return tf.nn.conv2d(x, w, strides=s, padding='VALID', data_format=data_format)

#----------------------------------------------------------------------------
# Internal helper funcs.

def _shape(tf_expr, dim_idx):
    """Return dimension `dim_idx` of `tf_expr`, preferring the static value.

    Falls back to the dynamic `tf.shape(tf_expr)[dim_idx]` when the rank or
    that particular dimension is not statically known.
    """
    if tf_expr.shape.rank is not None:
        dim = tf_expr.shape[dim_idx].value
        if dim is not None:
            return dim
    return tf.shape(tf_expr)[dim_idx]

def _setup_kernel(k):
    """Convert `k` into a square 2D float32 FIR kernel divided by its sum.

    Args:
        k: 1D filter taps (expanded to a separable 2D kernel with an outer
           product) or a square 2D array-like.

    Returns:
        A new float32 `np.ndarray` of shape `[firN, firN]`. The caller's
        array is never modified.

    Raises:
        AssertionError: If the kernel is not 2D and square after expansion.
    """
    k = np.asarray(k, dtype=np.float32)
    if k.ndim == 1:
        k = np.outer(k, k)
    # Divide out of place: np.asarray() hands back the caller's own array when
    # it is already float32, so an in-place `k /= ...` would rescale it.
    k = k / np.sum(k)
    assert k.ndim == 2
    assert k.shape[0] == k.shape[1]
    return k

def _simple_upfirdn_2d(x, k, up=1, down=1, pad0=0, pad1=0, data_format='NCHW', impl='cuda'):
    """Call `upfirdn_2d()` with the same factors and paddings on both axes.

    For `'NCHW'` input the tensor is reshaped to `[-1, H, W, 1]` before the
    call and back to `[-1, C, outH, outW]` afterwards; `'NHWC'` input is
    passed through unchanged.
    """
    assert data_format in ['NCHW', 'NHWC']
    assert x.shape.rank == 4
    y = x
    if data_format == 'NCHW':
        y = tf.reshape(y, [-1, _shape(y, 2), _shape(y, 3), 1])
    y = upfirdn_2d(y, k, upx=up, upy=up, downx=down, downy=down, padx0=pad0, padx1=pad1, pady0=pad0, pady1=pad1, impl=impl)
    if data_format == 'NCHW':
        y = tf.reshape(y, [-1, _shape(x, 1), _shape(y, 1), _shape(y, 2)])
    return y
class Optimizer:
    """A Wrapper for tf.train.Optimizer.

    Automatically takes care of:
    - Gradient averaging for multi-GPU training.
    - Gradient accumulation for arbitrarily large minibatches.
    - Dynamic loss scaling and typecasts for FP16 training.
    - Ignoring corrupted gradients that contain NaNs/Infs.
    - Reporting statistics.
    - Well-chosen default settings.

    Usage, as enforced by the assertions below: call `register_gradients()`
    one or more times, then `apply_updates()` exactly once. After that the
    instance accepts no further gradients.
    """

    def __init__(self,
        name: str = "Train", # Name string that will appear in TensorFlow graph.
        tf_optimizer: str = "tf.train.AdamOptimizer", # Underlying optimizer class.
        learning_rate: TfExpressionEx = 0.001, # Learning rate. Can vary over time.
        minibatch_multiplier: TfExpressionEx = None, # Treat N consecutive minibatches as one by accumulating gradients.
        share: "Optimizer" = None, # Share internal state with a previously created optimizer?
        use_loss_scaling: bool = False, # Enable dynamic loss scaling for robust mixed-precision training?
        loss_scaling_init: float = 64.0, # Log2 of initial loss scaling factor.
        loss_scaling_inc: float = 0.0005, # Log2 of per-minibatch loss scaling increment when there is no overflow.
        loss_scaling_dec: float = 1.0, # Log2 of per-minibatch loss scaling decrement when there is an overflow.
        report_mem_usage: bool = False, # Report fine-grained memory usage statistics in TensorBoard?
        **kwargs):
        """Store the configuration; no per-device TensorFlow objects are created here.

        `tf_optimizer` is resolved to a callable through `util.get_obj_by_name()`.
        Any extra keyword arguments are forwarded to that callable when the
        per-device optimizer is instantiated in `_get_device()`.
        """

        # Public fields.
        self.name = name
        self.learning_rate = learning_rate
        self.minibatch_multiplier = minibatch_multiplier
        self.id = self.name.replace("/", ".")
        self.scope = tf.get_default_graph().unique_name(self.id)
        self.optimizer_class = util.get_obj_by_name(tf_optimizer)
        self.optimizer_kwargs = dict(kwargs)
        self.use_loss_scaling = use_loss_scaling
        self.loss_scaling_init = loss_scaling_init
        self.loss_scaling_inc = loss_scaling_inc
        self.loss_scaling_dec = loss_scaling_dec

        # Private fields.
        self._updates_applied = False
        self._devices = OrderedDict() # device_name => EasyDict()
        self._shared_optimizers = OrderedDict() # device_name => optimizer_class
        self._gradient_shapes = None # [shape, ...]
        self._report_mem_usage = report_mem_usage

        # Validate arguments.
        assert callable(self.optimizer_class)

        # Share internal state if requested.
        # Sharing requires the same optimizer class, the very same learning-rate
        # object (identity, not equality) and equal optimizer kwargs.
        if share is not None:
            assert isinstance(share, Optimizer)
            assert self.optimizer_class is share.optimizer_class
            assert self.learning_rate is share.learning_rate
            assert self.optimizer_kwargs == share.optimizer_kwargs
            self._shared_optimizers = share._shared_optimizers # pylint: disable=protected-access

    def _get_device(self, device_name: str):
        """Get internal state for the given TensorFlow device.

        The state is created on first use and cached in `self._devices`.
        Creation instantiates the underlying optimizer (unless one is already
        present in the shared optimizer table) and, when loss scaling is
        enabled, the per-device loss scaling variable.
        """
        tfutil.assert_tf_initialized()
        if device_name in self._devices:
            return self._devices[device_name]

        # Initialize fields.
        device = util.EasyDict()
        device.name = device_name
        device.optimizer = None # Underlying optimizer: optimizer_class
        device.loss_scaling_var = None # Log2 of loss scaling: tf.Variable
        device.grad_raw = OrderedDict() # Raw gradients: var => [grad, ...]
        device.grad_clean = OrderedDict() # Clean gradients: var => grad
        device.grad_acc_vars = OrderedDict() # Accumulation sums: var => tf.Variable
        device.grad_acc_count = None # Accumulation counter: tf.Variable
        device.grad_acc = OrderedDict() # Accumulated gradients: var => grad

        # Setup TensorFlow objects.
        # control_dependencies(None) clears any control dependencies that are
        # active in the caller's context while these objects are created.
        with tfutil.absolute_name_scope(self.scope + "/Devices"), tf.device(device_name), tf.control_dependencies(None):
            if device_name not in self._shared_optimizers:
                optimizer_name = self.scope.replace("/", "_") + "_opt%d" % len(self._shared_optimizers)
                self._shared_optimizers[device_name] = self.optimizer_class(name=optimizer_name, learning_rate=self.learning_rate, **self.optimizer_kwargs)
            device.optimizer = self._shared_optimizers[device_name]
            if self.use_loss_scaling:
                device.loss_scaling_var = tf.Variable(np.float32(self.loss_scaling_init), trainable=False, name="loss_scaling_var")

        # Register device.
        self._devices[device_name] = device
        return device

    def register_gradients(self, loss: TfExpression, trainable_vars: Union[List, dict]) -> None:
        """Register the gradients of the given loss function with respect to the given variables.
        Intended to be called once per GPU.

        The device is taken from `loss.device`, and every variable must live on
        that same device. `trainable_vars` may be a list or a dict (its values
        are used). All calls must pass the same number of variables with the
        same shapes as the first call.
        """
        tfutil.assert_tf_initialized()
        assert not self._updates_applied
        device = self._get_device(loss.device)

        # Validate trainables.
        if isinstance(trainable_vars, dict):
            trainable_vars = list(trainable_vars.values()) # allow passing in Network.trainables as vars
        assert isinstance(trainable_vars, list) and len(trainable_vars) >= 1
        assert all(tfutil.is_tf_expression(expr) for expr in trainable_vars + [loss])
        assert all(var.device == device.name for var in trainable_vars)

        # Validate shapes.
        # The first call fixes the reference shapes; later calls must match them.
        if self._gradient_shapes is None:
            self._gradient_shapes = [var.shape.as_list() for var in trainable_vars]
        assert len(trainable_vars) == len(self._gradient_shapes)
        assert all(var.shape.as_list() == var_shape for var, var_shape in zip(trainable_vars, self._gradient_shapes))

        # Report memory usage if requested.
        # The flag is cleared before the attempt, so reporting is set up at most
        # once per Optimizer, on the first registered loss.
        deps = []
        if self._report_mem_usage:
            self._report_mem_usage = False
            try:
                with tf.name_scope(self.id + '_mem'), tf.device(device.name), tf.control_dependencies([loss]):
                    deps.append(autosummary.autosummary(self.id + "/mem_usage_gb", tf.contrib.memory_stats.BytesInUse() / 2**30))
            except tf.errors.NotFoundError:
                # Deliberately best-effort: presumably the memory_stats op is
                # unavailable in this build -- training proceeds without it.
                pass

        # Compute gradients.
        with tf.name_scope(self.id + "_grad"), tf.device(device.name), tf.control_dependencies(deps):
            loss = self.apply_loss_scaling(tf.cast(loss, tf.float32))
            gate = tf.train.Optimizer.GATE_NONE # disable gating to reduce memory usage
            grad_list = device.optimizer.compute_gradients(loss=loss, var_list=trainable_vars, gate_gradients=gate)

        # Register gradients.
        # Several calls for the same variable append to the same list; the
        # entries are summed and averaged in apply_updates().
        for grad, var in grad_list:
            if var not in device.grad_raw:
                device.grad_raw[var] = []
            device.grad_raw[var].append(grad)

    def apply_updates(self, allow_no_op: bool = False) -> tf.Operation:
        """Construct training op to update the registered variables based on their gradients.

        May be called only once. With `allow_no_op=True` and no registered
        device, a plain no-op named 'TrainingOp' is returned. Otherwise the
        method builds the gradient clean-up, cross-device sum, optional
        accumulation, conditional apply and loss-scaling adjustment ops, runs
        the required variable initializers, and returns one grouped op.
        """
        tfutil.assert_tf_initialized()
        assert not self._updates_applied
        self._updates_applied = True
        all_ops = []

        # Check for no-op.
        if allow_no_op and len(self._devices) == 0:
            with tfutil.absolute_name_scope(self.scope):
                return tf.no_op(name='TrainingOp')

        # Clean up gradients.
        for device_idx, device in enumerate(self._devices.values()):
            with tfutil.absolute_name_scope(self.scope + "/Clean%d" % device_idx), tf.device(device.name):
                for var, grad in device.grad_raw.items():

                    # Filter out disconnected gradients and convert to float32.
                    grad = [g for g in grad if g is not None]
                    grad = [tf.cast(g, tf.float32) for g in grad]

                    # Sum within the device.
                    if len(grad) == 0:
                        grad = tf.zeros(var.shape) # No gradients => zero.
                    elif len(grad) == 1:
                        grad = grad[0] # Single gradient => use as is.
                    else:
                        grad = tf.add_n(grad) # Multiple gradients => sum.

                    # Scale as needed.
                    # Divides by the number of registered gradients for this
                    # variable (disconnected ones included) and by the device
                    # count, then by the minibatch multiplier if set, and
                    # finally undoes the loss scaling.
                    scale = 1.0 / len(device.grad_raw[var]) / len(self._devices)
                    scale = tf.constant(scale, dtype=tf.float32, name="scale")
                    if self.minibatch_multiplier is not None:
                        scale /= tf.cast(self.minibatch_multiplier, tf.float32)
                    scale = self.undo_loss_scaling(scale)
                    device.grad_clean[var] = grad * scale

        # Sum gradients across devices.
        # Variables are matched across devices purely by their position in each
        # device's grad_clean dict -- assumes every device registered its
        # variables in the same order (TODO confirm with callers).
        if len(self._devices) > 1:
            with tfutil.absolute_name_scope(self.scope + "/Broadcast"), tf.device(None):
                for all_vars in zip(*[device.grad_clean.keys() for device in self._devices.values()]):
                    if len(all_vars) > 0 and all(dim > 0 for dim in all_vars[0].shape.as_list()): # NCCL does not support zero-sized tensors.
                        all_grads = [device.grad_clean[var] for device, var in zip(self._devices.values(), all_vars)]
                        all_grads = nccl_ops.all_sum(all_grads)
                        for device, var, grad in zip(self._devices.values(), all_vars, all_grads):
                            device.grad_clean[var] = grad

        # Apply updates separately on each device.
        for device_idx, device in enumerate(self._devices.values()):
            with tfutil.absolute_name_scope(self.scope + "/Apply%d" % device_idx), tf.device(device.name):
                # pylint: disable=cell-var-from-loop
                # The lambdas below capture loop variables, but each one is
                # handed to tf.cond() within the same iteration.

                # Accumulate gradients over time.
                if self.minibatch_multiplier is None:
                    acc_ok = tf.constant(True, name='acc_ok')
                    device.grad_acc = OrderedDict(device.grad_clean)
                else:
                    # Create variables.
                    with tf.control_dependencies(None):
                        for var in device.grad_clean.keys():
                            device.grad_acc_vars[var] = tf.Variable(tf.zeros(var.shape), trainable=False, name="grad_acc_var")
                        device.grad_acc_count = tf.Variable(tf.zeros([]), trainable=False, name="grad_acc_count")

                    # Track counter.
                    # acc_ok becomes true on the step where the counter reaches
                    # the multiplier; the counter is then reset to zero.
                    count_cur = device.grad_acc_count + 1.0
                    count_inc_op = lambda: tf.assign(device.grad_acc_count, count_cur)
                    count_reset_op = lambda: tf.assign(device.grad_acc_count, tf.zeros([]))
                    acc_ok = (count_cur >= tf.cast(self.minibatch_multiplier, tf.float32))
                    all_ops.append(tf.cond(acc_ok, count_reset_op, count_inc_op))

                    # Track gradients.
                    # grad_acc holds "stored sum + current gradient"; the stored
                    # sum is reset when acc_ok, otherwise replaced by the new sum.
                    for var, grad in device.grad_clean.items():
                        acc_var = device.grad_acc_vars[var]
                        acc_cur = acc_var + grad
                        device.grad_acc[var] = acc_cur
                        with tf.control_dependencies([acc_cur]):
                            acc_inc_op = lambda: tf.assign(acc_var, acc_cur)
                            acc_reset_op = lambda: tf.assign(acc_var, tf.zeros(var.shape))
                            all_ops.append(tf.cond(acc_ok, acc_reset_op, acc_inc_op))

                # No overflow => apply gradients.
                # all_ok requires acc_ok and that every accumulated gradient is finite.
                all_ok = tf.reduce_all(tf.stack([acc_ok] + [tf.reduce_all(tf.is_finite(g)) for g in device.grad_acc.values()]))
                apply_op = lambda: device.optimizer.apply_gradients([(tf.cast(grad, var.dtype), var) for var, grad in device.grad_acc.items()])
                all_ops.append(tf.cond(all_ok, apply_op, tf.no_op))

                # Adjust loss scaling.
                # Only on steps where acc_ok holds: raise the log2 scale after a
                # clean step, lower it after an overflow.
                if self.use_loss_scaling:
                    ls_inc_op = lambda: tf.assign_add(device.loss_scaling_var, self.loss_scaling_inc)
                    ls_dec_op = lambda: tf.assign_sub(device.loss_scaling_var, self.loss_scaling_dec)
                    ls_update_op = lambda: tf.group(tf.cond(all_ok, ls_inc_op, ls_dec_op))
                    all_ops.append(tf.cond(acc_ok, ls_update_op, tf.no_op))

                # Last device => report statistics.
                if device_idx == len(self._devices) - 1:
                    all_ops.append(autosummary.autosummary(self.id + "/learning_rate", self.learning_rate))
                    all_ops.append(autosummary.autosummary(self.id + "/overflow_frequency", tf.where(all_ok, 0, 1), condition=acc_ok))
                    if self.use_loss_scaling:
                        all_ops.append(autosummary.autosummary(self.id + "/loss_scaling_log2", device.loss_scaling_var))

        # Initialize variables.
        self.reset_optimizer_state()
        if self.use_loss_scaling:
            tfutil.init_uninitialized_vars([device.loss_scaling_var for device in self._devices.values()])
        if self.minibatch_multiplier is not None:
            tfutil.run([var.initializer for device in self._devices.values() for var in list(device.grad_acc_vars.values()) + [device.grad_acc_count]])

        # Group everything into a single op.
        with tfutil.absolute_name_scope(self.scope):
            return tf.group(*all_ops, name="TrainingOp")

    def reset_optimizer_state(self) -> None:
        """Reset internal state of the underlying optimizer.

        Runs the initializer of every variable reported by each registered
        device's optimizer.
        """
        tfutil.assert_tf_initialized()
        tfutil.run([var.initializer for device in self._devices.values() for var in device.optimizer.variables()])

    def get_loss_scaling_var(self, device: str) -> Union[tf.Variable, None]:
        """Get or create variable representing log2 of the current dynamic loss scaling factor.

        Returns None when loss scaling is disabled. Looking up a device that
        has not been seen before creates its internal state as a side effect.
        """
        return self._get_device(device).loss_scaling_var

    def apply_loss_scaling(self, value: TfExpression) -> TfExpression:
        """Apply dynamic loss scaling for the given expression.

        Returns `value` unchanged when loss scaling is disabled; otherwise
        multiplies it by 2 ** loss_scaling_var of the device `value` lives on.
        """
        assert tfutil.is_tf_expression(value)
        if not self.use_loss_scaling:
            return value
        return value * tfutil.exp2(self.get_loss_scaling_var(value.device))

    def undo_loss_scaling(self, value: TfExpression) -> TfExpression:
        """Undo the effect of dynamic loss scaling for the given expression.

        Returns `value` unchanged when loss scaling is disabled; otherwise
        multiplies it by 2 ** -loss_scaling_var of the device `value` lives on.
        """
        assert tfutil.is_tf_expression(value)
        if not self.use_loss_scaling:
            return value
        return value * tfutil.exp2(-self.get_loss_scaling_var(value.device)) # pylint: disable=invalid-unary-operand-type
with tf.control_dependencies(None): b1pow_var = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False) b2pow_var = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False) state_vars += [b1pow_var, b2pow_var] b1pow_new = b1pow_var * self.beta1 b2pow_new = b2pow_var * self.beta2 update_ops += [tf.assign(b1pow_var, b1pow_new), tf.assign(b2pow_var, b2pow_new)] lr_new = self.learning_rate * tf.sqrt(1 - b2pow_new) / (1 - b1pow_new) # Construct ops to update each variable. for grad, var in grads_and_vars: with tf.control_dependencies(None): m_var = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(var), trainable=False) v_var = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(var), trainable=False) state_vars += [m_var, v_var] m_new = self.beta1 * m_var + (1 - self.beta1) * grad v_new = self.beta2 * v_var + (1 - self.beta2) * tf.square(grad) var_delta = lr_new * m_new / (tf.sqrt(v_new) + self.epsilon) update_ops += [tf.assign(m_var, m_new), tf.assign(v_var, v_new), tf.assign_sub(var, var_delta)] # Group everything together. self.all_state_vars += state_vars return tf.group(*update_ops) ================================================ FILE: FQ-StyleGAN/dnnlib/tflib/tfutil.py ================================================ # Copyright (c) 2019, NVIDIA Corporation. All rights reserved. # # This work is made available under the Nvidia Source Code License-NC. # To view a copy of this license, visit # https://nvlabs.github.io/stylegan2/license.html """Miscellaneous helper utils for Tensorflow.""" import os import numpy as np import tensorflow as tf # Silence deprecation warnings from TensorFlow 1.13 onwards import logging logging.getLogger('tensorflow').setLevel(logging.ERROR) import tensorflow.contrib # requires TensorFlow 1.x! 
def absolute_name_scope(scope: str) -> tf.name_scope:
    """Return a name-scope context manager for `scope` that ignores surrounding scopes.

    A "/" is appended to `scope` before it is handed to `tf.name_scope`;
    presumably TensorFlow then treats the name as an already-complete scope
    path instead of nesting it under the active one (confirm against the
    TensorFlow version in use).
    """
    rooted_scope = scope + "/"
    return tf.name_scope(rooted_scope)
def _sanitize_tf_config(config_dict: dict = None) -> dict:
    """Build the effective config dict: defaults, minus preset env vars, plus overrides.

    Keys have the form `"<group>.<name>"`. A default in the `env` group is
    dropped when the environment variable `<name>` is already set, so that an
    existing setting wins over the default. Entries of `config_dict`, if
    given, are applied last and always win.
    """
    defaults = (
        ("rnd.np_random_seed", None),  # Random seed for NumPy. None = keep as is.
        ("rnd.tf_random_seed", "auto"),  # Random seed for TensorFlow. 'auto' = derive from NumPy random state. None = keep as is.
        ("env.TF_CPP_MIN_LOG_LEVEL", "1"),  # 0 = Print all available debug info from TensorFlow. 1 = Print warnings and errors, but disable debug info.
        ("graph_options.place_pruned_graph", True),  # False = Check that all ops are available on the designated device. True = Skip the check for ops that are not used.
        ("gpu_options.allow_growth", True),  # False = Allocate all GPU memory at the beginning. True = Allocate only as much GPU memory as needed.
    )

    cfg = {}
    for key, value in defaults:
        parts = key.split(".")
        if parts[0] == "env":
            assert len(parts) == 2
            if parts[1] in os.environ:
                continue  # already set in the environment => no default
        cfg[key] = value

    # User overrides.
    if config_dict is not None:
        cfg.update(config_dict)
    return cfg
def create_session(config_dict: dict = None, force_as_default: bool = False) -> tf.Session:
    """Create tf.Session based on config dict.

    The dict is first passed through _sanitize_tf_config(). Keys whose first dotted
    field is "rnd" or "env" are not session settings and are ignored here; every other
    key is treated as a dotted attribute path into a fresh tf.ConfigProto.

    If `force_as_default` is set, the new session is also entered as the default
    session (with `enforce_nesting` switched off) and stays entered after returning.
    """
    # Setup TensorFlow config proto.
    config_proto = tf.ConfigProto()
    settings = _sanitize_tf_config(config_dict)
    for dotted_name, setting in settings.items():
        path = dotted_name.split(".")
        if path[0] in ["rnd", "env"]:
            continue
        # Walk down to the object that owns the final attribute, then assign it.
        target = config_proto
        for attr in path[:-1]:
            target = getattr(target, attr)
        setattr(target, path[-1], setting)

    # Create session.
    sess = tf.Session(config=config_proto)
    if not force_as_default:
        return sess

    # pylint: disable=protected-access
    sess._default_session = sess.as_default()
    sess._default_session.enforce_nesting = False
    sess._default_session.__enter__()
    return sess
def set_vars(var_to_value_dict: dict) -> None:
    """Set the values of given tf.Variables.

    Equivalent to the following, but more efficient and does not bloat the tf graph:
    tflib.run([tf.assign(var, value) for var, value in var_to_value_dict.items()])

    Args:
        var_to_value_dict: Mapping from variable to the value that should be assigned to it.
    """
    assert_tf_initialized()
    ops = []
    feed_dict = {}

    for var, value in var_to_value_dict.items():
        assert is_tf_expression(var)

        # Each variable gets at most one "<var name>/setter" assign op in the graph;
        # it is looked up by name first and only created when the lookup fails.
        try:
            setter = tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/setter:0"))  # look for existing op
        except KeyError:
            with absolute_name_scope(var.name.split(":")[0]):
                with tf.control_dependencies(None):  # ignore surrounding control_dependencies
                    setter = tf.assign(var, tf.placeholder(var.dtype, var.shape, "new_value"), name="setter")  # create new setter

        ops.append(setter)
        # Input 1 of the assign op is the "new_value" placeholder created above; feed the value through it.
        feed_dict[setter.op.inputs[1]] = value

    # Run all assignments in a single call.
    run(ops, feed_dict)
class EasyDict(dict):
    """Convenience class that behaves like a dict but allows access with the attribute syntax.

    `d.key`, `d.key = value` and `del d.key` are mapped onto the corresponding item
    operations. A missing key is reported as AttributeError for all attribute-style
    operations, so `hasattr()`, `getattr(..., default)` and `delattr()` behave as they
    do for ordinary objects.
    """

    def __getattr__(self, name: str) -> Any:
        try:
            return self[name]
        except KeyError:
            raise AttributeError(name)

    def __setattr__(self, name: str, value: Any) -> None:
        self[name] = value

    def __delattr__(self, name: str) -> None:
        # Mirror __getattr__: attribute-style deletion of a missing key must surface as
        # AttributeError rather than leaking the underlying KeyError.
        try:
            del self[name]
        except KeyError:
            raise AttributeError(name)
def ask_yes_no(question: str) -> bool:
    """Ask the user the question until the user inputs a valid answer.

    Accepted answers (case-insensitive, surrounding whitespace ignored) are
    y/yes/t/true/on/1 for True and n/no/f/false/off/0 for False. Any other input
    re-prints the question.

    Returns:
        True or False as a real bool.
    """
    # The accepted spellings are parsed locally so the function does not depend on
    # distutils and returns a bool (as annotated) instead of 0/1.
    affirmative = ("y", "yes", "t", "true", "on", "1")
    negative = ("n", "no", "f", "false", "off", "0")

    while True:
        print("{0} [y/n]".format(question))
        answer = input().strip().lower()
        if answer in affirmative:
            return True
        if answer in negative:
            return False
def get_module_from_obj_name(obj_name: str) -> Tuple[types.ModuleType, str]:
    """Searches for the underlying module behind the name to some python object.
    Returns the module and the object name (original name with module part removed).

    The shorthands "np." and "tf." at the start of the name are expanded to "numpy."
    and "tensorflow.". Raises ImportError (or the AttributeError of the failed
    attribute lookup) when the name cannot be resolved.
    """

    # allow convenience shorthands, substitute them by full names
    # (the dot is escaped: only the exact prefixes "np." / "tf." are shorthands,
    # names such as "npfoo.bar" or "tflib.x" must be left alone)
    obj_name = re.sub(r"^np\.", "numpy.", obj_name)
    obj_name = re.sub(r"^tf\.", "tensorflow.", obj_name)

    # list alternatives for (module_name, local_obj_name), longest module path first
    parts = obj_name.split(".")
    name_pairs = [(".".join(parts[:i]), ".".join(parts[i:])) for i in range(len(parts), 0, -1)]

    # try each alternative in turn
    for module_name, local_obj_name in name_pairs:
        try:
            module = importlib.import_module(module_name) # may raise ImportError
            get_obj_from_module(module, local_obj_name) # may raise AttributeError
            return module, local_obj_name
        except Exception: # best effort; the passes below re-raise the real cause
            pass

    # maybe some of the modules themselves contain errors?
    for module_name, _local_obj_name in name_pairs:
        try:
            importlib.import_module(module_name) # may raise ImportError
        except ImportError as err:
            # a plain "module does not exist" is expected for most alternatives
            if not str(err).startswith("No module named '" + module_name + "'"):
                raise

    # maybe the requested attribute is missing?
    for module_name, local_obj_name in name_pairs:
        try:
            module = importlib.import_module(module_name) # may raise ImportError
            get_obj_from_module(module, local_obj_name) # may raise AttributeError
        except ImportError:
            pass

    # we are out of luck, but we have no idea why
    raise ImportError(obj_name)


def get_obj_from_module(module: types.ModuleType, obj_name: str) -> Any:
    """Traverses the object name and returns the last (rightmost) python object.

    An empty `obj_name` returns the module itself; otherwise each dot-separated
    part is resolved with getattr(), starting from the module.
    """
    if obj_name == '':
        return module
    obj = module
    for part in obj_name.split("."):
        obj = getattr(obj, part)
    return obj
def list_dir_recursively_with_ignore(dir_path: str, ignores: List[str] = None, add_base_to_relative: bool = False) -> List[Tuple[str, str]]:
    """Walk `dir_path` recursively and list its files, skipping ignored names.

    `ignores` holds fnmatch-style patterns that are matched against bare file and
    directory names; a matching directory is not descended into. Each result entry is
    an (absolute path, relative path) tuple, where the relative path is taken against
    `dir_path` and, if `add_base_to_relative` is set, prefixed with the directory's own name.
    """
    assert os.path.isdir(dir_path)
    patterns = [] if ignores is None else ignores
    base_name = os.path.basename(os.path.normpath(dir_path))

    def _is_ignored(name):
        return any(fnmatch.fnmatch(name, pattern) for pattern in patterns)

    pairs = []
    for root, dirs, files in os.walk(dir_path, topdown=True):
        # os.walk only honours pruning when the list it handed out is modified in place
        dirs[:] = [d for d in dirs if not _is_ignored(d)]

        for file_name in files:
            if _is_ignored(file_name):
                continue
            absolute = os.path.join(root, file_name)
            relative = os.path.relpath(absolute, dir_path)
            if add_base_to_relative:
                relative = os.path.join(base_name, relative)
            pairs.append((absolute, relative))

    return pairs
def is_url(obj: Any, allow_file_urls: bool = False) -> bool:
    """Determine whether the given object is a valid URL string.

    A string counts as a URL when it contains "://" and parses into a scheme plus a
    network location containing a dot. With `allow_file_urls`, any string starting
    with "file:///" is accepted as well.
    """
    if not isinstance(obj, str) or not "://" in obj:
        return False
    if allow_file_urls and obj.startswith('file:///'):
        return True
    try:
        res = requests.compat.urlparse(obj)
        if not res.scheme or not res.netloc or not "." in res.netloc:
            return False
        res = requests.compat.urlparse(requests.compat.urljoin(obj, "/"))
        if not res.scheme or not res.netloc or not "." in res.netloc:
            return False
    except Exception: # unparsable input is simply "not a URL"; do not swallow KeyboardInterrupt
        return False
    return True


def open_url(url: str, cache_dir: str = None, num_attempts: int = 10, verbose: bool = True) -> Any:
    """Download the given URL and return a binary-mode file object to access the data.

    Args:
        url: http(s) URL, or a "file:///" URL pointing at a local file.
        cache_dir: Optional directory used to look up and store downloaded data,
            keyed by the MD5 of the URL.
        num_attempts: Maximum number of download attempts (at least 1).
        verbose: Print progress to stdout.

    Returns:
        A file object opened for binary reading (a real file for file URLs and
        cache hits, an in-memory buffer for fresh downloads).

    Raises:
        The exception of the last failed attempt once all attempts are used up.
    """
    assert is_url(url, allow_file_urls=True)
    assert num_attempts >= 1

    # Handle file URLs.
    if url.startswith('file:///'):
        # Strip only "file://" so that the path keeps its leading slash
        # ("file:///tmp/x" -> "/tmp/x"); stripping all three slashes would turn an
        # absolute POSIX path into a relative one.
        file_path = url[len('file://'):]
        if re.search(r"^/[a-zA-Z]:", file_path):
            file_path = file_path[1:] # drive-letter form: "/C:/dir/x" -> "C:/dir/x"
        return open(file_path, "rb")

    # Lookup from cache.
    url_md5 = hashlib.md5(url.encode("utf-8")).hexdigest()
    if cache_dir is not None:
        cache_files = glob.glob(os.path.join(cache_dir, url_md5 + "_*"))
        if len(cache_files) == 1:
            return open(cache_files[0], "rb")

    # Download.
    url_name = None
    url_data = None
    with requests.Session() as session:
        if verbose:
            print("Downloading %s ..." % url, end="", flush=True)
        for attempts_left in reversed(range(num_attempts)):
            try:
                with session.get(url) as res:
                    res.raise_for_status()
                    if len(res.content) == 0:
                        raise IOError("No data received")

                    # Small responses may be an HTML interstitial instead of the payload.
                    if len(res.content) < 8192:
                        # errors="replace": a small binary payload must not abort the
                        # download just because it is not valid UTF-8.
                        content_str = res.content.decode("utf-8", errors="replace")
                        if "download_warning" in res.headers.get("Set-Cookie", ""):
                            links = [html.unescape(link) for link in content_str.split('"') if "export=download" in link]
                            if len(links) == 1:
                                # follow the confirmation link on the next attempt
                                url = requests.compat.urljoin(url, links[0])
                                raise IOError("Google Drive virus checker nag")
                        if "Google Drive - Quota exceeded" in content_str:
                            raise IOError("Google Drive download quota exceeded -- please try again later")

                    match = re.search(r'filename="([^"]*)"', res.headers.get("Content-Disposition", ""))
                    url_name = match[1] if match else url
                    url_data = res.content
                    if verbose:
                        print(" done")
                    break
            except Exception: # retry on errors, but let KeyboardInterrupt/SystemExit through immediately
                if not attempts_left:
                    if verbose:
                        print(" failed")
                    raise
                if verbose:
                    print(".", end="", flush=True)

    # Save to cache.
    if cache_dir is not None:
        safe_name = re.sub(r"[^0-9a-zA-Z-._]", "_", url_name)
        cache_file = os.path.join(cache_dir, url_md5 + "_" + safe_name)
        temp_file = os.path.join(cache_dir, "tmp_" + uuid.uuid4().hex + "_" + url_md5 + "_" + safe_name)
        os.makedirs(cache_dir, exist_ok=True)
        with open(temp_file, "wb") as f:
            f.write(url_data)
        os.replace(temp_file, cache_file) # atomic

    # Return data as file object.
    return io.BytesIO(url_data)
class FID(metric_base.MetricBase):
    """Frechet Inception Distance between dataset images and images generated by Gs.

    Args:
        num_images: Number of real and of generated images used for the statistics.
        minibatch_per_gpu: Number of images processed per GPU in one step.
        **kwargs: Forwarded to metric_base.MetricBase.
    """

    def __init__(self, num_images, minibatch_per_gpu, **kwargs):
        super().__init__(**kwargs)
        self.num_images = num_images
        self.minibatch_per_gpu = minibatch_per_gpu

    def _evaluate(self, Gs, Gs_kwargs, num_gpus):
        # `import scipy` by itself does not guarantee that the `scipy.linalg` submodule
        # has been loaded; import it explicitly so that sqrtm below is always available.
        import scipy.linalg # pylint: disable=import-outside-toplevel

        minibatch_size = num_gpus * self.minibatch_per_gpu
        inception = misc.load_pkl('http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/inception_v3_features.pkl')
        activations = np.empty([self.num_images, inception.output_shape[1]], dtype=np.float32)

        # Calculate statistics for reals (cached on disk between runs).
        cache_file = self._get_cache_file_for_reals(num_images=self.num_images)
        os.makedirs(os.path.dirname(cache_file), exist_ok=True)
        if os.path.isfile(cache_file):
            mu_real, sigma_real = misc.load_pkl(cache_file)
        else:
            for idx, images in enumerate(self._iterate_reals(minibatch_size=minibatch_size)):
                begin = idx * minibatch_size
                end = min(begin + minibatch_size, self.num_images)
                # the last minibatch may be only partially needed
                activations[begin:end] = inception.run(images[:end-begin], num_gpus=num_gpus, assume_frozen=True)
                if end == self.num_images:
                    break
            mu_real = np.mean(activations, axis=0)
            sigma_real = np.cov(activations, rowvar=False)
            misc.save_pkl((mu_real, sigma_real), cache_file)

        # Construct TensorFlow graph.
        result_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                Gs_clone = Gs.clone()
                inception_clone = inception.clone()
                latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
                labels = self._get_random_labels_tf(self.minibatch_per_gpu)
                images = Gs_clone.get_output_for(latents, labels, **Gs_kwargs)
                images = tflib.convert_images_to_uint8(images)
                result_expr.append(inception_clone.get_output_for(images))

        # Calculate statistics for fakes (the activations buffer is reused).
        for begin in range(0, self.num_images, minibatch_size):
            self._report_progress(begin, self.num_images)
            end = min(begin + minibatch_size, self.num_images)
            activations[begin:end] = np.concatenate(tflib.run(result_expr), axis=0)[:end-begin]
        mu_fake = np.mean(activations, axis=0)
        sigma_fake = np.cov(activations, rowvar=False)

        # Calculate FID.
        m = np.square(mu_fake - mu_real).sum()
        s, _ = scipy.linalg.sqrtm(np.dot(sigma_fake, sigma_real), disp=False) # pylint: disable=no-member
        dist = m + np.trace(sigma_fake + sigma_real - 2*s)
        # sqrtm may return a complex matrix; only the real part is reported
        self._report_result(np.real(dist))
class IS(metric_base.MetricBase):
    """Inception Score of images generated by Gs, reported as mean and std over splits.

    Args:
        num_images: Number of generated images to score.
        num_splits: Number of consecutive chunks the images are divided into.
        minibatch_per_gpu: Number of images generated per GPU in one step.
        **kwargs: Forwarded to metric_base.MetricBase.
    """

    def __init__(self, num_images, num_splits, minibatch_per_gpu, **kwargs):
        super().__init__(**kwargs)
        self.num_images = num_images
        self.num_splits = num_splits
        self.minibatch_per_gpu = minibatch_per_gpu

    def _evaluate(self, Gs, Gs_kwargs, num_gpus):
        total = self.num_images
        minibatch_size = num_gpus * self.minibatch_per_gpu
        inception = misc.load_pkl('http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/inception_v3_softmax.pkl')
        activations = np.empty([total, inception.output_shape[1]], dtype=np.float32)

        # Construct TensorFlow graph: one generator + classifier clone per GPU.
        result_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                Gs_clone = Gs.clone()
                inception_clone = inception.clone()
                latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
                labels = self._get_random_labels_tf(self.minibatch_per_gpu)
                images = Gs_clone.get_output_for(latents, labels, **Gs_kwargs)
                images = tflib.convert_images_to_uint8(images)
                result_expr.append(inception_clone.get_output_for(images))

        # Calculate activations for fakes; the final minibatch is truncated to fit.
        for begin in range(0, total, minibatch_size):
            self._report_progress(begin, total)
            end = min(begin + minibatch_size, total)
            batch = np.concatenate(tflib.run(result_expr), axis=0)
            activations[begin:end] = batch[:end-begin]

        # Calculate IS for each split.
        scores = []
        for i in range(self.num_splits):
            lo = i * total // self.num_splits
            hi = (i + 1) * total // self.num_splits
            part = activations[lo:hi]
            marginal = np.expand_dims(np.mean(part, 0), 0)
            kl = part * (np.log(part) - np.log(marginal))
            kl = np.mean(np.sum(kl, 1))
            scores.append(np.exp(kl))
        self._report_result(np.mean(scores), suffix='_mean')
        self._report_result(np.std(scores), suffix='_std')
'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-08-bald.pkl', 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-09-bangs.pkl', 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-10-big-lips.pkl', 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-11-big-nose.pkl', 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-12-black-hair.pkl', 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-13-blond-hair.pkl', 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-14-blurry.pkl', 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-15-brown-hair.pkl', 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-16-bushy-eyebrows.pkl', 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-17-chubby.pkl', 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-18-double-chin.pkl', 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-19-eyeglasses.pkl', 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-20-goatee.pkl', 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-21-gray-hair.pkl', 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-22-heavy-makeup.pkl', 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-23-high-cheekbones.pkl', 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-24-mouth-slightly-open.pkl', 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-25-mustache.pkl', 'http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/celebahq-classifier-26-narrow-eyes.pkl', 
def prob_normalize(p):
    """Return `p` as a float32 2-D array scaled so that all entries sum to one."""
    table = np.asarray(p).astype(np.float32)
    assert len(table.shape) == 2
    return table / np.sum(table)

def mutual_information(p):
    """Mutual information, in bits, of the joint distribution given by the 2-D table `p`.

    The table is normalized first; rows are the outcomes of one variable and columns
    the outcomes of the other. Zero-probability cells contribute nothing.
    """
    joint = prob_normalize(p)
    row_marginal = np.sum(joint, axis=1)
    col_marginal = np.sum(joint, axis=0)
    total = 0.0
    for x, row in enumerate(joint):
        p_x = row_marginal[x]
        for y, p_xy in enumerate(row):
            if p_xy > 0.0:
                total += p_xy * np.log2(p_xy / (p_x * col_marginal[y])) # get bits as output
    return total
class LS(metric_base.MetricBase):
    """Linear separability of attributes in the latent ('_z') and dlatent ('_w') spaces.

    Args:
        num_samples: Number of generated samples to classify.
        num_keep: Number of most confidently classified samples kept per attribute
            (must not exceed num_samples).
        attrib_indices: Indices into classifier_urls selecting the attributes to evaluate.
        minibatch_per_gpu: Number of images generated per GPU in one step.
        **kwargs: Forwarded to metric_base.MetricBase.
    """

    def __init__(self, num_samples, num_keep, attrib_indices, minibatch_per_gpu, **kwargs):
        assert num_keep <= num_samples
        super().__init__(**kwargs)
        self.num_samples = num_samples
        self.num_keep = num_keep
        self.attrib_indices = attrib_indices
        self.minibatch_per_gpu = minibatch_per_gpu

    def _evaluate(self, Gs, Gs_kwargs, num_gpus):
        minibatch_size = num_gpus * self.minibatch_per_gpu

        # Construct TensorFlow graph for each GPU.
        result_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                Gs_clone = Gs.clone()

                # Generate images.
                latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
                labels = self._get_random_labels_tf(self.minibatch_per_gpu)
                dlatents = Gs_clone.components.mapping.get_output_for(latents, labels, **Gs_kwargs)
                images = Gs_clone.get_output_for(latents, None, **Gs_kwargs)

                # Downsample to 256x256. The attribute classifiers were built for 256x256.
                if images.shape[2] > 256:
                    factor = images.shape[2] // 256
                    images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor])
                    images = tf.reduce_mean(images, axis=[3, 5])

                # Run classifier for each attribute.
                result_dict = dict(latents=latents, dlatents=dlatents[:,-1])
                for attrib_idx in self.attrib_indices:
                    classifier = misc.load_pkl(classifier_urls[attrib_idx])
                    logits = classifier.get_output_for(images, None)
                    # turn the single logit into a two-class probability pair
                    predictions = tf.nn.softmax(tf.concat([logits, -logits], axis=1))
                    result_dict[attrib_idx] = predictions
                result_expr.append(result_dict)

        # Sampling loop.
        results = []
        for begin in range(0, self.num_samples, minibatch_size):
            self._report_progress(begin, self.num_samples)
            results += tflib.run(result_expr)
        # merge the per-step, per-GPU dicts into one array per key
        results = {key: np.concatenate([value[key] for value in results], axis=0) for key in results[0].keys()}

        # Calculate conditional entropy for each attribute.
        conditional_entropies = defaultdict(list)
        for attrib_idx in self.attrib_indices:
            # Prune the least confident samples.
            pruned_indices = list(range(self.num_samples))
            pruned_indices = sorted(pruned_indices, key=lambda i: -np.max(results[attrib_idx][i]))
            pruned_indices = pruned_indices[:self.num_keep]

            # Fit SVM to the remaining samples.
            svm_targets = np.argmax(results[attrib_idx][pruned_indices], axis=1)
            for space in ['latents', 'dlatents']:
                svm_inputs = results[space][pruned_indices]
                try:
                    svm = sklearn.svm.LinearSVC()
                    svm.fit(svm_inputs, svm_targets)
                    svm_outputs = svm.predict(svm_inputs)
                except Exception: # fitting failed; KeyboardInterrupt/SystemExit still propagate
                    svm_outputs = svm_targets # assume perfect prediction

                # Calculate conditional entropy from the 2x2 table of (predicted, target) frequencies.
                p = [[np.mean([case == (row, col) for case in zip(svm_outputs, svm_targets)]) for col in (0, 1)] for row in (0, 1)]
                conditional_entropies[space].append(conditional_entropy(p))

        # Calculate separability scores.
        scores = {key: 2**np.sum(values) for key, values in conditional_entropies.items()}
        self._report_result(scores['latents'], suffix='_z')
        self._report_result(scores['dlatents'], suffix='_w')
pmax self._progress_sec = psec def run(self, network_pkl, run_dir=None, data_dir=None, dataset_args=None, mirror_augment=None, num_gpus=1, tf_config=None, log_results=True, Gs_kwargs=dict(is_validation=True)): self._reset(network_pkl=network_pkl, run_dir=run_dir, data_dir=data_dir, dataset_args=dataset_args, mirror_augment=mirror_augment) time_begin = time.time() with tf.Graph().as_default(), tflib.create_session(tf_config).as_default(): # pylint: disable=not-context-manager self._report_progress(0, 1) _G, _D, Gs = misc.load_pkl(self._network_pkl) self._evaluate(Gs, Gs_kwargs=Gs_kwargs, num_gpus=num_gpus) self._report_progress(1, 1) self._eval_time = time.time() - time_begin # pylint: disable=attribute-defined-outside-init if log_results: if run_dir is not None: log_file = os.path.join(run_dir, 'metric-%s.txt' % self.name) with dnnlib.util.Logger(log_file, 'a'): print(self.get_result_str().strip()) else: print(self.get_result_str().strip()) def get_result_str(self): network_name = os.path.splitext(os.path.basename(self._network_pkl))[0] if len(network_name) > 29: network_name = '...' 
+ network_name[-26:] result_str = '%-30s' % network_name result_str += ' time %-12s' % dnnlib.util.format_time(self._eval_time) for res in self._results: result_str += ' ' + self.name + res.suffix + ' ' result_str += res.fmt % res.value return result_str def update_autosummaries(self): for res in self._results: tflib.autosummary.autosummary('Metrics/' + self.name + res.suffix, res.value) def _evaluate(self, Gs, Gs_kwargs, num_gpus): raise NotImplementedError # to be overridden by subclasses def _report_result(self, value, suffix='', fmt='%-10.4f'): self._results += [dnnlib.EasyDict(value=value, suffix=suffix, fmt=fmt)] def _report_progress(self, pcur, pmax, status_str=''): if self._progress_lo is None or self._progress_hi is None or self._progress_max is None: return t = time.time() if self._progress_sec is not None and self._progress_time is not None and t < self._progress_time + self._progress_sec: return self._progress_time = t val = self._progress_lo + (pcur / pmax) * (self._progress_hi - self._progress_lo) dnnlib.RunContext.get().update(status_str, int(val), self._progress_max) def _get_cache_file_for_reals(self, extension='pkl', **kwargs): all_args = dnnlib.EasyDict(metric_name=self.name, mirror_augment=self._mirror_augment) all_args.update(self._dataset_args) all_args.update(kwargs) md5 = hashlib.md5(repr(sorted(all_args.items())).encode('utf-8')) dataset_name = self._dataset_args.get('tfrecord_dir', None) or self._dataset_args.get('h5_file', None) dataset_name = os.path.splitext(os.path.basename(dataset_name))[0] return os.path.join('.stylegan2-cache', '%s-%s-%s.%s' % (md5.hexdigest(), self.name, dataset_name, extension)) def _get_dataset_obj(self): if self._dataset_obj is None: self._dataset_obj = dataset.load_dataset(data_dir=self._data_dir, **self._dataset_args) return self._dataset_obj def _iterate_reals(self, minibatch_size): dataset_obj = self._get_dataset_obj() while True: images, _labels = dataset_obj.get_minibatch_np(minibatch_size) if 
self._mirror_augment: images = misc.apply_mirror_augment(images) yield images def _iterate_fakes(self, Gs, minibatch_size, num_gpus): while True: latents = np.random.randn(minibatch_size, *Gs.input_shape[1:]) fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True) images = Gs.run(latents, None, output_transform=fmt, is_validation=True, num_gpus=num_gpus, assume_frozen=True) yield images def _get_random_labels_tf(self, minibatch_size): return self._get_dataset_obj().get_random_labels_tf(minibatch_size) #---------------------------------------------------------------------------- # Group of multiple metrics. class MetricGroup: def __init__(self, metric_kwarg_list): self.metrics = [dnnlib.util.call_func_by_name(**kwargs) for kwargs in metric_kwarg_list] def run(self, *args, **kwargs): for metric in self.metrics: metric.run(*args, **kwargs) def get_result_str(self): return ' '.join(metric.get_result_str() for metric in self.metrics) def update_autosummaries(self): for metric in self.metrics: metric.update_autosummaries() #---------------------------------------------------------------------------- # Dummy metric for debugging purposes. class DummyMetric(MetricBase): def _evaluate(self, Gs, Gs_kwargs, num_gpus): _ = Gs, Gs_kwargs, num_gpus self._report_result(0.0) #---------------------------------------------------------------------------- ================================================ FILE: FQ-StyleGAN/metrics/metric_defaults.py ================================================ # Copyright (c) 2019, NVIDIA Corporation. All rights reserved. # # This work is made available under the Nvidia Source Code License-NC. 
#----------------------------------------------------------------------------
# Registry of the built-in metric configurations, keyed by metric name.
# Each entry holds 'func_name' (dotted path of the metric class) plus the
# keyword arguments for that metric.

metric_defaults = EasyDict(
    (entry.name, entry)
    for entry in (
        EasyDict(
            name='fid50k',
            func_name='metrics.frechet_inception_distance.FID',
            num_images=50000,
            minibatch_per_gpu=8,
        ),
        EasyDict(
            name='is50k',
            func_name='metrics.inception_score.IS',
            num_images=50000,
            num_splits=10,
            minibatch_per_gpu=8,
        ),
        EasyDict(
            name='ppl_zfull',
            func_name='metrics.perceptual_path_length.PPL',
            num_samples=50000,
            epsilon=1e-4,
            space='z',
            sampling='full',
            crop=True,
            minibatch_per_gpu=4,
            Gs_overrides=dict(dtype='float32', mapping_dtype='float32'),
        ),
        EasyDict(
            name='ppl_wfull',
            func_name='metrics.perceptual_path_length.PPL',
            num_samples=50000,
            epsilon=1e-4,
            space='w',
            sampling='full',
            crop=True,
            minibatch_per_gpu=4,
            Gs_overrides=dict(dtype='float32', mapping_dtype='float32'),
        ),
        EasyDict(
            name='ppl_zend',
            func_name='metrics.perceptual_path_length.PPL',
            num_samples=50000,
            epsilon=1e-4,
            space='z',
            sampling='end',
            crop=True,
            minibatch_per_gpu=4,
            Gs_overrides=dict(dtype='float32', mapping_dtype='float32'),
        ),
        EasyDict(
            name='ppl_wend',
            func_name='metrics.perceptual_path_length.PPL',
            num_samples=50000,
            epsilon=1e-4,
            space='w',
            sampling='end',
            crop=True,
            minibatch_per_gpu=4,
            Gs_overrides=dict(dtype='float32', mapping_dtype='float32'),
        ),
        EasyDict(
            name='ppl2_wend',
            func_name='metrics.perceptual_path_length.PPL',
            num_samples=50000,
            epsilon=1e-4,
            space='w',
            sampling='end',
            crop=False,
            minibatch_per_gpu=4,
            Gs_overrides=dict(dtype='float32', mapping_dtype='float32'),
        ),
        EasyDict(
            name='ls',
            func_name='metrics.linear_separability.LS',
            num_samples=200000,
            num_keep=100000,
            attrib_indices=range(40),
            minibatch_per_gpu=4,
        ),
        EasyDict(
            name='pr50k3',
            func_name='metrics.precision_recall.PR',
            num_images=50000,
            nhood_size=3,
            minibatch_per_gpu=8,
            row_batch_size=10000,
            col_batch_size=10000,
        ),
    )
)
#----------------------------------------------------------------------------

# Normalize batch of vectors.
def normalize(v):
    """Divide `v` by its L2 norm taken over the last axis."""
    return v / tf.sqrt(tf.reduce_sum(tf.square(v), axis=-1, keepdims=True))

# Spherical interpolation of a batch of vectors.
def slerp(a, b, t):
    """Spherically interpolate from `a` to `b` at fraction `t`; result is unit length."""
    a = normalize(a)
    b = normalize(b)
    d = tf.reduce_sum(a * b, axis=-1, keepdims=True) # cosine of the angle between a and b
    p = t * tf.math.acos(d) # angle travelled from a towards b
    c = normalize(b - d * a) # component of b orthogonal to a, normalized
    d = a * tf.math.cos(p) + c * tf.math.sin(p)
    return normalize(d)

#----------------------------------------------------------------------------

class PPL(metric_base.MetricBase):
    """Perceptual Path Length metric.

    Measures the perceptual distance between images generated from two
    latent points that are `epsilon` apart along an interpolation path,
    scaled by 1 / epsilon**2, and reports the mean after outlier rejection.
    """

    def __init__(self, num_samples, epsilon, space, sampling, crop, minibatch_per_gpu, Gs_overrides, **kwargs):
        """Store the metric configuration.

        space:    'z' (slerp in the input latent space) or 'w' (lerp in dlatents).
        sampling: 'full' draws the interpolation position t from [0, 1);
                  'end' fixes t at 0.
        crop:     crop the images before measuring distance.
        Gs_overrides: extra kwargs merged over `Gs_kwargs` in `_evaluate`.
        Remaining kwargs are passed to `MetricBase.__init__`.
        """
        assert space in ['z', 'w']
        assert sampling in ['full', 'end']
        super().__init__(**kwargs)
        self.num_samples = num_samples
        self.epsilon = epsilon
        self.space = space
        self.sampling = sampling
        self.crop = crop
        self.minibatch_per_gpu = minibatch_per_gpu
        self.Gs_overrides = Gs_overrides

    def _evaluate(self, Gs, Gs_kwargs, num_gpus):
        """Build the per-GPU distance graph, sample it, and report the filtered mean."""
        Gs_kwargs = dict(Gs_kwargs) # copy so the caller's dict is not modified
        Gs_kwargs.update(self.Gs_overrides)
        minibatch_size = num_gpus * self.minibatch_per_gpu

        # Construct TensorFlow graph.
        distance_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                Gs_clone = Gs.clone()
                noise_vars = [var for name, var in Gs_clone.components.synthesis.vars.items() if name.startswith('noise')]

                # Generate random latents and interpolation t-values.
                # Two latents (path endpoints) are drawn per sample; each pair shares one label.
                lat_t01 = tf.random_normal([self.minibatch_per_gpu * 2] + Gs_clone.input_shape[1:])
                lerp_t = tf.random_uniform([self.minibatch_per_gpu], 0.0, 1.0 if self.sampling == 'full' else 0.0)
                labels = tf.reshape(tf.tile(self._get_random_labels_tf(self.minibatch_per_gpu), [1, 2]), [self.minibatch_per_gpu * 2, -1])

                # Interpolate in W or Z.
                if self.space == 'w':
                    dlat_t01 = Gs_clone.components.mapping.get_output_for(lat_t01, labels, **Gs_kwargs)
                    dlat_t01 = tf.cast(dlat_t01, tf.float32)
                    dlat_t0, dlat_t1 = dlat_t01[0::2], dlat_t01[1::2]
                    dlat_e0 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis])
                    dlat_e1 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis] + self.epsilon)
                    # Interleave so rows 0::2 are at t and rows 1::2 are at t + epsilon.
                    dlat_e01 = tf.reshape(tf.stack([dlat_e0, dlat_e1], axis=1), dlat_t01.shape)
                else: # space == 'z'
                    lat_t0, lat_t1 = lat_t01[0::2], lat_t01[1::2]
                    lat_e0 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis])
                    lat_e1 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis] + self.epsilon)
                    lat_e01 = tf.reshape(tf.stack([lat_e0, lat_e1], axis=1), lat_t01.shape)
                    dlat_e01 = Gs_clone.components.mapping.get_output_for(lat_e01, labels, **Gs_kwargs)

                # Synthesize images.
                with tf.control_dependencies([var.initializer for var in noise_vars]): # use same noise inputs for the entire minibatch
                    images = Gs_clone.components.synthesis.get_output_for(dlat_e01, randomize_noise=False, **Gs_kwargs)
                    images = tf.cast(images, tf.float32)

                # Crop only the face region.
                if self.crop:
                    c = int(images.shape[2] // 8)
                    images = images[:, :, c*3 : c*7, c*2 : c*6]

                # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
                factor = images.shape[2] // 256
                if factor > 1:
                    # Average over factor x factor pixel blocks.
                    images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor])
                    images = tf.reduce_mean(images, axis=[3,5])

                # Scale dynamic range from [-1,1] to [0,255] for VGG.
                images = (images + 1) * (255 / 2)

                # Evaluate perceptual distance.
                img_e0, img_e1 = images[0::2], images[1::2]
                distance_measure = misc.load_pkl('http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl')
                distance_expr.append(distance_measure.get_output_for(img_e0, img_e1) * (1 / self.epsilon**2))

        # Sampling loop.
        all_distances = []
        for begin in range(0, self.num_samples, minibatch_size):
            self._report_progress(begin, self.num_samples)
            all_distances += tflib.run(distance_expr)
        all_distances = np.concatenate(all_distances, axis=0)

        # Reject outliers.
        # Keep only distances between the 1st and 99th percentiles (inclusive).
        lo = np.percentile(all_distances, 1, interpolation='lower')
        hi = np.percentile(all_distances, 99, interpolation='higher')
        filtered_distances = np.extract(np.logical_and(lo <= all_distances, all_distances <= hi), all_distances)
        self._report_result(np.mean(filtered_distances))
#----------------------------------------------------------------------------

def batch_pairwise_distances(U, V):
    """Return the TF expression of pairwise squared Euclidean distances.

    `U` and `V` are batches of feature vectors (one vector per row); the
    result has one row per vector of `U` and one column per vector of `V`.
    """
    with tf.variable_scope('pairwise_dist_block'):
        # Squared norms of each row in U and V.
        norm_u = tf.reduce_sum(tf.square(U), 1)
        norm_v = tf.reduce_sum(tf.square(V), 1)

        # norm_u as a column vector and norm_v as a row vector, so that
        # their sum broadcasts to the full [rows(U), rows(V)] matrix.
        norm_u = tf.reshape(norm_u, [-1, 1])
        norm_v = tf.reshape(norm_v, [1, -1])

        # Pairwise squared Euclidean distances, clamped at zero to absorb
        # negative values caused by rounding.
        D = tf.maximum(norm_u - 2*tf.matmul(U, V, False, True) + norm_v, 0.0)

    return D

#----------------------------------------------------------------------------

class DistanceBlock():
    """TF graph that evaluates pairwise distances, splitting the second batch across GPUs."""

    def __init__(self, num_features, num_gpus):
        """Build the graph for feature vectors of length `num_features` on `num_gpus` GPUs."""
        self.num_features = num_features
        self.num_gpus = num_gpus

        # Initialize TF graph to calculate pairwise distances.
        with tf.device('/cpu:0'):
            self._features_batch1 = tf.placeholder(tf.float16, shape=[None, self.num_features])
            self._features_batch2 = tf.placeholder(tf.float16, shape=[None, self.num_features])
            # tf.split into num_gpus equal parts: the second batch must be
            # evenly divisible by num_gpus.
            features_split2 = tf.split(self._features_batch2, self.num_gpus, axis=0)
            distances_split = []
            for gpu_idx in range(self.num_gpus):
                with tf.device('/gpu:%d' % gpu_idx):
                    distances_split.append(batch_pairwise_distances(self._features_batch1, features_split2[gpu_idx]))
            self._distance_block = tf.concat(distances_split, axis=1)

    def pairwise_distances(self, U, V):
        """Evaluate pairwise distances between two batches of feature vectors.

        Uses `Tensor.eval`, so a default TF session must be active.
        """
        return self._distance_block.eval(feed_dict={self._features_batch1: U, self._features_batch2: V})

#----------------------------------------------------------------------------

class ManifoldEstimator():
    """Estimates the manifold of a set of feature vectors as a union of
    hyperspheres, one per reference sample, whose radii are the distances to
    the k-th nearest neighbors."""

    def __init__(self, distance_block, features, row_batch_size, col_batch_size, nhood_sizes, clamp_to_percentile=None):
        """Compute the k-th nearest neighbor distance of every reference feature.

        distance_block:      object exposing `pairwise_distances(U, V)`.
        features:            reference feature vectors, one per row.
        row_batch_size,
        col_batch_size:      tile sizes used when filling the distance matrix.
        nhood_sizes:         list of neighborhood sizes k.
        clamp_to_percentile: if given, radii above this percentile (per k)
                             are set to zero.
        """
        num_images = features.shape[0]
        self.nhood_sizes = nhood_sizes
        self.num_nhoods = len(nhood_sizes)
        self.row_batch_size = row_batch_size
        self.col_batch_size = col_batch_size
        self._ref_features = features
        self._distance_block = distance_block

        # Estimate manifold of features by calculating distances to kth nearest neighbor of each sample.
        self.D = np.zeros([num_images, self.num_nhoods], dtype=np.float16)
        distance_batch = np.zeros([row_batch_size, num_images], dtype=np.float16)
        seq = np.arange(max(self.nhood_sizes) + 1, dtype=np.int32)

        for begin1 in range(0, num_images, row_batch_size):
            end1 = min(begin1 + row_batch_size, num_images)
            row_batch = features[begin1:end1]

            for begin2 in range(0, num_images, col_batch_size):
                end2 = min(begin2 + col_batch_size, num_images)
                col_batch = features[begin2:end2]

                # Compute distances between batches.
                distance_batch[0:end1-begin1, begin2:end2] = self._distance_block.pairwise_distances(row_batch, col_batch)

            # Find the kth nearest neighbor from the current batch.
            # Partitioning on indices 0..max(k) puts the k smallest distances
            # in sorted position; index 0 is the sample itself.
            self.D[begin1:end1, :] = np.partition(distance_batch[0:end1-begin1, :], seq, axis=1)[:, self.nhood_sizes]

        if clamp_to_percentile is not None:
            max_distances = np.percentile(self.D, clamp_to_percentile, axis=0)
            # Radii above the percentile are zeroed (not clamped to the percentile).
            self.D[self.D > max_distances] = 0

    def evaluate(self, eval_features, return_realism=False, return_neighbors=False):
        """Evaluate if new feature vectors are in the estimated manifold.

        Returns an int32 array [num_eval, num_nhoods] of 0/1 predictions,
        followed by the realism scores and/or nearest reference indices when
        the corresponding flag is set.
        """
        num_eval_images = eval_features.shape[0]
        num_ref_images = self.D.shape[0]
        distance_batch = np.zeros([self.row_batch_size, num_ref_images], dtype=np.float16)
        batch_predictions = np.zeros([num_eval_images, self.num_nhoods], dtype=np.int32)
        realism_score = np.zeros([num_eval_images,], dtype=np.float32)
        nearest_indices = np.zeros([num_eval_images,], dtype=np.int32)

        for begin1 in range(0, num_eval_images, self.row_batch_size):
            end1 = min(begin1 + self.row_batch_size, num_eval_images)
            feature_batch = eval_features[begin1:end1]

            for begin2 in range(0, num_ref_images, self.col_batch_size):
                end2 = min(begin2 + self.col_batch_size, num_ref_images)
                ref_batch = self._ref_features[begin2:end2]

                distance_batch[0:end1-begin1, begin2:end2] = self._distance_block.pairwise_distances(feature_batch, ref_batch)

            # From the minibatch of new feature vectors, determine if they are in the estimated manifold.
            # If a feature vector is inside a hypersphere of some reference sample, then the new sample lies on the estimated manifold.
            # The radii of the hyperspheres are determined from distances of neighborhood size k.
            samples_in_manifold = distance_batch[0:end1-begin1, :, None] <= self.D
            batch_predictions[begin1:end1] = np.any(samples_in_manifold, axis=1).astype(np.int32)

            # Realism: radius of the nearest reference sample (first k)
            # divided by the distance to that sample.
            nearest_indices[begin1:end1] = np.argmin(distance_batch[0:end1-begin1, :], axis=1)
            realism_score[begin1:end1] = self.D[nearest_indices[begin1:end1], 0] / np.min(distance_batch[0:end1-begin1, :], axis=1)

        if return_realism and return_neighbors:
            return batch_predictions, realism_score, nearest_indices
        elif return_realism:
            return batch_predictions, realism_score
        elif return_neighbors:
            return batch_predictions, nearest_indices

        return batch_predictions

#----------------------------------------------------------------------------

def knn_precision_recall_features(ref_features, eval_features, feature_net, nhood_sizes,
                                  row_batch_size, col_batch_size, num_gpus):
    """Calculates k-NN precision and recall for two sets of feature vectors.

    Returns an EasyDict holding both feature sets, both manifold estimators,
    per-sample precision/recall predictions, realism scores, nearest
    neighbors, and the means `knn_precision` / `knn_recall` (one per k).
    """
    state = dnnlib.EasyDict()
    num_features = feature_net.output_shape[1]
    state.ref_features = ref_features
    state.eval_features = eval_features

    # Initialize DistanceBlock and ManifoldEstimators.
    distance_block = DistanceBlock(num_features, num_gpus)
    state.ref_manifold = ManifoldEstimator(distance_block, state.ref_features, row_batch_size, col_batch_size, nhood_sizes)
    state.eval_manifold = ManifoldEstimator(distance_block, state.eval_features, row_batch_size, col_batch_size, nhood_sizes)

    # Evaluate precision and recall using k-nearest neighbors.

    # Precision: How many points from eval_features are in ref_features manifold.
    state.precision, state.realism_scores, state.nearest_neighbors = state.ref_manifold.evaluate(state.eval_features, return_realism=True, return_neighbors=True)
    state.knn_precision = state.precision.mean(axis=0)

    # Recall: How many points from ref_features are in eval_features manifold.
    state.recall = state.eval_manifold.evaluate(state.ref_features)
    state.knn_recall = state.recall.mean(axis=0)

    return state

#----------------------------------------------------------------------------

class PR(metric_base.MetricBase):
    """k-NN precision/recall metric computed on features of real and generated images."""

    def __init__(self, num_images, nhood_size, minibatch_per_gpu, row_batch_size, col_batch_size, **kwargs):
        """Store the configuration; remaining kwargs go to `MetricBase.__init__`."""
        super().__init__(**kwargs)
        self.num_images = num_images
        self.nhood_size = nhood_size
        self.minibatch_per_gpu = minibatch_per_gpu
        self.row_batch_size = row_batch_size
        self.col_batch_size = col_batch_size

    def _evaluate(self, Gs, Gs_kwargs, num_gpus):
        """Extract features for reals (cached on disk) and fakes, then report precision and recall."""
        minibatch_size = num_gpus * self.minibatch_per_gpu
        feature_net = misc.load_pkl('http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16.pkl')

        # Calculate features for reals.
        cache_file = self._get_cache_file_for_reals(num_images=self.num_images)
        os.makedirs(os.path.dirname(cache_file), exist_ok=True)
        if os.path.isfile(cache_file):
            ref_features = misc.load_pkl(cache_file)
        else:
            ref_features = np.empty([self.num_images, feature_net.output_shape[1]], dtype=np.float32)
            for idx, images in enumerate(self._iterate_reals(minibatch_size=minibatch_size)):
                begin = idx * minibatch_size
                end = min(begin + minibatch_size, self.num_images)
                # The last minibatch may be only partially needed.
                ref_features[begin:end] = feature_net.run(images[:end-begin], num_gpus=num_gpus, assume_frozen=True)
                if end == self.num_images:
                    break
            misc.save_pkl(ref_features, cache_file)

        # Construct TensorFlow graph.
        result_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                Gs_clone = Gs.clone()
                feature_net_clone = feature_net.clone()
                latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
                labels = self._get_random_labels_tf(self.minibatch_per_gpu)
                images = Gs_clone.get_output_for(latents, labels, **Gs_kwargs)
                images = tflib.convert_images_to_uint8(images)
                result_expr.append(feature_net_clone.get_output_for(images))

        # Calculate features for fakes.
        eval_features = np.empty([self.num_images, feature_net.output_shape[1]], dtype=np.float32)
        for begin in range(0, self.num_images, minibatch_size):
            self._report_progress(begin, self.num_images)
            end = min(begin + minibatch_size, self.num_images)
            eval_features[begin:end] = np.concatenate(tflib.run(result_expr), axis=0)[:end-begin]

        # Calculate precision and recall.
        # Pass the configured column batch size (row_batch_size was
        # previously passed for both, silently ignoring col_batch_size).
        state = knn_precision_recall_features(ref_features=ref_features, eval_features=eval_features, feature_net=feature_net,
            nhood_sizes=[self.nhood_size], row_batch_size=self.row_batch_size, col_batch_size=self.col_batch_size, num_gpus=num_gpus)
        self._report_result(state.knn_precision[0], suffix='_precision')
        self._report_result(state.knn_recall[0], suffix='_recall')
#----------------------------------------------------------------------------
# StyleGAN2 Google Drive root: https://drive.google.com/open?id=1QHc-yF5C3DChRwSdZKcx1w6K8JvSxQi7

# Every alias 'gdrive:networks/<file>' maps to the same <file> under this
# HTTP root, so the table is derived from a single list of file names
# instead of spelling each name out twice.
_GDRIVE_PREFIX = 'gdrive:networks/'
_URL_ROOT = 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/'

_NETWORK_FILES = [
    'stylegan2-car-config-a.pkl',
    'stylegan2-car-config-b.pkl',
    'stylegan2-car-config-c.pkl',
    'stylegan2-car-config-d.pkl',
    'stylegan2-car-config-e.pkl',
    'stylegan2-car-config-f.pkl',
    'stylegan2-cat-config-a.pkl',
    'stylegan2-cat-config-f.pkl',
    'stylegan2-church-config-a.pkl',
    'stylegan2-church-config-f.pkl',
    'stylegan2-ffhq-config-a.pkl',
    'stylegan2-ffhq-config-b.pkl',
    'stylegan2-ffhq-config-c.pkl',
    'stylegan2-ffhq-config-d.pkl',
    'stylegan2-ffhq-config-e.pkl',
    'stylegan2-ffhq-config-f.pkl',
    'stylegan2-horse-config-a.pkl',
    'stylegan2-horse-config-f.pkl',
]

# Architecture ablations: every generator/discriminator combination for the
# car and ffhq config-e networks, stored under 'table2/'.
_NETWORK_FILES += [
    'table2/stylegan2-%s-config-e-G%s-D%s.pkl' % (dataset_name, g_arch, d_arch)
    for dataset_name in ('car', 'ffhq')
    for g_arch in ('orig', 'resnet', 'skip')
    for d_arch in ('orig', 'resnet', 'skip')
]

gdrive_urls = {_GDRIVE_PREFIX + name: _URL_ROOT + name for name in _NETWORK_FILES}

#----------------------------------------------------------------------------

def get_path_or_url(path_or_gdrive_path):
    """Translate a known 'gdrive:' alias to its URL; return anything else unchanged."""
    return gdrive_urls.get(path_or_gdrive_path, path_or_gdrive_path)

#----------------------------------------------------------------------------

_cached_networks = dict()

def load_networks(path_or_gdrive_path):
    """Load the `(G, D, Gs)` network triple from a local path, URL or 'gdrive:' alias.

    Results are memoized per resolved path/URL for the lifetime of the
    process, so repeated calls return the same objects.
    """
    path_or_url = get_path_or_url(path_or_gdrive_path)
    if path_or_url in _cached_networks:
        return _cached_networks[path_or_url]

    if dnnlib.util.is_url(path_or_url):
        stream = dnnlib.util.open_url(path_or_url, cache_dir='.stylegan2-cache')
    else:
        stream = open(path_or_url, 'rb')

    # Enter the context before initializing TensorFlow so the stream is
    # closed even if init_tf() raises.
    with stream:
        tflib.init_tf()
        # SECURITY NOTE: unpickling executes arbitrary code embedded in the
        # file. Only load network pickles from sources you trust.
        G, D, Gs = pickle.load(stream, encoding='latin1')
    _cached_networks[path_or_url] = G, D, Gs
    return G, D, Gs
#----------------------------------------------------------------------------

class Projector:
    """Projects target images into the generator's dlatent space.

    Optimizes a dlatent vector and the synthesis noise inputs so that the
    generated image matches the target under the LPIPS perceptual distance,
    with an extra regularization term on the noise maps.
    Usage: `set_network(Gs)`, then `run(images)` or `start(images)` followed
    by repeated `step()` calls.
    """

    def __init__(self):
        """Set default hyperparameters and clear all graph/optimizer state."""
        self.num_steps                  = 1000
        self.dlatent_avg_samples        = 10000
        self.initial_learning_rate      = 0.1
        self.initial_noise_factor       = 0.05
        # Ramp lengths are fractions of the total number of steps.
        self.lr_rampdown_length         = 0.25
        self.lr_rampup_length           = 0.05
        self.noise_ramp_length          = 0.75
        self.regularize_noise_weight    = 1e5
        self.verbose                    = False
        self.clone_net                  = True

        self._Gs                    = None
        self._minibatch_size        = None
        self._dlatent_avg           = None
        self._dlatent_std           = None
        self._noise_vars            = None
        self._noise_init_op         = None
        self._noise_normalize_op    = None
        self._dlatents_var          = None
        self._noise_in              = None
        self._dlatents_expr         = None
        self._images_expr           = None
        self._target_images_var     = None
        self._lpips                 = None
        self._dist                  = None
        self._loss                  = None
        self._reg_sizes             = None
        self._lrate_in              = None
        self._opt                   = None
        self._opt_step              = None
        self._cur_step              = None

    def _info(self, *args):
        """Print a message prefixed with 'Projector:' when `verbose` is set."""
        if self.verbose:
            print('Projector:', *args)

    def set_network(self, Gs, minibatch_size=1):
        """Attach generator `Gs` and build the projection graph.

        Only `minibatch_size == 1` is supported. Passing `Gs=None` just
        clears the network without building anything.
        """
        assert minibatch_size == 1
        self._Gs = Gs
        self._minibatch_size = minibatch_size
        if self._Gs is None:
            return
        if self.clone_net:
            self._Gs = self._Gs.clone()

        # Find dlatent stats.
        self._info('Finding W midpoint and stddev using %d samples...' % self.dlatent_avg_samples)
        latent_samples = np.random.RandomState(123).randn(self.dlatent_avg_samples, *self._Gs.input_shapes[0][1:])
        dlatent_samples = self._Gs.components.mapping.run(latent_samples, None)[:, :1, :] # [N, 1, 512]
        self._dlatent_avg = np.mean(dlatent_samples, axis=0, keepdims=True) # [1, 1, 512]
        self._dlatent_std = (np.sum((dlatent_samples - self._dlatent_avg) ** 2) / self.dlatent_avg_samples) ** 0.5
        self._info('std = %g' % self._dlatent_std)

        # Find noise inputs.
        # Noise variables are named 'G_synthesis/noise0', 'noise1', ...;
        # collect them until the first missing index.
        self._info('Setting up noise inputs...')
        self._noise_vars = []
        noise_init_ops = []
        noise_normalize_ops = []
        while True:
            n = 'G_synthesis/noise%d' % len(self._noise_vars)
            if n not in self._Gs.vars:
                break
            v = self._Gs.vars[n]
            self._noise_vars.append(v)
            noise_init_ops.append(tf.assign(v, tf.random_normal(tf.shape(v), dtype=tf.float32)))
            noise_mean = tf.reduce_mean(v)
            noise_std = tf.reduce_mean((v - noise_mean)**2)**0.5
            noise_normalize_ops.append(tf.assign(v, (v - noise_mean) / noise_std))
            self._info(n, v)
        self._noise_init_op = tf.group(*noise_init_ops)
        self._noise_normalize_op = tf.group(*noise_normalize_ops)

        # Image output graph.
        self._info('Building image output graph...')
        self._dlatents_var = tf.Variable(tf.zeros([self._minibatch_size] + list(self._dlatent_avg.shape[1:])), name='dlatents_var')
        self._noise_in = tf.placeholder(tf.float32, [], name='noise_in')
        dlatents_noise = tf.random.normal(shape=self._dlatents_var.shape) * self._noise_in
        # The single optimized dlatent is broadcast to every synthesis layer.
        self._dlatents_expr = tf.tile(self._dlatents_var + dlatents_noise, [1, self._Gs.components.synthesis.input_shape[1], 1])
        self._images_expr = self._Gs.components.synthesis.get_output_for(self._dlatents_expr, randomize_noise=False)

        # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
        proc_images_expr = (self._images_expr + 1) * (255 / 2)
        sh = proc_images_expr.shape.as_list()
        if sh[2] > 256:
            factor = sh[2] // 256
            # Use the width (sh[3]) for the width axis, matching start();
            # identical to the previous sh[2] for square images.
            proc_images_expr = tf.reduce_mean(tf.reshape(proc_images_expr, [-1, sh[1], sh[2] // factor, factor, sh[3] // factor, factor]), axis=[3,5])

        # Loss graph.
        self._info('Building loss graph...')
        self._target_images_var = tf.Variable(tf.zeros(proc_images_expr.shape), name='target_images_var')
        if self._lpips is None:
            self._lpips = misc.load_pkl('http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl')
        self._dist = self._lpips.get_output_for(proc_images_expr, self._target_images_var)
        self._loss = tf.reduce_sum(self._dist)

        # Noise regularization graph.
        # Penalize the correlation of each noise map with itself shifted by one
        # pixel, at every scale down to 8x8.
        self._info('Building noise regularization graph...')
        reg_loss = 0.0
        for v in self._noise_vars:
            sz = v.shape[2]
            while True:
                reg_loss += tf.reduce_mean(v * tf.roll(v, shift=1, axis=3))**2 + tf.reduce_mean(v * tf.roll(v, shift=1, axis=2))**2
                if sz <= 8:
                    break # Small enough already
                v = tf.reshape(v, [1, 1, sz//2, 2, sz//2, 2]) # Downscale
                v = tf.reduce_mean(v, axis=[3, 5])
                sz = sz // 2
        self._loss += reg_loss * self.regularize_noise_weight

        # Optimizer.
        self._info('Setting up optimizer...')
        self._lrate_in = tf.placeholder(tf.float32, [], name='lrate_in')
        self._opt = dnnlib.tflib.Optimizer(learning_rate=self._lrate_in)
        self._opt.register_gradients(self._loss, [self._dlatents_var] + self._noise_vars)
        self._opt_step = self._opt.apply_updates()

    def run(self, target_images):
        """Project `target_images` to completion.

        Returns an EasyDict with `dlatents`, `noises` and `images`.
        """
        # Run to completion.
        self.start(target_images)
        while self._cur_step < self.num_steps:
            self.step()

        # Collect results.
        pres = dnnlib.EasyDict()
        pres.dlatents = self.get_dlatents()
        pres.noises = self.get_noises()
        pres.images = self.get_images()
        return pres

    def start(self, target_images):
        """Load the target images and reset the optimization state.

        `target_images` are rescaled with (x + 1) * 255/2 and averaged down
        when larger than the graph's target variable. Requires a prior
        `set_network()` call with a network.
        """
        assert self._Gs is not None

        # Prepare target images.
        self._info('Preparing target images...')
        target_images = np.asarray(target_images, dtype='float32')
        target_images = (target_images + 1) * (255 / 2)
        sh = target_images.shape
        assert sh[0] == self._minibatch_size
        if sh[2] > self._target_images_var.shape[2]:
            factor = sh[2] // self._target_images_var.shape[2]
            target_images = np.reshape(target_images, [-1, sh[1], sh[2] // factor, factor, sh[3] // factor, factor]).mean((3, 5))

        # Initialize optimization state.
        self._info('Initializing optimization state...')
        tflib.set_vars({self._target_images_var: target_images, self._dlatents_var: np.tile(self._dlatent_avg, [self._minibatch_size, 1, 1])})
        tflib.run(self._noise_init_op)
        self._opt.reset_optimizer_state()
        self._cur_step = 0

    def step(self):
        """Run one optimization step; does nothing once `num_steps` is reached."""
        assert self._cur_step is not None
        if self._cur_step >= self.num_steps:
            return
        if self._cur_step == 0:
            self._info('Running...')

        # Hyperparameters.
        # Dlatent noise decays quadratically to zero over noise_ramp_length;
        # the learning rate ramps up linearly and down along a cosine.
        t = self._cur_step / self.num_steps
        noise_strength = self._dlatent_std * self.initial_noise_factor * max(0.0, 1.0 - t / self.noise_ramp_length) ** 2
        lr_ramp = min(1.0, (1.0 - t) / self.lr_rampdown_length)
        lr_ramp = 0.5 - 0.5 * np.cos(lr_ramp * np.pi)
        lr_ramp = lr_ramp * min(1.0, t / self.lr_rampup_length)
        learning_rate = self.initial_learning_rate * lr_ramp

        # Train.
        feed_dict = {self._noise_in: noise_strength, self._lrate_in: learning_rate}
        _, dist_value, loss_value = tflib.run([self._opt_step, self._dist, self._loss], feed_dict)
        # Re-normalize each noise map to zero mean and unit variance.
        tflib.run(self._noise_normalize_op)

        # Print status.
        self._cur_step += 1
        if self._cur_step == self.num_steps or self._cur_step % 10 == 0:
            self._info('%-8d%-12g%-12g' % (self._cur_step, dist_value, loss_value))
        if self._cur_step == self.num_steps:
            self._info('Done.')

    def get_cur_step(self):
        """Return the number of completed steps, or None before `start()`."""
        return self._cur_step

    def get_dlatents(self):
        """Return the current dlatents, evaluated with the dlatent noise set to zero."""
        return tflib.run(self._dlatents_expr, {self._noise_in: 0})

    def get_noises(self):
        """Return the current values of all synthesis noise variables."""
        return tflib.run(self._noise_vars)

    def get_images(self):
        """Return the current generated images, evaluated with the dlatent noise set to zero."""
        return tflib.run(self._images_expr, {self._noise_in: 0})
# To view a copy of this license, visit # https://nvlabs.github.io/stylegan2/license.html import argparse import numpy as np import PIL.Image import dnnlib import dnnlib.tflib as tflib import re import sys import pretrained_networks #---------------------------------------------------------------------------- def generate_images(network_pkl, seeds, truncation_psi): print('Loading networks from "%s"...' % network_pkl) _G, _D, Gs = pretrained_networks.load_networks(network_pkl) noise_vars = [var for name, var in Gs.components.synthesis.vars.items() if name.startswith('noise')] Gs_kwargs = dnnlib.EasyDict() Gs_kwargs.output_transform = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True) Gs_kwargs.randomize_noise = False if truncation_psi is not None: Gs_kwargs.truncation_psi = truncation_psi for seed_idx, seed in enumerate(seeds): print('Generating image for seed %d (%d/%d) ...' % (seed, seed_idx, len(seeds))) rnd = np.random.RandomState(seed) z = rnd.randn(1, *Gs.input_shape[1:]) # [minibatch, component] tflib.set_vars({var: rnd.randn(*var.shape.as_list()) for var in noise_vars}) # [height, width] images = Gs.run(z, None, **Gs_kwargs) # [minibatch, height, width, channel] PIL.Image.fromarray(images[0], 'RGB').save(dnnlib.make_run_dir_path('seed%04d.png' % seed)) #---------------------------------------------------------------------------- def style_mixing_example(network_pkl, row_seeds, col_seeds, truncation_psi, col_styles, minibatch_size=4): print('Loading networks from "%s"...' 
% network_pkl) _G, _D, Gs = pretrained_networks.load_networks(network_pkl) w_avg = Gs.get_var('dlatent_avg') # [component] Gs_syn_kwargs = dnnlib.EasyDict() Gs_syn_kwargs.output_transform = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True) Gs_syn_kwargs.randomize_noise = False Gs_syn_kwargs.minibatch_size = minibatch_size print('Generating W vectors...') all_seeds = list(set(row_seeds + col_seeds)) all_z = np.stack([np.random.RandomState(seed).randn(*Gs.input_shape[1:]) for seed in all_seeds]) # [minibatch, component] all_w = Gs.components.mapping.run(all_z, None) # [minibatch, layer, component] all_w = w_avg + (all_w - w_avg) * truncation_psi # [minibatch, layer, component] w_dict = {seed: w for seed, w in zip(all_seeds, list(all_w))} # [layer, component] print('Generating images...') all_images = Gs.components.synthesis.run(all_w, **Gs_syn_kwargs) # [minibatch, height, width, channel] image_dict = {(seed, seed): image for seed, image in zip(all_seeds, list(all_images))} print('Generating style-mixed images...') for row_seed in row_seeds: for col_seed in col_seeds: w = w_dict[row_seed].copy() w[col_styles] = w_dict[col_seed][col_styles] image = Gs.components.synthesis.run(w[np.newaxis], **Gs_syn_kwargs)[0] image_dict[(row_seed, col_seed)] = image print('Saving images...') for (row_seed, col_seed), image in image_dict.items(): PIL.Image.fromarray(image, 'RGB').save(dnnlib.make_run_dir_path('%d-%d.png' % (row_seed, col_seed))) print('Saving image grid...') _N, _C, H, W = Gs.output_shape canvas = PIL.Image.new('RGB', (W * (len(col_seeds) + 1), H * (len(row_seeds) + 1)), 'black') for row_idx, row_seed in enumerate([None] + row_seeds): for col_idx, col_seed in enumerate([None] + col_seeds): if row_seed is None and col_seed is None: continue key = (row_seed, col_seed) if row_seed is None: key = (col_seed, col_seed) if col_seed is None: key = (row_seed, row_seed) canvas.paste(PIL.Image.fromarray(image_dict[key], 'RGB'), (W * col_idx, H * row_idx)) 
canvas.save(dnnlib.make_run_dir_path('grid.png')) #---------------------------------------------------------------------------- def _parse_num_range(s): '''Accept either a comma separated list of numbers 'a,b,c' or a range 'a-c' and return as a list of ints.''' range_re = re.compile(r'^(\d+)-(\d+)$') m = range_re.match(s) if m: return range(int(m.group(1)), int(m.group(2))+1) vals = s.split(',') return [int(x) for x in vals] #---------------------------------------------------------------------------- _examples = '''examples: # Generate ffhq uncurated images (matches paper Figure 12) python %(prog)s generate-images --network=gdrive:networks/stylegan2-ffhq-config-f.pkl --seeds=6600-6625 --truncation-psi=0.5 # Generate ffhq curated images (matches paper Figure 11) python %(prog)s generate-images --network=gdrive:networks/stylegan2-ffhq-config-f.pkl --seeds=66,230,389,1518 --truncation-psi=1.0 # Generate uncurated car images (matches paper Figure 12) python %(prog)s generate-images --network=gdrive:networks/stylegan2-car-config-f.pkl --seeds=6000-6025 --truncation-psi=0.5 # Generate style mixing example (matches style mixing video clip) python %(prog)s style-mixing-example --network=gdrive:networks/stylegan2-ffhq-config-f.pkl --row-seeds=85,100,75,458,1500 --col-seeds=55,821,1789,293 --truncation-psi=1.0 ''' #---------------------------------------------------------------------------- def main(): parser = argparse.ArgumentParser( description='''StyleGAN2 generator. 
Run 'python %(prog)s --help' for subcommand help.''', epilog=_examples, formatter_class=argparse.RawDescriptionHelpFormatter ) subparsers = parser.add_subparsers(help='Sub-commands', dest='command') parser_generate_images = subparsers.add_parser('generate-images', help='Generate images') parser_generate_images.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True) parser_generate_images.add_argument('--seeds', type=_parse_num_range, help='List of random seeds', required=True) parser_generate_images.add_argument('--truncation-psi', type=float, help='Truncation psi (default: %(default)s)', default=0.5) parser_generate_images.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR') parser_style_mixing_example = subparsers.add_parser('style-mixing-example', help='Generate style mixing video') parser_style_mixing_example.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True) parser_style_mixing_example.add_argument('--row-seeds', type=_parse_num_range, help='Random seeds to use for image rows', required=True) parser_style_mixing_example.add_argument('--col-seeds', type=_parse_num_range, help='Random seeds to use for image columns', required=True) parser_style_mixing_example.add_argument('--col-styles', type=_parse_num_range, help='Style layer range (default: %(default)s)', default='0-6') parser_style_mixing_example.add_argument('--truncation-psi', type=float, help='Truncation psi (default: %(default)s)', default=0.5) parser_style_mixing_example.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR') args = parser.parse_args() kwargs = vars(args) subcmd = kwargs.pop('command') if subcmd is None: print ('Error: missing subcommand. 
Re-run with --help for usage.') sys.exit(1) sc = dnnlib.SubmitConfig() sc.num_gpus = 1 sc.submit_target = dnnlib.SubmitTarget.LOCAL sc.local.do_not_copy_source_files = True sc.run_dir_root = kwargs.pop('result_dir') sc.run_desc = subcmd func_name_map = { 'generate-images': 'run_generator.generate_images', 'style-mixing-example': 'run_generator.style_mixing_example' } dnnlib.submit_run(sc, func_name_map[subcmd], **kwargs) #---------------------------------------------------------------------------- if __name__ == "__main__": main() #---------------------------------------------------------------------------- ================================================ FILE: FQ-StyleGAN/run_metrics.py ================================================ # Copyright (c) 2019, NVIDIA Corporation. All rights reserved. # # This work is made available under the Nvidia Source Code License-NC. # To view a copy of this license, visit # https://nvlabs.github.io/stylegan2/license.html import argparse import os import sys import dnnlib import dnnlib.tflib as tflib import pretrained_networks from metrics import metric_base from metrics.metric_defaults import metric_defaults #---------------------------------------------------------------------------- def run(network_pkl, metrics, dataset, data_dir, mirror_augment): print('Evaluating metrics "%s" for "%s"...' 
% (','.join(metrics), network_pkl)) tflib.init_tf() network_pkl = pretrained_networks.get_path_or_url(network_pkl) dataset_args = dnnlib.EasyDict(tfrecord_dir=dataset, shuffle_mb=0) num_gpus = dnnlib.submit_config.num_gpus metric_group = metric_base.MetricGroup([metric_defaults[metric] for metric in metrics]) metric_group.run(network_pkl, data_dir=data_dir, dataset_args=dataset_args, mirror_augment=mirror_augment, num_gpus=num_gpus) #---------------------------------------------------------------------------- def _str_to_bool(v): if isinstance(v, bool): return v if v.lower() in ('yes', 'true', 't', 'y', '1'): return True elif v.lower() in ('no', 'false', 'f', 'n', '0'): return False else: raise argparse.ArgumentTypeError('Boolean value expected.') #---------------------------------------------------------------------------- _examples = '''examples: python %(prog)s --data-dir=~/datasets --network=gdrive:networks/stylegan2-ffhq-config-f.pkl --metrics=fid50k,ppl_wend --dataset=ffhq --mirror-augment=true valid metrics: ''' + ', '.join(sorted([x for x in metric_defaults.keys()])) + ''' ''' def main(): parser = argparse.ArgumentParser( description='Run StyleGAN2 metrics.', epilog=_examples, formatter_class=argparse.RawDescriptionHelpFormatter ) parser.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR') parser.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True) parser.add_argument('--metrics', help='Metrics to compute (default: %(default)s)', default='fid50k', type=lambda x: x.split(',')) parser.add_argument('--dataset', help='Training dataset', required=True) parser.add_argument('--data-dir', help='Dataset root directory', required=True) parser.add_argument('--mirror-augment', help='Mirror augment (default: %(default)s)', default=False, type=_str_to_bool, metavar='BOOL') parser.add_argument('--num-gpus', help='Number of GPUs to use', type=int, default=1, 
metavar='N') args = parser.parse_args() if not os.path.exists(args.data_dir): print ('Error: dataset root directory does not exist.') sys.exit(1) kwargs = vars(args) sc = dnnlib.SubmitConfig() sc.num_gpus = kwargs.pop('num_gpus') sc.submit_target = dnnlib.SubmitTarget.LOCAL sc.local.do_not_copy_source_files = True sc.run_dir_root = kwargs.pop('result_dir') sc.run_desc = 'run-metrics' dnnlib.submit_run(sc, 'run_metrics.run', **kwargs) #---------------------------------------------------------------------------- if __name__ == "__main__": main() #---------------------------------------------------------------------------- ================================================ FILE: FQ-StyleGAN/run_projector.py ================================================ # Copyright (c) 2019, NVIDIA Corporation. All rights reserved. # # This work is made available under the Nvidia Source Code License-NC. # To view a copy of this license, visit # https://nvlabs.github.io/stylegan2/license.html import argparse import numpy as np import dnnlib import dnnlib.tflib as tflib import re import sys import projector import pretrained_networks from training import dataset from training import misc #---------------------------------------------------------------------------- def project_image(proj, targets, png_prefix, num_snapshots): snapshot_steps = set(proj.num_steps - np.linspace(0, proj.num_steps, num_snapshots, endpoint=False, dtype=int)) misc.save_image_grid(targets, png_prefix + 'target.png', drange=[-1,1]) proj.start(targets) while proj.get_cur_step() < proj.num_steps: print('\r%d / %d ... 
' % (proj.get_cur_step(), proj.num_steps), end='', flush=True) proj.step() if proj.get_cur_step() in snapshot_steps: misc.save_image_grid(proj.get_images(), png_prefix + 'step%04d.png' % proj.get_cur_step(), drange=[-1,1]) print('\r%-30s\r' % '', end='', flush=True) #---------------------------------------------------------------------------- def project_generated_images(network_pkl, seeds, num_snapshots, truncation_psi): print('Loading networks from "%s"...' % network_pkl) _G, _D, Gs = pretrained_networks.load_networks(network_pkl) proj = projector.Projector() proj.set_network(Gs) noise_vars = [var for name, var in Gs.components.synthesis.vars.items() if name.startswith('noise')] Gs_kwargs = dnnlib.EasyDict() Gs_kwargs.randomize_noise = False Gs_kwargs.truncation_psi = truncation_psi for seed_idx, seed in enumerate(seeds): print('Projecting seed %d (%d/%d) ...' % (seed, seed_idx, len(seeds))) rnd = np.random.RandomState(seed) z = rnd.randn(1, *Gs.input_shape[1:]) tflib.set_vars({var: rnd.randn(*var.shape.as_list()) for var in noise_vars}) images = Gs.run(z, None, **Gs_kwargs) project_image(proj, targets=images, png_prefix=dnnlib.make_run_dir_path('seed%04d-' % seed), num_snapshots=num_snapshots) #---------------------------------------------------------------------------- def project_real_images(network_pkl, dataset_name, data_dir, num_images, num_snapshots): print('Loading networks from "%s"...' % network_pkl) _G, _D, Gs = pretrained_networks.load_networks(network_pkl) proj = projector.Projector() proj.set_network(Gs) print('Loading images from "%s"...' % dataset_name) dataset_obj = dataset.load_dataset(data_dir=data_dir, tfrecord_dir=dataset_name, max_label_size=0, repeat=False, shuffle_mb=0) assert dataset_obj.shape == Gs.output_shape[1:] for image_idx in range(num_images): print('Projecting image %d/%d ...' 
% (image_idx, num_images)) images, _labels = dataset_obj.get_minibatch_np(1) images = misc.adjust_dynamic_range(images, [0, 255], [-1, 1]) project_image(proj, targets=images, png_prefix=dnnlib.make_run_dir_path('image%04d-' % image_idx), num_snapshots=num_snapshots) #---------------------------------------------------------------------------- def _parse_num_range(s): '''Accept either a comma separated list of numbers 'a,b,c' or a range 'a-c' and return as a list of ints.''' range_re = re.compile(r'^(\d+)-(\d+)$') m = range_re.match(s) if m: return range(int(m.group(1)), int(m.group(2))+1) vals = s.split(',') return [int(x) for x in vals] #---------------------------------------------------------------------------- _examples = '''examples: # Project generated images python %(prog)s project-generated-images --network=gdrive:networks/stylegan2-car-config-f.pkl --seeds=0,1,5 # Project real images python %(prog)s project-real-images --network=gdrive:networks/stylegan2-car-config-f.pkl --dataset=car --data-dir=~/datasets ''' #---------------------------------------------------------------------------- def main(): parser = argparse.ArgumentParser( description='''StyleGAN2 projector. 
Run 'python %(prog)s --help' for subcommand help.''', epilog=_examples, formatter_class=argparse.RawDescriptionHelpFormatter ) subparsers = parser.add_subparsers(help='Sub-commands', dest='command') project_generated_images_parser = subparsers.add_parser('project-generated-images', help='Project generated images') project_generated_images_parser.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True) project_generated_images_parser.add_argument('--seeds', type=_parse_num_range, help='List of random seeds', default=range(3)) project_generated_images_parser.add_argument('--num-snapshots', type=int, help='Number of snapshots (default: %(default)s)', default=5) project_generated_images_parser.add_argument('--truncation-psi', type=float, help='Truncation psi (default: %(default)s)', default=1.0) project_generated_images_parser.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR') project_real_images_parser = subparsers.add_parser('project-real-images', help='Project real images') project_real_images_parser.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True) project_real_images_parser.add_argument('--data-dir', help='Dataset root directory', required=True) project_real_images_parser.add_argument('--dataset', help='Training dataset', dest='dataset_name', required=True) project_real_images_parser.add_argument('--num-snapshots', type=int, help='Number of snapshots (default: %(default)s)', default=5) project_real_images_parser.add_argument('--num-images', type=int, help='Number of images to project (default: %(default)s)', default=3) project_real_images_parser.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR') args = parser.parse_args() subcmd = args.command if subcmd is None: print ('Error: missing subcommand. 
Re-run with --help for usage.') sys.exit(1) kwargs = vars(args) sc = dnnlib.SubmitConfig() sc.num_gpus = 1 sc.submit_target = dnnlib.SubmitTarget.LOCAL sc.local.do_not_copy_source_files = True sc.run_dir_root = kwargs.pop('result_dir') sc.run_desc = kwargs.pop('command') func_name_map = { 'project-generated-images': 'run_projector.project_generated_images', 'project-real-images': 'run_projector.project_real_images' } dnnlib.submit_run(sc, func_name_map[subcmd], **kwargs) #---------------------------------------------------------------------------- if __name__ == "__main__": main() #---------------------------------------------------------------------------- ================================================ FILE: FQ-StyleGAN/run_training.py ================================================ # Copyright (c) 2019, NVIDIA Corporation. All rights reserved. # # This work is made available under the Nvidia Source Code License-NC. # To view a copy of this license, visit # https://nvlabs.github.io/stylegan2/license.html import argparse import copy import os import sys import dnnlib from dnnlib import EasyDict from metrics.metric_defaults import metric_defaults #---------------------------------------------------------------------------- _valid_configs = [ # Table 1 'config-a', # Baseline StyleGAN 'config-b', # + Weight demodulation 'config-c', # + Lazy regularization 'config-d', # + Path length regularization 'config-e', # + No growing, new G & D arch. 
'config-f', # + Large networks (default) # Table 2 'config-e-Gorig-Dorig', 'config-e-Gorig-Dresnet', 'config-e-Gorig-Dskip', 'config-e-Gresnet-Dorig', 'config-e-Gresnet-Dresnet', 'config-e-Gresnet-Dskip', 'config-e-Gskip-Dorig', 'config-e-Gskip-Dresnet', 'config-e-Gskip-Dskip', ] #---------------------------------------------------------------------------- def run(dataset, data_dir, result_dir, config_id, num_gpus, total_kimg, gamma, mirror_augment, metrics, commitment_cost, discrete_layer, decay, D_type): train = EasyDict(run_func_name='training.training_loop.training_loop') # Options for training loop. G = EasyDict(func_name='training.networks_stylegan2.G_main') # Options for generator network. if D_type == 1: D = EasyDict(func_name='training.networks_stylegan2.D_stylegan2_quant') # Options for else: D = EasyDict(func_name='training.networks_stylegan2.D_stylegan2') # Options # for # discriminator network. G_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for generator optimizer. D_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) # Options for discriminator optimizer. G_loss = EasyDict(func_name='training.loss.G_logistic_ns_pathreg') # Options for generator loss. D_loss = EasyDict(func_name='training.loss.D_logistic_r1') # Options for discriminator loss. sched = EasyDict() # Options for TrainingSchedule. grid = EasyDict(size='8k', layout='random') # Options for setup_snapshot_image_grid(). sc = dnnlib.SubmitConfig() # Options for dnnlib.submit_run(). tf_config = {'rnd.np_random_seed': 1000} # Options for tflib.init_tf(). 
D.commitment_cost = commitment_cost D.discrete_layer = discrete_layer D.decay = decay train.data_dir = data_dir train.total_kimg = total_kimg train.mirror_augment = mirror_augment train.image_snapshot_ticks = train.network_snapshot_ticks = 10 sched.G_lrate_base = sched.D_lrate_base = 0.002 sched.minibatch_size_base = 32 sched.minibatch_gpu_base = 4 D_loss.gamma = 10 metrics = [metric_defaults[x] for x in metrics] desc = 'stylegan2' desc += '-' + dataset dataset_args = EasyDict(tfrecord_dir=dataset) assert num_gpus in [1, 2, 4, 8] sc.num_gpus = num_gpus desc += '-%dgpu' % num_gpus assert config_id in _valid_configs desc += '-' + config_id # Configs A-E: Shrink networks to match original StyleGAN. if config_id != 'config-f': G.fmap_base = D.fmap_base = 8 << 10 # Config E: Set gamma to 100 and override G & D architecture. if config_id.startswith('config-e'): D_loss.gamma = 100 if 'Gorig' in config_id: G.architecture = 'orig' if 'Gskip' in config_id: G.architecture = 'skip' # (default) if 'Gresnet' in config_id: G.architecture = 'resnet' if 'Dorig' in config_id: D.architecture = 'orig' if 'Dskip' in config_id: D.architecture = 'skip' if 'Dresnet' in config_id: D.architecture = 'resnet' # (default) # Configs A-D: Enable progressive growing and switch to networks that support it. if config_id in ['config-a', 'config-b', 'config-c', 'config-d']: sched.lod_initial_resolution = 8 sched.G_lrate_base = sched.D_lrate_base = 0.001 sched.G_lrate_dict = sched.D_lrate_dict = {128: 0.0015, 256: 0.002, 512: 0.003, 1024: 0.003} sched.minibatch_size_base = 32 # (default) sched.minibatch_size_dict = {8: 256, 16: 128, 32: 64, 64: 32} sched.minibatch_gpu_base = 4 # (default) sched.minibatch_gpu_dict = {8: 32, 16: 16, 32: 8, 64: 4} G.synthesis_func = 'G_synthesis_stylegan_revised' D.func_name = 'training.networks_stylegan2.D_stylegan' # Configs A-C: Disable path length regularization. 
if config_id in ['config-a', 'config-b', 'config-c']: G_loss = EasyDict(func_name='training.loss.G_logistic_ns') # Configs A-B: Disable lazy regularization. if config_id in ['config-a', 'config-b']: train.lazy_regularization = False # Config A: Switch to original StyleGAN networks. if config_id == 'config-a': G = EasyDict(func_name='training.networks_stylegan.G_style') D = EasyDict(func_name='training.networks_stylegan.D_basic') if gamma is not None: D_loss.gamma = gamma sc.submit_target = dnnlib.SubmitTarget.LOCAL sc.local.do_not_copy_source_files = True kwargs = EasyDict(train) kwargs.update(G_args=G, D_args=D, G_opt_args=G_opt, D_opt_args=D_opt, G_loss_args=G_loss, D_loss_args=D_loss) kwargs.update(dataset_args=dataset_args, sched_args=sched, grid_args=grid, metric_arg_list=metrics, tf_config=tf_config) kwargs.submit_config = copy.deepcopy(sc) kwargs.submit_config.run_dir_root = result_dir kwargs.submit_config.run_desc = desc dnnlib.submit_run(**kwargs) #---------------------------------------------------------------------------- def _str_to_bool(v): if isinstance(v, bool): return v if v.lower() in ('yes', 'true', 't', 'y', '1'): return True elif v.lower() in ('no', 'false', 'f', 'n', '0'): return False else: raise argparse.ArgumentTypeError('Boolean value expected.') def _parse_comma_sep(s): if s is None or s.lower() == 'none' or s == '': return [] return s.split(',') #---------------------------------------------------------------------------- _examples = '''examples: # Train StyleGAN2 using the FFHQ dataset python %(prog)s --num-gpus=8 --data-dir=~/datasets --config=config-f --dataset=ffhq --mirror-augment=true valid configs: ''' + ', '.join(_valid_configs) + ''' valid metrics: ''' + ', '.join(sorted([x for x in metric_defaults.keys()])) + ''' ''' def main(): parser = argparse.ArgumentParser( description='Train StyleGAN2.', epilog=_examples, formatter_class=argparse.RawDescriptionHelpFormatter ) parser.add_argument('--result-dir', help='Root directory for run 
results (default: %(default)s)', default='results', metavar='DIR') parser.add_argument('--data-dir', help='Dataset root directory', required=True) parser.add_argument('--dataset', help='Training dataset', required=True) parser.add_argument('--config', help='Training config (default: %(default)s)', default='config-f', required=True, dest='config_id', metavar='CONFIG') parser.add_argument('--num-gpus', help='Number of GPUs (default: %(default)s)', default=1, type=int, metavar='N') parser.add_argument('--total-kimg', help='Training length in thousands of images (default: %(default)s)', metavar='KIMG', default=25000, type=int) parser.add_argument('--gamma', help='R1 regularization weight (default is config dependent)', default=None, type=float) parser.add_argument('--mirror-augment', help='Mirror augment (default: %(default)s)', default=False, metavar='BOOL', type=_str_to_bool) parser.add_argument('--metrics', help='Comma-separated list of metrics or "none" (default: %(default)s)', default='fid50k', type=_parse_comma_sep) parser.add_argument('--discrete_layer', default='45',type=str) parser.add_argument('--commitment_cost', default=0.25,type=float) parser.add_argument('--decay', default=0.8, type=float) parser.add_argument('--D_type', default=1, type=int) args = parser.parse_args() if not os.path.exists(args.data_dir): print ('Error: dataset root directory does not exist.') sys.exit(1) if args.config_id not in _valid_configs: print ('Error: --config value must be one of: ', ', '.join(_valid_configs)) sys.exit(1) for metric in args.metrics: if metric not in metric_defaults: print ('Error: unknown metric \'%s\'' % metric) sys.exit(1) run(**vars(args)) #---------------------------------------------------------------------------- if __name__ == "__main__": main() #---------------------------------------------------------------------------- ================================================ FILE: FQ-StyleGAN/test_nvcc.cu ================================================ // 
Copyright (c) 2019, NVIDIA Corporation. All rights reserved. // // This work is made available under the Nvidia Source Code License-NC. // To view a copy of this license, visit // https://nvlabs.github.io/stylegan2/license.html #include void checkCudaError(cudaError_t err) { if (err != cudaSuccess) { printf("%s: %s\n", cudaGetErrorName(err), cudaGetErrorString(err)); exit(1); } } __global__ void cudaKernel(void) { printf("GPU says hello.\n"); } int main(void) { printf("CPU says hello.\n"); checkCudaError(cudaLaunchKernel((void*)cudaKernel, 1, 1, NULL, 0, NULL)); checkCudaError(cudaDeviceSynchronize()); return 0; } ================================================ FILE: FQ-StyleGAN/training/__init__.py ================================================ # Copyright (c) 2019, NVIDIA Corporation. All rights reserved. # # This work is made available under the Nvidia Source Code License-NC. # To view a copy of this license, visit # https://nvlabs.github.io/stylegan2/license.html # empty ================================================ FILE: FQ-StyleGAN/training/dataset.py ================================================ # Copyright (c) 2019, NVIDIA Corporation. All rights reserved. # # This work is made available under the Nvidia Source Code License-NC. # To view a copy of this license, visit # https://nvlabs.github.io/stylegan2/license.html """Multi-resolution input data pipeline.""" import os import glob import numpy as np import tensorflow as tf import dnnlib import dnnlib.tflib as tflib #---------------------------------------------------------------------------- # Dataset class that loads data from tfrecords files. class TFRecordDataset: def __init__(self, tfrecord_dir, # Directory containing a collection of tfrecords files. resolution = None, # Dataset resolution, None = autodetect. label_file = None, # Relative path of the labels file, None = autodetect. max_label_size = 0, # 0 = no labels, 'full' = full labels, = N first label components. 
max_images = None, # Maximum number of images to use, None = use all images. repeat = True, # Repeat dataset indefinitely? shuffle_mb = 4096, # Shuffle data within specified window (megabytes), 0 = disable shuffling. prefetch_mb = 2048, # Amount of data to prefetch (megabytes), 0 = disable prefetching. buffer_mb = 256, # Read buffer size (megabytes). num_threads = 2): # Number of concurrent threads. self.tfrecord_dir = tfrecord_dir self.resolution = None self.resolution_log2 = None self.shape = [] # [channels, height, width] self.dtype = 'uint8' self.dynamic_range = [0, 255] self.label_file = label_file self.label_size = None # components self.label_dtype = None self._np_labels = None self._tf_minibatch_in = None self._tf_labels_var = None self._tf_labels_dataset = None self._tf_datasets = dict() self._tf_iterator = None self._tf_init_ops = dict() self._tf_minibatch_np = None self._cur_minibatch = -1 self._cur_lod = -1 # List tfrecords files and inspect their shapes. assert os.path.isdir(self.tfrecord_dir) tfr_files = sorted(glob.glob(os.path.join(self.tfrecord_dir, '*.tfrecords'))) assert len(tfr_files) >= 1 tfr_shapes = [] for tfr_file in tfr_files: tfr_opt = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.NONE) for record in tf.python_io.tf_record_iterator(tfr_file, tfr_opt): tfr_shapes.append(self.parse_tfrecord_np(record).shape) break # Autodetect label filename. if self.label_file is None: guess = sorted(glob.glob(os.path.join(self.tfrecord_dir, '*.labels'))) if len(guess): self.label_file = guess[0] elif not os.path.isfile(self.label_file): guess = os.path.join(self.tfrecord_dir, self.label_file) if os.path.isfile(guess): self.label_file = guess # Determine shape and resolution. 
max_shape = max(tfr_shapes, key=np.prod) self.resolution = resolution if resolution is not None else max_shape[1] self.resolution_log2 = int(np.log2(self.resolution)) self.shape = [max_shape[0], self.resolution, self.resolution] tfr_lods = [self.resolution_log2 - int(np.log2(shape[1])) for shape in tfr_shapes] assert all(shape[0] == max_shape[0] for shape in tfr_shapes) assert all(shape[1] == shape[2] for shape in tfr_shapes) assert all(shape[1] == self.resolution // (2**lod) for shape, lod in zip(tfr_shapes, tfr_lods)) assert all(lod in tfr_lods for lod in range(self.resolution_log2 - 1)) # Load labels. assert max_label_size == 'full' or max_label_size >= 0 self._np_labels = np.zeros([1<<30, 0], dtype=np.float32) if self.label_file is not None and max_label_size != 0: self._np_labels = np.load(self.label_file) assert self._np_labels.ndim == 2 if max_label_size != 'full' and self._np_labels.shape[1] > max_label_size: self._np_labels = self._np_labels[:, :max_label_size] if max_images is not None and self._np_labels.shape[0] > max_images: self._np_labels = self._np_labels[:max_images] self.label_size = self._np_labels.shape[1] self.label_dtype = self._np_labels.dtype.name # Build TF expressions. 
with tf.name_scope('Dataset'), tf.device('/cpu:0'): self._tf_minibatch_in = tf.placeholder(tf.int64, name='minibatch_in', shape=[]) self._tf_labels_var = tflib.create_var_with_large_initial_value(self._np_labels, name='labels_var') self._tf_labels_dataset = tf.data.Dataset.from_tensor_slices(self._tf_labels_var) for tfr_file, tfr_shape, tfr_lod in zip(tfr_files, tfr_shapes, tfr_lods): if tfr_lod < 0: continue dset = tf.data.TFRecordDataset(tfr_file, compression_type='', buffer_size=buffer_mb<<20) if max_images is not None: dset = dset.take(max_images) dset = dset.map(self.parse_tfrecord_tf, num_parallel_calls=num_threads) dset = tf.data.Dataset.zip((dset, self._tf_labels_dataset)) bytes_per_item = np.prod(tfr_shape) * np.dtype(self.dtype).itemsize if shuffle_mb > 0: dset = dset.shuffle(((shuffle_mb << 20) - 1) // bytes_per_item + 1) if repeat: dset = dset.repeat() if prefetch_mb > 0: dset = dset.prefetch(((prefetch_mb << 20) - 1) // bytes_per_item + 1) dset = dset.batch(self._tf_minibatch_in) self._tf_datasets[tfr_lod] = dset self._tf_iterator = tf.data.Iterator.from_structure(self._tf_datasets[0].output_types, self._tf_datasets[0].output_shapes) self._tf_init_ops = {lod: self._tf_iterator.make_initializer(dset) for lod, dset in self._tf_datasets.items()} def close(self): pass # Use the given minibatch size and level-of-detail for the data returned by get_minibatch_tf(). def configure(self, minibatch_size, lod=0): lod = int(np.floor(lod)) assert minibatch_size >= 1 and lod in self._tf_datasets if self._cur_minibatch != minibatch_size or self._cur_lod != lod: self._tf_init_ops[lod].run({self._tf_minibatch_in: minibatch_size}) self._cur_minibatch = minibatch_size self._cur_lod = lod # Get next minibatch as TensorFlow expressions. def get_minibatch_tf(self): # => images, labels return self._tf_iterator.get_next() # Get next minibatch as NumPy arrays. 
def get_minibatch_np(self, minibatch_size, lod=0): # => images, labels
    """Return the next minibatch as evaluated values instead of TF expressions.

    Calls `configure(minibatch_size, lod)`, builds the fetch expression with
    `get_minibatch_tf()` on first use (cached in `self._tf_minibatch_np`) and
    evaluates it through `tflib.run()`.
    """
    self.configure(minibatch_size, lod)
    with tf.name_scope('Dataset'):
        if self._tf_minibatch_np is None:
            self._tf_minibatch_np = self.get_minibatch_tf()
        return tflib.run(self._tf_minibatch_np)

# Get random labels as TensorFlow expression.
def get_random_labels_tf(self, minibatch_size): # => labels
    """Return `minibatch_size` randomly chosen label rows as a TF expression.

    When `self.label_size` is zero, a `[minibatch_size, 0]` tensor of zeros
    with dtype `self.label_dtype` is returned instead.
    """
    with tf.name_scope('Dataset'):
        if self.label_size > 0:
            with tf.device('/cpu:0'):
                return tf.gather(self._tf_labels_var, tf.random_uniform([minibatch_size], 0, self._np_labels.shape[0], dtype=tf.int32))
        return tf.zeros([minibatch_size, 0], self.label_dtype)

# Get random labels as NumPy array.
def get_random_labels_np(self, minibatch_size): # => labels
    """Return `minibatch_size` randomly chosen rows of `self._np_labels`.

    When `self.label_size` is zero, a `[minibatch_size, 0]` array of zeros
    with dtype `self.label_dtype` is returned instead.
    """
    if self.label_size > 0:
        return self._np_labels[np.random.randint(self._np_labels.shape[0], size=[minibatch_size])]
    return np.zeros([minibatch_size, 0], self.label_dtype)

# Parse individual image from a tfrecords file into TensorFlow expression.
@staticmethod
def parse_tfrecord_tf(record):
    """Decode one serialized example into a uint8 tensor shaped by its 'shape' feature."""
    features = tf.parse_single_example(record, features={
        'shape': tf.FixedLenFeature([3], tf.int64),
        'data': tf.FixedLenFeature([], tf.string)})
    data = tf.decode_raw(features['data'], tf.uint8)
    return tf.reshape(data, features['shape'])

# Parse individual image from a tfrecords file into NumPy array.
@staticmethod
def parse_tfrecord_np(record):
    """Decode one serialized `tf.train.Example` into a uint8 NumPy array.

    The array is reshaped to the record's 'shape' feature.
    """
    ex = tf.train.Example()
    ex.ParseFromString(record)
    shape = ex.features.feature['shape'].int64_list.value # pylint: disable=no-member
    data = ex.features.feature['data'].bytes_list.value[0] # pylint: disable=no-member
    # np.fromstring() is deprecated for binary input; np.frombuffer() is its replacement.
    # frombuffer() yields a read-only view of the bytes, so copy to keep the result writable.
    return np.frombuffer(data, np.uint8).reshape(shape).copy()

#----------------------------------------------------------------------------
# Helper func for constructing a dataset object using the given options.
def load_dataset(class_name=None, data_dir=None, verbose=False, **kwargs): kwargs = dict(kwargs) if 'tfrecord_dir' in kwargs: if class_name is None: class_name = __name__ + '.TFRecordDataset' if data_dir is not None: kwargs['tfrecord_dir'] = os.path.join(data_dir, kwargs['tfrecord_dir']) assert class_name is not None if verbose: print('Streaming data using %s...' % class_name) dataset = dnnlib.util.get_obj_by_name(class_name)(**kwargs) if verbose: print('Dataset shape =', np.int32(dataset.shape).tolist()) print('Dynamic range =', dataset.dynamic_range) print('Label size =', dataset.label_size) return dataset #---------------------------------------------------------------------------- ================================================ FILE: FQ-StyleGAN/training/loss.py ================================================ # Copyright (c) 2019, NVIDIA Corporation. All rights reserved. # # This work is made available under the Nvidia Source Code License-NC. # To view a copy of this license, visit # https://nvlabs.github.io/stylegan2/license.html """Loss functions.""" import numpy as np import tensorflow as tf import dnnlib.tflib as tflib from dnnlib.tflib.autosummary import autosummary #---------------------------------------------------------------------------- # Logistic loss from the paper # "Generative Adversarial Nets", Goodfellow et al. 
# 2014

def G_logistic(G, D, opt, training_set, minibatch_size):
    """Saturating logistic generator loss.

    Samples latents and random labels, runs them through G and then D, and
    returns `(-softplus(scores), None)`; there is no regularization term.
    """
    del opt # Unused.
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    c = training_set.get_random_labels_tf(minibatch_size)
    fakes = G.get_output_for(z, c, is_training=True)
    fake_scores = D.get_output_for(fakes, c, is_training=True)
    g_loss = -tf.nn.softplus(fake_scores) # log(1-sigmoid(fake_scores)) # pylint: disable=invalid-unary-operand-type
    return g_loss, None

def G_logistic_ns(G, D, opt, training_set, minibatch_size):
    """Non-saturating logistic generator loss.

    Same sampling as `G_logistic`, but returns `(softplus(-scores), None)`.
    """
    del opt # Unused.
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    c = training_set.get_random_labels_tf(minibatch_size)
    fakes = G.get_output_for(z, c, is_training=True)
    fake_scores = D.get_output_for(fakes, c, is_training=True)
    g_loss = tf.nn.softplus(-fake_scores) # -log(sigmoid(fake_scores))
    return g_loss, None

def D_logistic(G, D, opt, training_set, minibatch_size, reals, labels):
    """Logistic discriminator loss.

    Scores `reals` and freshly generated fakes (both conditioned on `labels`),
    reports the scores through autosummary and returns
    `(softplus(fake_scores) + softplus(-real_scores), None)`.
    """
    del opt, training_set # Unused.
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fakes = G.get_output_for(z, labels, is_training=True)
    real_scores = D.get_output_for(reals, labels, is_training=True)
    fake_scores = D.get_output_for(fakes, labels, is_training=True)
    real_scores = autosummary('Loss/scores/real', real_scores)
    fake_scores = autosummary('Loss/scores/fake', fake_scores)
    d_loss = tf.nn.softplus(fake_scores) # -log(1-sigmoid(fake_scores))
    d_loss += tf.nn.softplus(-real_scores) # -log(sigmoid(real_scores)) # pylint: disable=invalid-unary-operand-type
    return d_loss, None

#----------------------------------------------------------------------------
# R1 and R2 regularizers from the paper
# "Which Training Methods for GANs do actually Converge?", Mescheder et al.
# 2018

def D_logistic_r1(G, D, opt, training_set, minibatch_size, reals, labels, gamma=10.0):
    """Logistic discriminator loss with R1 gradient penalty on the real images.

    D may return either plain scores or a tuple; for a tuple, elements 0, 1
    and 2 are used as scores, a quantization loss and a `ppl` value, and both
    quantization losses are added to the loss.

    Returns `(loss, reg, ppl)` where `reg` is the squared gradient of the real
    scores w.r.t. `reals`, summed over axes 1-3 and scaled by `gamma * 0.5`,
    and `ppl` is the mean of the real/fake `ppl` values, or `tf.zeros(1)` when
    D did not provide them.
    """
    del opt, training_set # Unused.
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fakes = G.get_output_for(z, labels, is_training=True)
    real_scores = D.get_output_for(reals, labels, is_training=True)
    fake_scores = D.get_output_for(fakes, labels, is_training=True)

    # Unpack the extended discriminator output, if present.
    ppl_real, ppl_fake = None, None
    has_quant_loss = isinstance(real_scores, tuple)
    if has_quant_loss:
        real_scores, real_quant_loss, ppl_real = real_scores[0], real_scores[1], real_scores[2]
        fake_scores, fake_quant_loss, ppl_fake = fake_scores[0], fake_scores[1], fake_scores[2]

    real_scores = autosummary('Loss/scores/real', real_scores)
    fake_scores = autosummary('Loss/scores/fake', fake_scores)
    loss = tf.nn.softplus(fake_scores) # -log(1-sigmoid(fake_scores))
    real_term = tf.nn.softplus(-real_scores) # -log(sigmoid(real_scores)) # pylint: disable=invalid-unary-operand-type
    if has_quant_loss:
        real_term = real_term + real_quant_loss + fake_quant_loss
    loss += real_term

    with tf.name_scope('GradientPenalty'):
        real_grads = tf.gradients(tf.reduce_sum(real_scores), [reals])[0]
        penalty = tf.reduce_sum(tf.square(real_grads), axis=[1,2,3])
        penalty = autosummary('Loss/gradient_penalty', penalty)
        reg = penalty * (gamma * 0.5)

    if ppl_fake is None:
        ppl = tf.zeros(1)
    else:
        ppl = (ppl_fake + ppl_real) / 2
    return loss, reg, ppl

def D_logistic_r2(G, D, opt, training_set, minibatch_size, reals, labels, gamma=10.0):
    """Logistic discriminator loss with R2 gradient penalty on the generated images.

    Returns `(loss, reg)` where `reg` is the squared gradient of the fake
    scores w.r.t. the generated images, summed over axes 1-3 and scaled by
    `gamma * 0.5`.
    """
    del opt, training_set # Unused.
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fakes = G.get_output_for(z, labels, is_training=True)
    real_scores = D.get_output_for(reals, labels, is_training=True)
    fake_scores = D.get_output_for(fakes, labels, is_training=True)
    real_scores = autosummary('Loss/scores/real', real_scores)
    fake_scores = autosummary('Loss/scores/fake', fake_scores)
    loss = tf.nn.softplus(fake_scores) # -log(1-sigmoid(fake_scores))
    loss += tf.nn.softplus(-real_scores) # -log(sigmoid(real_scores)) # pylint: disable=invalid-unary-operand-type

    with tf.name_scope('GradientPenalty'):
        fake_grads = tf.gradients(tf.reduce_sum(fake_scores), [fakes])[0]
        penalty = tf.reduce_sum(tf.square(fake_grads), axis=[1,2,3])
        penalty = autosummary('Loss/gradient_penalty', penalty)
        reg = penalty * (gamma * 0.5)
    return loss, reg

#----------------------------------------------------------------------------
# WGAN loss from the paper
# "Wasserstein Generative Adversarial Networks", Arjovsky et al.
# 2017

def G_wgan(G, D, opt, training_set, minibatch_size):
    """WGAN generator loss: the negated scores of generated images.

    Returns `(loss, None)`; there is no regularization term.
    """
    del opt # Unused.
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    c = training_set.get_random_labels_tf(minibatch_size)
    fakes = G.get_output_for(z, c, is_training=True)
    fake_scores = D.get_output_for(fakes, c, is_training=True)
    return -fake_scores, None

def D_wgan(G, D, opt, training_set, minibatch_size, reals, labels, wgan_epsilon=0.001):
    """WGAN discriminator loss with an epsilon penalty on the real scores.

    Returns `(fake_scores - real_scores + wgan_epsilon * real_scores**2, None)`.
    """
    del opt, training_set # Unused.
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fakes = G.get_output_for(z, labels, is_training=True)
    real_scores = D.get_output_for(reals, labels, is_training=True)
    fake_scores = D.get_output_for(fakes, labels, is_training=True)
    real_scores = autosummary('Loss/scores/real', real_scores)
    fake_scores = autosummary('Loss/scores/fake', fake_scores)
    d_loss = fake_scores - real_scores
    with tf.name_scope('EpsilonPenalty'):
        eps_penalty = autosummary('Loss/epsilon_penalty', tf.square(real_scores))
        d_loss += eps_penalty * wgan_epsilon
    return d_loss, None

#----------------------------------------------------------------------------
# WGAN-GP loss from the paper
# "Improved Training of Wasserstein GANs", Gulrajani et al.
# 2017

def D_wgan_gp(G, D, opt, training_set, minibatch_size, reals, labels, wgan_lambda=10.0, wgan_epsilon=0.001, wgan_target=1.0):
    """WGAN-GP discriminator loss.

    The loss is `fake_scores - real_scores` plus `wgan_epsilon` times the
    squared real scores. The regularizer scores random per-sample
    interpolations between reals and fakes, and penalizes the squared distance
    of their gradient norm from `wgan_target`, scaled by
    `wgan_lambda / wgan_target**2`. Returns `(loss, reg)`.
    """
    del opt, training_set # Unused.
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fakes = G.get_output_for(z, labels, is_training=True)
    real_scores = D.get_output_for(reals, labels, is_training=True)
    fake_scores = D.get_output_for(fakes, labels, is_training=True)
    real_scores = autosummary('Loss/scores/real', real_scores)
    fake_scores = autosummary('Loss/scores/fake', fake_scores)
    d_loss = fake_scores - real_scores
    with tf.name_scope('EpsilonPenalty'):
        eps_penalty = autosummary('Loss/epsilon_penalty', tf.square(real_scores))
        d_loss += eps_penalty * wgan_epsilon

    with tf.name_scope('GradientPenalty'):
        # One mixing coefficient per sample, broadcast over the remaining axes.
        mix = tf.random_uniform([minibatch_size, 1, 1, 1], 0.0, 1.0, dtype=fakes.dtype)
        mixed_images = tflib.lerp(tf.cast(reals, fakes.dtype), fakes, mix)
        mixed_scores = D.get_output_for(mixed_images, labels, is_training=True)
        mixed_scores = autosummary('Loss/scores/mixed', mixed_scores)
        mixed_grads = tf.gradients(tf.reduce_sum(mixed_scores), [mixed_images])[0]
        mixed_norms = tf.sqrt(tf.reduce_sum(tf.square(mixed_grads), axis=[1,2,3]))
        mixed_norms = autosummary('Loss/mixed_norms', mixed_norms)
        penalty = tf.square(mixed_norms - wgan_target)
        reg = penalty * (wgan_lambda / (wgan_target**2))
    return d_loss, reg

#----------------------------------------------------------------------------
# Non-saturating logistic loss with path length regularizer from the paper
# "Analyzing and Improving the Image Quality of StyleGAN", Karras et al.
2019 def G_logistic_ns_pathreg(G, D, opt, training_set, minibatch_size, pl_minibatch_shrink=2, pl_decay=0.01, pl_weight=2.0): _ = opt latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:]) labels = training_set.get_random_labels_tf(minibatch_size) fake_images_out, fake_dlatents_out = G.get_output_for(latents, labels, is_training=True, return_dlatents=True) fake_scores_out = D.get_output_for(fake_images_out, labels, is_training=True) if isinstance(fake_scores_out, tuple): fake_scores_out, quant_loss = fake_scores_out[0], fake_scores_out[1] loss = tf.nn.softplus(-fake_scores_out) + quant_loss # -log(logistic(fake_scores_out)) else: loss = tf.nn.softplus(-fake_scores_out) # -log(logistic(fake_scores_out)) # Path length regularization. with tf.name_scope('PathReg'): # Evaluate the regularization term using a smaller minibatch to conserve memory. if pl_minibatch_shrink > 1: pl_minibatch = minibatch_size // pl_minibatch_shrink pl_latents = tf.random_normal([pl_minibatch] + G.input_shapes[0][1:]) pl_labels = training_set.get_random_labels_tf(pl_minibatch) fake_images_out, fake_dlatents_out = G.get_output_for(pl_latents, pl_labels, is_training=True, return_dlatents=True) # Compute |J*y|. pl_noise = tf.random_normal(tf.shape(fake_images_out)) / np.sqrt(np.prod(G.output_shape[2:])) pl_grads = tf.gradients(tf.reduce_sum(fake_images_out * pl_noise), [fake_dlatents_out])[0] pl_lengths = tf.sqrt(tf.reduce_mean(tf.reduce_sum(tf.square(pl_grads), axis=2), axis=1)) pl_lengths = autosummary('Loss/pl_lengths', pl_lengths) # Track exponential moving average of |J*y|. with tf.control_dependencies(None): pl_mean_var = tf.Variable(name='pl_mean', trainable=False, initial_value=0.0, dtype=tf.float32) pl_mean = pl_mean_var + pl_decay * (tf.reduce_mean(pl_lengths) - pl_mean_var) pl_update = tf.assign(pl_mean_var, pl_mean) # Calculate (|J*y|-a)^2. 
with tf.control_dependencies([pl_update]): pl_penalty = tf.square(pl_lengths - pl_mean) pl_penalty = autosummary('Loss/pl_penalty', pl_penalty) # Apply weight. # # Note: The division in pl_noise decreases the weight by num_pixels, and the reduce_mean # in pl_lengths decreases it by num_affine_layers. The effective weight then becomes: # # gamma_pl = pl_weight / num_pixels / num_affine_layers # = 2 / (r^2) / (log2(r) * 2 - 2) # = 1 / (r^2 * (log2(r) - 1)) # = ln(2) / (r^2 * (ln(r) - ln(2)) # reg = pl_penalty * pl_weight return loss, reg #---------------------------------------------------------------------------- ================================================ FILE: FQ-StyleGAN/training/misc.py ================================================ # Copyright (c) 2019, NVIDIA Corporation. All rights reserved. # # This work is made available under the Nvidia Source Code License-NC. # To view a copy of this license, visit # https://nvlabs.github.io/stylegan2/license.html """Miscellaneous utility functions.""" import os import pickle import numpy as np import PIL.Image import PIL.ImageFont import dnnlib #---------------------------------------------------------------------------- # Convenience wrappers for pickle that are able to load data produced by # older versions of the code, and from external URLs. def open_file_or_url(file_or_url): if dnnlib.util.is_url(file_or_url): return dnnlib.util.open_url(file_or_url, cache_dir='.stylegan2-cache') return open(file_or_url, 'rb') def load_pkl(file_or_url): with open_file_or_url(file_or_url) as file: return pickle.load(file, encoding='latin1') def save_pkl(obj, filename): with open(filename, 'wb') as file: pickle.dump(obj, file, protocol=pickle.HIGHEST_PROTOCOL) #---------------------------------------------------------------------------- # Image utils. 
def adjust_dynamic_range(data, drange_in, drange_out):
    """Linearly map `data` from the range `drange_in` to `drange_out`.

    Both ranges are `[low, high]` pairs. When they compare equal, `data` is
    returned untouched.
    """
    if drange_in != drange_out:
        in_lo, in_hi = np.float32(drange_in[0]), np.float32(drange_in[1])
        out_lo, out_hi = np.float32(drange_out[0]), np.float32(drange_out[1])
        scale = (out_hi - out_lo) / (in_hi - in_lo)
        bias = out_lo - in_lo * scale
        data = data * scale + bias
    return data

def create_image_grid(images, grid_size=None):
    """Tile a batch of images into a single image, filling rows left to right.

    `images` is 3D or 4D with the batch on axis 0 and height/width on the last
    two axes. `grid_size` is `(columns, rows)`; by default a near-square grid
    is chosen. Unused cells stay zero.
    """
    assert images.ndim in (3, 4)
    num = images.shape[0]
    img_h, img_w = images.shape[-2], images.shape[-1]

    if grid_size is None:
        grid_w = max(int(np.ceil(np.sqrt(num))), 1)
        grid_h = max((num - 1) // grid_w + 1, 1)
    else:
        grid_w, grid_h = tuple(grid_size)

    canvas_shape = list(images.shape[1:-2]) + [grid_h * img_h, grid_w * img_w]
    canvas = np.zeros(canvas_shape, dtype=images.dtype)
    for idx in range(num):
        row, col = divmod(idx, grid_w)
        top, left = row * img_h, col * img_w
        canvas[..., top : top + img_h, left : left + img_w] = images[idx]
    return canvas

def convert_to_pil_image(image, drange=[0,1]):
    """Convert a 2D (HW) or 3D (CHW) array with values in `drange` to a PIL image.

    Single-channel CHW input is converted to grayscale ('L'); other 3D input
    is transposed to HWC and treated as 'RGB'.
    """
    assert image.ndim in (2, 3)
    if image.ndim == 3:
        single_channel = image.shape[0] == 1
        image = image[0] if single_channel else image.transpose(1, 2, 0) # CHW => HW or HWC
    scaled = adjust_dynamic_range(image, drange, [0,255])
    pixels = np.rint(scaled).clip(0, 255).astype(np.uint8)
    mode = 'L' if pixels.ndim != 3 else 'RGB'
    return PIL.Image.fromarray(pixels, mode)

def save_image_grid(images, filename, drange=[0,1], grid_size=None):
    """Tile `images` into a grid and save the result to `filename`."""
    grid = create_image_grid(images, grid_size)
    convert_to_pil_image(grid, drange).save(filename)

def apply_mirror_augment(minibatch):
    """Return a copy of the 4D minibatch with about half of the samples flipped along the last axis."""
    flip = np.random.rand(minibatch.shape[0]) < 0.5
    mirrored = np.array(minibatch)
    mirrored[flip] = mirrored[flip, :, :, ::-1]
    return mirrored

#----------------------------------------------------------------------------
# Loading data from previous training runs.
def parse_config_for_previous_run(run_dir): with open(os.path.join(run_dir, 'submit_config.pkl'), 'rb') as f: data = pickle.load(f) data = data.get('run_func_kwargs', {}) return dict(train=data, dataset=data.get('dataset_args', {})) #---------------------------------------------------------------------------- # Size and contents of the image snapshot grids that are exported # periodically during training. def setup_snapshot_image_grid(training_set, size = '1080p', # '1080p' = to be viewed on 1080p display, '4k' = to be viewed on 4k display. layout = 'random'): # 'random' = grid contents are selected randomly, 'row_per_class' = each row corresponds to one class label. # Select size. gw = 1; gh = 1 if size == '1080p': gw = np.clip(1920 // training_set.shape[2], 3, 32) gh = np.clip(1080 // training_set.shape[1], 2, 32) if size == '4k': gw = np.clip(3840 // training_set.shape[2], 7, 32) gh = np.clip(2160 // training_set.shape[1], 4, 32) if size == '8k': gw = np.clip(7680 // training_set.shape[2], 7, 32) gh = np.clip(4320 // training_set.shape[1], 4, 32) # Initialize data arrays. reals = np.zeros([gw * gh] + training_set.shape, dtype=training_set.dtype) labels = np.zeros([gw * gh, training_set.label_size], dtype=training_set.label_dtype) # Random layout. if layout == 'random': reals[:], labels[:] = training_set.get_minibatch_np(gw * gh) # Class-conditional layouts. 
class_layouts = dict(row_per_class=[gw,1], col_per_class=[1,gh], class4x4=[4,4]) if layout in class_layouts: bw, bh = class_layouts[layout] nw = (gw - 1) // bw + 1 nh = (gh - 1) // bh + 1 blocks = [[] for _i in range(nw * nh)] for _iter in range(1000000): real, label = training_set.get_minibatch_np(1) idx = np.argmax(label[0]) while idx < len(blocks) and len(blocks[idx]) >= bw * bh: idx += training_set.label_size if idx < len(blocks): blocks[idx].append((real, label)) if all(len(block) >= bw * bh for block in blocks): break for i, block in enumerate(blocks): for j, (real, label) in enumerate(block): x = (i % nw) * bw + j % bw y = (i // nw) * bh + j // bw if x < gw and y < gh: reals[x + y * gw] = real[0] labels[x + y * gw] = label[0] return (gw, gh), reals, labels #---------------------------------------------------------------------------- ================================================ FILE: FQ-StyleGAN/training/networks_stylegan.py ================================================ # Copyright (c) 2019, NVIDIA Corporation. All rights reserved. # # This work is made available under the Nvidia Source Code License-NC. # To view a copy of this license, visit # https://nvlabs.github.io/stylegan2/license.html """Network architectures used in the StyleGAN paper.""" import numpy as np import tensorflow as tf import dnnlib import dnnlib.tflib as tflib # NOTE: Do not import any application-specific modules here! # Specify all network parameters as kwargs. #---------------------------------------------------------------------------- # Primitive ops for manipulating 4D activation tensors. # The gradients of these are not necessary efficient or even meaningful. def _blur2d(x, f=[1,2,1], normalize=True, flip=False, stride=1): assert x.shape.ndims == 4 and all(dim.value is not None for dim in x.shape[1:]) assert isinstance(stride, int) and stride >= 1 # Finalize filter kernel. 
f = np.array(f, dtype=np.float32) if f.ndim == 1: f = f[:, np.newaxis] * f[np.newaxis, :] assert f.ndim == 2 if normalize: f /= np.sum(f) if flip: f = f[::-1, ::-1] f = f[:, :, np.newaxis, np.newaxis] f = np.tile(f, [1, 1, int(x.shape[1]), 1]) # No-op => early exit. if f.shape == (1, 1) and f[0,0] == 1: return x # Convolve using depthwise_conv2d. orig_dtype = x.dtype x = tf.cast(x, tf.float32) # tf.nn.depthwise_conv2d() doesn't support fp16 f = tf.constant(f, dtype=x.dtype, name='filter') strides = [1, 1, stride, stride] x = tf.nn.depthwise_conv2d(x, f, strides=strides, padding='SAME', data_format='NCHW') x = tf.cast(x, orig_dtype) return x def _upscale2d(x, factor=2, gain=1): assert x.shape.ndims == 4 and all(dim.value is not None for dim in x.shape[1:]) assert isinstance(factor, int) and factor >= 1 # Apply gain. if gain != 1: x *= gain # No-op => early exit. if factor == 1: return x # Upscale using tf.tile(). s = x.shape x = tf.reshape(x, [-1, s[1], s[2], 1, s[3], 1]) x = tf.tile(x, [1, 1, 1, factor, 1, factor]) x = tf.reshape(x, [-1, s[1], s[2] * factor, s[3] * factor]) return x def _downscale2d(x, factor=2, gain=1): assert x.shape.ndims == 4 and all(dim.value is not None for dim in x.shape[1:]) assert isinstance(factor, int) and factor >= 1 # 2x2, float32 => downscale using _blur2d(). if factor == 2 and x.dtype == tf.float32: f = [np.sqrt(gain) / factor] * factor return _blur2d(x, f=f, normalize=False, stride=factor) # Apply gain. if gain != 1: x *= gain # No-op => early exit. if factor == 1: return x # Large factor => downscale using tf.nn.avg_pool(). # NOTE: Requires tf_config['graph_options.place_pruned_graph']=True to work. ksize = [1, 1, factor, factor] return tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding='VALID', data_format='NCHW') #---------------------------------------------------------------------------- # High-level ops for manipulating 4D activation tensors. # The gradients of these are meant to be as efficient as possible. 
def blur2d(x, f=[1,2,1], normalize=True):
    """Blur `x` with `_blur2d`, with hand-written first and second order gradients.

    The gradient applies the flipped filter; the gradient of the gradient
    applies the original filter again.
    """
    with tf.variable_scope('Blur2D'):
        @tf.custom_gradient
        def func(inp):
            out = _blur2d(inp, f, normalize)
            @tf.custom_gradient
            def grad(d_out):
                def grad_of_grad(dd_inp):
                    return _blur2d(dd_inp, f, normalize)
                d_inp = _blur2d(d_out, f, normalize, flip=True)
                return d_inp, grad_of_grad
            return out, grad
        return func(x)

def upscale2d(x, factor=2):
    """Upscale `x` with `_upscale2d`, with hand-written first and second order gradients.

    The gradient downscales with gain `factor**2`.
    """
    with tf.variable_scope('Upscale2D'):
        @tf.custom_gradient
        def func(inp):
            out = _upscale2d(inp, factor)
            @tf.custom_gradient
            def grad(d_out):
                def grad_of_grad(dd_inp):
                    return _upscale2d(dd_inp, factor)
                d_inp = _downscale2d(d_out, factor, gain=factor**2)
                return d_inp, grad_of_grad
            return out, grad
        return func(x)

def downscale2d(x, factor=2):
    """Downscale `x` with `_downscale2d`, with hand-written first and second order gradients.

    The gradient upscales with gain `1/factor**2`.
    """
    with tf.variable_scope('Downscale2D'):
        @tf.custom_gradient
        def func(inp):
            out = _downscale2d(inp, factor)
            @tf.custom_gradient
            def grad(d_out):
                def grad_of_grad(dd_inp):
                    return _downscale2d(dd_inp, factor)
                d_inp = _upscale2d(d_out, factor, gain=1/factor**2)
                return d_inp, grad_of_grad
            return out, grad
        return func(x)

#----------------------------------------------------------------------------
# Get/create weight tensor for a convolutional or fully-connected layer.

def get_weight(shape, gain=np.sqrt(2), use_wscale=False, lrmul=1):
    """Get or create the 'weight' variable of the current scope, scaled for use.

    `shape` is [kernel, kernel, fmaps_in, fmaps_out] or [in, out]. With
    `use_wscale` the variable is initialized with std `1/lrmul` and the He
    scaling is applied at runtime; otherwise the He std (divided by `lrmul`)
    is used for initialization and only `lrmul` is applied at runtime.
    """
    fan_in = np.prod(shape[:-1])
    he_std = gain / np.sqrt(fan_in) # He init

    # Equalized learning rate and custom learning rate multiplier.
    if use_wscale:
        init_std, runtime_coef = 1.0 / lrmul, he_std * lrmul
    else:
        init_std, runtime_coef = he_std / lrmul, lrmul

    # Create variable.
    initializer = tf.initializers.random_normal(0, init_std)
    weight = tf.get_variable('weight', shape=shape, initializer=initializer)
    return weight * runtime_coef

#----------------------------------------------------------------------------
# Fully-connected layer.

def dense(x, fmaps, **kwargs):
    """Apply a bias-free fully-connected layer with `fmaps` outputs.

    Inputs with more than two dimensions are flattened to [batch, features]
    first; `kwargs` are forwarded to `get_weight`.
    """
    if len(x.shape) > 2:
        num_features = np.prod([d.value for d in x.shape[1:]])
        x = tf.reshape(x, [-1, num_features])
    weight = get_weight([x.shape[1].value, fmaps], **kwargs)
    weight = tf.cast(weight, x.dtype)
    return tf.matmul(x, weight)

#----------------------------------------------------------------------------
# Convolutional layer.
def conv2d(x, fmaps, kernel, **kwargs):
    """Apply a bias-free, stride-1 'SAME' convolution with an odd `kernel` size to the NCHW tensor `x`."""
    assert kernel >= 1 and kernel % 2 == 1
    weight = get_weight([kernel, kernel, x.shape[1].value, fmaps], **kwargs)
    weight = tf.cast(weight, x.dtype)
    return tf.nn.conv2d(x, weight, strides=[1,1,1,1], padding='SAME', data_format='NCHW')

#----------------------------------------------------------------------------
# Fused convolution + scaling.
# Faster and uses less memory than performing the operations separately.

def upscale2d_conv2d(x, fmaps, kernel, fused_scale='auto', **kwargs):
    """Upscale `x` by 2 and convolve, optionally fused into one transposed convolution.

    With `fused_scale='auto'` the fused path is taken when the smaller output
    spatial size is at least 128.
    """
    assert kernel >= 1 and kernel % 2 == 1
    assert fused_scale in [True, False, 'auto']
    if fused_scale == 'auto':
        fused_scale = min(x.shape[2:]) * 2 >= 128

    # Not fused => call the individual ops directly.
    if not fused_scale:
        return conv2d(upscale2d(x), fmaps, kernel, **kwargs)

    # Fused => perform both ops simultaneously using tf.nn.conv2d_transpose().
    weight = get_weight([kernel, kernel, x.shape[1].value, fmaps], **kwargs)
    weight = tf.transpose(weight, [0, 1, 3, 2]) # [kernel, kernel, fmaps_out, fmaps_in]
    weight = tf.pad(weight, [[1,1], [1,1], [0,0], [0,0]], mode='CONSTANT')
    weight = tf.add_n([weight[1:, 1:], weight[:-1, 1:], weight[1:, :-1], weight[:-1, :-1]])
    weight = tf.cast(weight, x.dtype)
    out_shape = [tf.shape(x)[0], fmaps, x.shape[2] * 2, x.shape[3] * 2]
    return tf.nn.conv2d_transpose(x, weight, out_shape, strides=[1,1,2,2], padding='SAME', data_format='NCHW')

def conv2d_downscale2d(x, fmaps, kernel, fused_scale='auto', **kwargs):
    """Convolve `x` and downscale by 2, optionally fused into one strided convolution.

    With `fused_scale='auto'` the fused path is taken when the smaller input
    spatial size is at least 128.
    """
    assert kernel >= 1 and kernel % 2 == 1
    assert fused_scale in [True, False, 'auto']
    if fused_scale == 'auto':
        fused_scale = min(x.shape[2:]) >= 128

    # Not fused => call the individual ops directly.
    if not fused_scale:
        return downscale2d(conv2d(x, fmaps, kernel, **kwargs))

    # Fused => perform both ops simultaneously using tf.nn.conv2d().
    weight = get_weight([kernel, kernel, x.shape[1].value, fmaps], **kwargs)
    weight = tf.pad(weight, [[1,1], [1,1], [0,0], [0,0]], mode='CONSTANT')
    weight = tf.add_n([weight[1:, 1:], weight[:-1, 1:], weight[1:, :-1], weight[:-1, :-1]]) * 0.25
    weight = tf.cast(weight, x.dtype)
    return tf.nn.conv2d(x, weight, strides=[1,1,2,2], padding='SAME', data_format='NCHW')

#----------------------------------------------------------------------------
# Apply bias to the given activation tensor.

def apply_bias(x, lrmul=1):
    """Add a zero-initialized, per-channel 'bias' variable (scaled by `lrmul`) to `x`.

    2D inputs get the bias added directly; otherwise it is broadcast over
    axis 1 of a 4D tensor.
    """
    bias = tf.get_variable('bias', shape=[x.shape[1]], initializer=tf.initializers.zeros()) * lrmul
    bias = tf.cast(bias, x.dtype)
    if len(x.shape) != 2:
        bias = tf.reshape(bias, [1, -1, 1, 1])
    return x + bias

#----------------------------------------------------------------------------
# Leaky ReLU activation. More efficient than tf.nn.leaky_relu() and supports FP16.

def leaky_relu(x, alpha=0.2):
    """Compute `max(x, x * alpha)` with hand-written first and second order gradients."""
    with tf.variable_scope('LeakyReLU'):
        alpha = tf.constant(alpha, dtype=x.dtype, name='alpha')
        @tf.custom_gradient
        def func(inp):
            out = tf.maximum(inp, inp * alpha)
            @tf.custom_gradient
            def grad(d_out):
                def grad_of_grad(dd_inp):
                    return tf.where(out >= 0, dd_inp, dd_inp * alpha)
                d_inp = tf.where(out >= 0, d_out, d_out * alpha)
                return d_inp, grad_of_grad
            return out, grad
        return func(x)

#----------------------------------------------------------------------------
# Pixelwise feature vector normalization.

def pixel_norm(x, epsilon=1e-8):
    """Scale `x` by the reciprocal root-mean-square taken over axis 1."""
    with tf.variable_scope('PixelNorm'):
        epsilon = tf.constant(epsilon, dtype=x.dtype, name='epsilon')
        mean_square = tf.reduce_mean(tf.square(x), axis=1, keepdims=True)
        return x * tf.rsqrt(mean_square + epsilon)

#----------------------------------------------------------------------------
# Instance normalization.
def instance_norm(x, epsilon=1e-8):
    """Normalize each feature map of an NCHW tensor over its spatial axes.

    The computation runs in float32 and the result is cast back to the
    dtype the input arrived in.
    """
    assert len(x.shape) == 4 # NCHW
    with tf.variable_scope('InstanceNorm'):
        orig_dtype = x.dtype
        x = tf.cast(x, tf.float32)
        x -= tf.reduce_mean(x, axis=[2,3], keepdims=True)
        epsilon = tf.constant(epsilon, dtype=x.dtype, name='epsilon')
        x *= tf.rsqrt(tf.reduce_mean(tf.square(x), axis=[2,3], keepdims=True) + epsilon)
        x = tf.cast(x, orig_dtype)
        return x

#----------------------------------------------------------------------------
# Style modulation.

def style_mod(x, dlatent, **kwargs):
    """Apply a per-channel scale and shift to x, both derived from dlatent.

    A dense layer maps dlatent to 2 * x.shape[1] values; the first half is
    used as (scale - 1) and the second half as the additive offset.
    **kwargs are forwarded to dense().
    """
    with tf.variable_scope('StyleMod'):
        style = apply_bias(dense(dlatent, fmaps=x.shape[1]*2, gain=1, **kwargs))
        # [minibatch, 2, channels, 1, ...] so both halves broadcast against x.
        style = tf.reshape(style, [-1, 2, x.shape[1]] + [1] * (len(x.shape) - 2))
        return x * (style[:,0] + 1) + style[:,1]

#----------------------------------------------------------------------------
# Noise input.

def apply_noise(x, noise_var=None, randomize_noise=True):
    """Add single-channel noise to x, scaled by a learned per-channel weight.

    Fresh normal noise is drawn when noise_var is None or randomize_noise is
    set; otherwise noise_var is used. The 'weight' variable is initialized to
    zeros.
    """
    assert len(x.shape) == 4 # NCHW
    with tf.variable_scope('Noise'):
        if noise_var is None or randomize_noise:
            noise = tf.random_normal([tf.shape(x)[0], 1, x.shape[2], x.shape[3]], dtype=x.dtype)
        else:
            noise = tf.cast(noise_var, x.dtype)
        weight = tf.get_variable('weight', shape=[x.shape[1].value], initializer=tf.initializers.zeros())
        return x + noise * tf.reshape(tf.cast(weight, x.dtype), [1, -1, 1, 1])

#----------------------------------------------------------------------------
# Minibatch standard deviation.

def minibatch_stddev_layer(x, group_size=4, num_new_features=1):
    """Append num_new_features feature maps holding per-group stddev statistics.

    The minibatch is split into groups of (at most) group_size samples; the
    standard deviation over each group is averaged over channels and pixels
    and concatenated to x along the channel axis.
    """
    with tf.variable_scope('MinibatchStddev'):
        group_size = tf.minimum(group_size, tf.shape(x)[0])     # Minibatch must be divisible by (or smaller than) group_size.
        s = x.shape                                             # [NCHW]  Input shape.
        y = tf.reshape(x, [group_size, -1, num_new_features, s[1]//num_new_features, s[2], s[3]])   # [GMncHW] Split minibatch into M groups of size G. Split channels into n channel groups c.
        y = tf.cast(y, tf.float32)                              # [GMncHW] Cast to FP32.
        y -= tf.reduce_mean(y, axis=0, keepdims=True)           # [GMncHW] Subtract mean over group.
        y = tf.reduce_mean(tf.square(y), axis=0)                # [MncHW]  Calc variance over group.
        y = tf.sqrt(y + 1e-8)                                   # [MncHW]  Calc stddev over group.
        y = tf.reduce_mean(y, axis=[2,3,4], keepdims=True)      # [Mn111]  Take average over fmaps and pixels.
        y = tf.reduce_mean(y, axis=[2])                         # [Mn11]   Drop the singleton channel axis kept by the previous reduction.
        y = tf.cast(y, x.dtype)                                 # [Mn11]   Cast back to original data type.
        y = tf.tile(y, [group_size, 1, s[2], s[3]])             # [NnHW]   Replicate over group and pixels.
        return tf.concat([x, y], axis=1)                        # [NCHW]   Append as new fmap.

#----------------------------------------------------------------------------
# Style-based generator used in the StyleGAN paper.
# Composed of two sub-networks (G_mapping and G_synthesis) that are defined below.

def G_style(
    latents_in,                                     # First input: Latent vectors (Z) [minibatch, latent_size].
    labels_in,                                      # Second input: Conditioning labels [minibatch, label_size].
    truncation_psi          = 0.7,                  # Style strength multiplier for the truncation trick. None = disable.
    truncation_cutoff       = 8,                    # Number of layers for which to apply the truncation trick. None = disable.
    truncation_psi_val      = None,                 # Value for truncation_psi to use during validation.
    truncation_cutoff_val   = None,                 # Value for truncation_cutoff to use during validation.
    dlatent_avg_beta        = 0.995,                # Decay for tracking the moving average of W during training. None = disable.
    style_mixing_prob       = 0.9,                  # Probability of mixing styles during training. None = disable.
    is_training             = False,                # Network is under training? Enables and disables specific features.
    is_validation           = False,                # Network is under validation? Chooses which value to use for truncation_psi.
    is_template_graph       = False,                # True = template graph constructed by the Network class, False = actual evaluation.
    components              = dnnlib.EasyDict(),    # Container for sub-networks. Retained between calls.
    **kwargs):                                      # Arguments for sub-networks (G_mapping and G_synthesis).
    """Build the full generator graph and return the 'images_out' tensor.

    Runs the mapping network on the latents, then (depending on the flags)
    updates the moving average of W, applies style mixing and the truncation
    trick, and finally feeds the resulting dlatents to the synthesis network.
    """

    # Validate arguments.
    assert not is_training or not is_validation
    assert isinstance(components, dnnlib.EasyDict)
    if is_validation:
        truncation_psi = truncation_psi_val
        truncation_cutoff = truncation_cutoff_val
    # Truncation is switched off during training; the moving average and style
    # mixing are switched off outside training. Each feature is also disabled
    # when its Python-valued setting makes it a no-op.
    if is_training or (truncation_psi is not None and not tflib.is_tf_expression(truncation_psi) and truncation_psi == 1):
        truncation_psi = None
    if is_training or (truncation_cutoff is not None and not tflib.is_tf_expression(truncation_cutoff) and truncation_cutoff <= 0):
        truncation_cutoff = None
    if not is_training or (dlatent_avg_beta is not None and not tflib.is_tf_expression(dlatent_avg_beta) and dlatent_avg_beta == 1):
        dlatent_avg_beta = None
    if not is_training or (style_mixing_prob is not None and not tflib.is_tf_expression(style_mixing_prob) and style_mixing_prob <= 0):
        style_mixing_prob = None

    # Setup components.
    if 'synthesis' not in components:
        components.synthesis = tflib.Network('G_synthesis', func_name=G_synthesis, **kwargs)
    num_layers = components.synthesis.input_shape[1]
    dlatent_size = components.synthesis.input_shape[2]
    if 'mapping' not in components:
        components.mapping = tflib.Network('G_mapping', func_name=G_mapping, dlatent_broadcast=num_layers, **kwargs)

    # Setup variables.
    lod_in = tf.get_variable('lod', initializer=np.float32(0), trainable=False)
    dlatent_avg = tf.get_variable('dlatent_avg', shape=[dlatent_size], initializer=tf.initializers.zeros(), trainable=False)

    # Evaluate mapping network.
    dlatents = components.mapping.get_output_for(latents_in, labels_in, **kwargs)

    # Update moving average of W.
    if dlatent_avg_beta is not None:
        with tf.variable_scope('DlatentAvg'):
            batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0)
            update_op = tf.assign(dlatent_avg, tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta))
            # The identity under the control dependency makes every consumer of
            # dlatents trigger the moving-average update.
            with tf.control_dependencies([update_op]):
                dlatents = tf.identity(dlatents)

    # Perform style mixing regularization.
    if style_mixing_prob is not None:
        with tf.name_scope('StyleMix'):
            latents2 = tf.random_normal(tf.shape(latents_in))
            dlatents2 = components.mapping.get_output_for(latents2, labels_in, **kwargs)
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
            cur_layers = num_layers - tf.cast(lod_in, tf.int32) * 2
            # With probability style_mixing_prob pick a random cutoff layer;
            # otherwise the cutoff equals cur_layers, so no active layer is mixed.
            mixing_cutoff = tf.cond(
                tf.random_uniform([], 0.0, 1.0) < style_mixing_prob,
                lambda: tf.random_uniform([], 1, cur_layers, dtype=tf.int32),
                lambda: cur_layers)
            dlatents = tf.where(tf.broadcast_to(layer_idx < mixing_cutoff, tf.shape(dlatents)), dlatents, dlatents2)

    # Apply truncation trick.
    if truncation_psi is not None and truncation_cutoff is not None:
        with tf.variable_scope('Truncation'):
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
            ones = np.ones(layer_idx.shape, dtype=np.float32)
            # Layers below the cutoff are pulled towards dlatent_avg by
            # truncation_psi; the remaining layers keep a coefficient of 1.
            coefs = tf.where(layer_idx < truncation_cutoff, truncation_psi * ones, ones)
            dlatents = tflib.lerp(dlatent_avg, dlatents, coefs)

    # Evaluate synthesis network.
    # The synthesis network's own 'lod' variable is synchronized with lod_in first.
    with tf.control_dependencies([tf.assign(components.synthesis.find_var('lod'), lod_in)]):
        images_out = components.synthesis.get_output_for(dlatents, force_clean_graph=is_template_graph, **kwargs)
    return tf.identity(images_out, name='images_out')

#----------------------------------------------------------------------------
# Mapping network used in the StyleGAN paper.

def G_mapping(
    latents_in,                             # First input: Latent vectors (Z) [minibatch, latent_size].
    labels_in,                              # Second input: Conditioning labels [minibatch, label_size].
    latent_size             = 512,          # Latent vector (Z) dimensionality.
    label_size              = 0,            # Label dimensionality, 0 if no labels.
    dlatent_size            = 512,          # Disentangled latent (W) dimensionality.
    dlatent_broadcast       = None,         # Output disentangled latent (W) as [minibatch, dlatent_size] or [minibatch, dlatent_broadcast, dlatent_size].
    mapping_layers          = 8,            # Number of mapping layers.
    mapping_fmaps           = 512,          # Number of activations in the mapping layers.
    mapping_lrmul           = 0.01,         # Learning rate multiplier for the mapping layers.
    mapping_nonlinearity    = 'lrelu',      # Activation function: 'relu', 'lrelu'.
    use_wscale              = True,         # Enable equalized learning rate?
    normalize_latents       = True,         # Normalize latent vectors (Z) before feeding them to the mapping layers?
    dtype                   = 'float32',    # Data type to use for activations and outputs.
    **_kwargs):                             # Ignore unrecognized keyword args.
    """Map latents (and optional labels) to dlatents via a stack of dense layers.

    Returns a tensor named 'dlatents_out'; when dlatent_broadcast is given the
    result is tiled to [minibatch, dlatent_broadcast, dlatent_size].
    """

    act, gain = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}[mapping_nonlinearity]

    # Inputs.
    latents_in.set_shape([None, latent_size])
    labels_in.set_shape([None, label_size])
    latents_in = tf.cast(latents_in, dtype)
    labels_in = tf.cast(labels_in, dtype)
    x = latents_in

    # Embed labels and concatenate them with latents.
    if label_size:
        with tf.variable_scope('LabelConcat'):
            w = tf.get_variable('weight', shape=[label_size, latent_size], initializer=tf.initializers.random_normal())
            y = tf.matmul(labels_in, tf.cast(w, dtype))
            x = tf.concat([x, y], axis=1)

    # Normalize latents.
    if normalize_latents:
        x = pixel_norm(x)

    # Mapping layers.
    for layer_idx in range(mapping_layers):
        with tf.variable_scope('Dense%d' % layer_idx):
            # Only the last layer outputs dlatent_size activations.
            fmaps = dlatent_size if layer_idx == mapping_layers - 1 else mapping_fmaps
            x = dense(x, fmaps=fmaps, gain=gain, use_wscale=use_wscale, lrmul=mapping_lrmul)
            x = apply_bias(x, lrmul=mapping_lrmul)
            x = act(x)

    # Broadcast.
    if dlatent_broadcast is not None:
        with tf.variable_scope('Broadcast'):
            x = tf.tile(x[:, np.newaxis], [1, dlatent_broadcast, 1])

    # Output.
    assert x.dtype == tf.as_dtype(dtype)
    return tf.identity(x, name='dlatents_out')

#----------------------------------------------------------------------------
# Synthesis network used in the StyleGAN paper.

def G_synthesis(
    dlatents_in,                        # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size].
    dlatent_size        = 512,          # Disentangled latent (W) dimensionality.
    num_channels        = 3,            # Number of output color channels.
    resolution          = 1024,         # Output resolution.
    fmap_base           = 8192,         # Overall multiplier for the number of feature maps.
    fmap_decay          = 1.0,          # log2 feature map reduction when doubling the resolution.
    fmap_max            = 512,          # Maximum number of feature maps in any layer.
    use_styles          = True,         # Enable style inputs?
    const_input_layer   = True,         # First layer is a learned constant?
    use_noise           = True,         # Enable noise inputs?
    randomize_noise     = True,         # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables.
    nonlinearity        = 'lrelu',      # Activation function: 'relu', 'lrelu'
    use_wscale          = True,         # Enable equalized learning rate?
    use_pixel_norm      = False,        # Enable pixelwise feature vector normalization?
    use_instance_norm   = True,         # Enable instance normalization?
    dtype               = 'float32',    # Data type to use for activations and outputs.
    fused_scale         = 'auto',       # True = fused convolution + scaling, False = separate ops, 'auto' = decide automatically.
    blur_filter         = [1,2,1],      # Low-pass filter to apply when resampling activations. None = no filtering.
    structure           = 'auto',       # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
    is_template_graph   = False,        # True = template graph constructed by the Network class, False = actual evaluation.
    force_clean_graph   = False,        # True = construct a clean graph that looks nice in TensorBoard, False = default behavior.
    **_kwargs):                         # Ignore unrecognized keyword args.
    """Build the synthesis network and return the 'images_out' tensor.

    Starting from a 4x4 input, each resolution block doubles the spatial size
    and every layer ends with layer_epilogue() (noise, bias, activation,
    optional normalization, style modulation). The 'structure' argument picks
    one of three ways of wiring the per-resolution outputs together.
    """

    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4
    def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max)
    def blur(x): return blur2d(x, blur_filter) if blur_filter else x
    if is_template_graph: force_clean_graph = True
    if force_clean_graph: randomize_noise = False
    if structure == 'auto': structure = 'linear' if force_clean_graph else 'recursive'
    act, gain = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}[nonlinearity]
    num_layers = resolution_log2 * 2 - 2
    num_styles = num_layers if use_styles else 1
    images_out = None

    # Primary inputs.
    dlatents_in.set_shape([None, num_styles, dlatent_size])
    dlatents_in = tf.cast(dlatents_in, dtype)
    lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0), trainable=False), dtype)

    # Noise inputs.
    noise_inputs = []
    if use_noise:
        for layer_idx in range(num_layers):
            res = layer_idx // 2 + 2
            # use_noise is truthy here; the boolean True acts as a size of 1 in the shape list.
            shape = [1, use_noise, 2**res, 2**res]
            noise_inputs.append(tf.get_variable('noise%d' % layer_idx, shape=shape, initializer=tf.initializers.random_normal(), trainable=False))

    # Things to do at the end of each layer.
    def layer_epilogue(x, layer_idx):
        if use_noise:
            x = apply_noise(x, noise_inputs[layer_idx], randomize_noise=randomize_noise)
        x = apply_bias(x)
        x = act(x)
        if use_pixel_norm:
            x = pixel_norm(x)
        if use_instance_norm:
            x = instance_norm(x)
        if use_styles:
            x = style_mod(x, dlatents_in[:, layer_idx], use_wscale=use_wscale)
        return x

    # Early layers.
    with tf.variable_scope('4x4'):
        if const_input_layer:
            with tf.variable_scope('Const'):
                x = tf.get_variable('const', shape=[1, nf(1), 4, 4], initializer=tf.initializers.ones())
                # Tile the learned constant to the minibatch size of dlatents_in.
                x = layer_epilogue(tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1]), 0)
        else:
            with tf.variable_scope('Dense'):
                x = dense(dlatents_in[:, 0], fmaps=nf(1)*16, gain=gain/4, use_wscale=use_wscale) # tweak gain to match the official implementation of Progressive GAN
                x = layer_epilogue(tf.reshape(x, [-1, nf(1), 4, 4]), 0)
        with tf.variable_scope('Conv'):
            x = layer_epilogue(conv2d(x, fmaps=nf(1), kernel=3, gain=gain, use_wscale=use_wscale), 1)

    # Building blocks for remaining layers.
    def block(res, x): # res = 3..resolution_log2
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            with tf.variable_scope('Conv0_up'):
                x = layer_epilogue(blur(upscale2d_conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale, fused_scale=fused_scale)), res*2-4)
            with tf.variable_scope('Conv1'):
                x = layer_epilogue(conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale), res*2-3)
            return x
    def torgb(res, x): # res = 2..resolution_log2
        lod = resolution_log2 - res
        with tf.variable_scope('ToRGB_lod%d' % lod):
            return apply_bias(conv2d(x, fmaps=num_channels, kernel=1, gain=1, use_wscale=use_wscale))

    # Fixed structure: simple and efficient, but does not support progressive growing.
    if structure == 'fixed':
        for res in range(3, resolution_log2 + 1):
            x = block(res, x)
        images_out = torgb(resolution_log2, x)

    # Linear structure: simple but inefficient.
    if structure == 'linear':
        images_out = torgb(2, x)
        for res in range(3, resolution_log2 + 1):
            lod = resolution_log2 - res
            x = block(res, x)
            img = torgb(res, x)
            images_out = upscale2d(images_out)
            with tf.variable_scope('Grow_lod%d' % lod):
                images_out = tflib.lerp_clip(img, images_out, lod_in - lod)

    # Recursive structure: complex but efficient.
    if structure == 'recursive':
        # cset() wraps a thunk so that new_lambda is used when new_cond holds
        # and the previously chosen thunk otherwise.
        def cset(cur_lambda, new_cond, new_lambda):
            return lambda: tf.cond(new_cond, new_lambda, cur_lambda)
        def grow(x, res, lod):
            y = block(res, x)
            img = lambda: upscale2d(torgb(res, y), 2**lod)
            img = cset(img, (lod_in > lod), lambda: upscale2d(tflib.lerp(torgb(res, y), upscale2d(torgb(res - 1, x)), lod_in - lod), 2**lod))
            if lod > 0: img = cset(img, (lod_in < lod), lambda: grow(y, res + 1, lod - 1))
            return img()
        images_out = grow(x, 3, resolution_log2 - 3)

    assert images_out.dtype == tf.as_dtype(dtype)
    return tf.identity(images_out, name='images_out')

#----------------------------------------------------------------------------
# Discriminator used in the StyleGAN paper.

def D_basic(
    images_in,                          # First input: Images [minibatch, channel, height, width].
    labels_in,                          # Second input: Labels [minibatch, label_size].
    num_channels        = 1,            # Number of input color channels. Overridden based on dataset.
    resolution          = 32,           # Input resolution. Overridden based on dataset.
    label_size          = 0,            # Dimensionality of the labels, 0 if no labels. Overridden based on dataset.
    fmap_base           = 8192,         # Overall multiplier for the number of feature maps.
    fmap_decay          = 1.0,          # log2 feature map reduction when doubling the resolution.
    fmap_max            = 512,          # Maximum number of feature maps in any layer.
    nonlinearity        = 'lrelu',      # Activation function: 'relu', 'lrelu',
    use_wscale          = True,         # Enable equalized learning rate?
    mbstd_group_size    = 4,            # Group size for the minibatch standard deviation layer, 0 = disable.
    mbstd_num_features  = 1,            # Number of features for the minibatch standard deviation layer.
    dtype               = 'float32',    # Data type to use for activations and outputs.
    fused_scale         = 'auto',       # True = fused convolution + scaling, False = separate ops, 'auto' = decide automatically.
    blur_filter         = [1,2,1],      # Low-pass filter to apply when resampling activations. None = no filtering.
    structure           = 'auto',       # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
    is_template_graph   = False,        # True = template graph constructed by the Network class, False = actual evaluation.
    **_kwargs):                         # Ignore unrecognized keyword args.
    """Build the discriminator and return the 'scores_out' tensor.

    Images are converted with fromrgb() and reduced resolution by resolution
    through block() down to 4x4, where two dense layers produce the scores.
    With labels, the score matching the given label is selected at the end.
    """

    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4
    def nf(stage): return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max)
    def blur(x): return blur2d(x, blur_filter) if blur_filter else x
    if structure == 'auto': structure = 'linear' if is_template_graph else 'recursive'
    act, gain = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}[nonlinearity]

    images_in.set_shape([None, num_channels, resolution, resolution])
    labels_in.set_shape([None, label_size])
    images_in = tf.cast(images_in, dtype)
    labels_in = tf.cast(labels_in, dtype)
    lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0.0), trainable=False), dtype)
    scores_out = None

    # Building blocks.
    def fromrgb(x, res): # res = 2..resolution_log2
        with tf.variable_scope('FromRGB_lod%d' % (resolution_log2 - res)):
            return act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=1, gain=gain, use_wscale=use_wscale)))
    def block(x, res): # res = 2..resolution_log2
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            if res >= 3: # 8x8 and up
                with tf.variable_scope('Conv0'):
                    x = act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale)))
                with tf.variable_scope('Conv1_down'):
                    x = act(apply_bias(conv2d_downscale2d(blur(x), fmaps=nf(res-2), kernel=3, gain=gain, use_wscale=use_wscale, fused_scale=fused_scale)))
            else: # 4x4
                if mbstd_group_size > 1:
                    x = minibatch_stddev_layer(x, mbstd_group_size, mbstd_num_features)
                with tf.variable_scope('Conv'):
                    x = act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale)))
                with tf.variable_scope('Dense0'):
                    x = act(apply_bias(dense(x, fmaps=nf(res-2), gain=gain, use_wscale=use_wscale)))
                with tf.variable_scope('Dense1'):
                    # One output per label, or a single output when there are no labels.
                    x = apply_bias(dense(x, fmaps=max(label_size, 1), gain=1, use_wscale=use_wscale))
            return x

    # Fixed structure: simple and efficient, but does not support progressive growing.
    if structure == 'fixed':
        x = fromrgb(images_in, resolution_log2)
        for res in range(resolution_log2, 2, -1):
            x = block(x, res)
        scores_out = block(x, 2)

    # Linear structure: simple but inefficient.
    if structure == 'linear':
        img = images_in
        x = fromrgb(img, resolution_log2)
        for res in range(resolution_log2, 2, -1):
            lod = resolution_log2 - res
            x = block(x, res)
            img = downscale2d(img)
            y = fromrgb(img, res - 1)
            with tf.variable_scope('Grow_lod%d' % lod):
                x = tflib.lerp_clip(x, y, lod_in - lod)
        scores_out = block(x, 2)

    # Recursive structure: complex but efficient.
    if structure == 'recursive':
        # cset() wraps a thunk so that new_lambda is used when new_cond holds
        # and the previously chosen thunk otherwise.
        def cset(cur_lambda, new_cond, new_lambda):
            return lambda: tf.cond(new_cond, new_lambda, cur_lambda)
        def grow(res, lod):
            x = lambda: fromrgb(downscale2d(images_in, 2**lod), res)
            if lod > 0: x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1))
            x = block(x(), res); y = lambda: x
            if res > 2: y = cset(y, (lod_in > lod), lambda: tflib.lerp(x, fromrgb(downscale2d(images_in, 2**(lod+1)), res - 1), lod_in - lod))
            return y()
        scores_out = grow(2, resolution_log2 - 2)

    # Label conditioning from "Which Training Methods for GANs do actually Converge?"
    if label_size:
        with tf.variable_scope('LabelSwitch'):
            scores_out = tf.reduce_sum(scores_out * labels_in, axis=1, keepdims=True)

    assert scores_out.dtype == tf.as_dtype(dtype)
    scores_out = tf.identity(scores_out, name='scores_out')
    return scores_out

#----------------------------------------------------------------------------

================================================
FILE: FQ-StyleGAN/training/networks_stylegan2.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html

"""Network architectures used in the StyleGAN2 paper."""

import numpy as np
import tensorflow as tf
import dnnlib
import dnnlib.tflib as tflib
from dnnlib.tflib.ops.upfirdn_2d import upsample_2d, downsample_2d, upsample_conv_2d, conv_downsample_2d
from dnnlib.tflib.ops.fused_bias_act import fused_bias_act
from tensorflow.python.training import moving_averages
# NOTE: Do not import any application-specific modules here!
# Specify all network parameters as kwargs.

#----------------------------------------------------------------------------
# Get/create weight tensor for a convolution or fully-connected layer.

def get_weight(shape, gain=1, use_wscale=True, lrmul=1, weight_var='weight'):
    """Create (or fetch) a weight variable and return it times its runtime scale.

    Args:
        shape: [kernel, kernel, fmaps_in, fmaps_out] or [in, out]; every
            dimension except the last counts towards the fan-in.
        gain: Numerator of the He standard deviation.
        use_wscale: True = initialize with std 1/lrmul and apply the He
            constant at runtime; False = fold the He constant into the init.
        lrmul: Learning rate multiplier.
        weight_var: Name of the TensorFlow variable.

    Returns:
        The variable multiplied by the runtime coefficient.
    """
    he_std = gain / np.sqrt(np.prod(shape[:-1])) # He init

    # Equalized learning rate and custom learning rate multiplier.
    init_std, runtime_coef = (1.0 / lrmul, he_std * lrmul) if use_wscale else (he_std / lrmul, lrmul)

    # Create variable.
    weight = tf.get_variable(weight_var, shape=shape, initializer=tf.initializers.random_normal(0, init_std))
    return weight * runtime_coef

#----------------------------------------------------------------------------
# Fully-connected layer.

def dense_layer(x, fmaps, gain=1, use_wscale=True, lrmul=1, weight_var='weight'):
    """Multiply x by a [features_in, fmaps] weight matrix (no bias).

    Inputs of rank greater than 2 are flattened to [minibatch, features]
    first. gain, use_wscale, lrmul and weight_var are passed on to
    get_weight().
    """
    if len(x.shape) > 2:
        flat_size = np.prod([dim.value for dim in x.shape[1:]])
        x = tf.reshape(x, [-1, flat_size])
    weight = get_weight([x.shape[1].value, fmaps], gain=gain, use_wscale=use_wscale, lrmul=lrmul, weight_var=weight_var)
    return tf.matmul(x, tf.cast(weight, x.dtype))

#----------------------------------------------------------------------------
# Convolution layer with optional upsampling or downsampling.

def conv2d_layer(x, fmaps, kernel, up=False, down=False, resample_kernel=None, gain=1, use_wscale=True, lrmul=1, weight_var='weight'):
    """Convolve an NCHW tensor, optionally combined with up- or downsampling.

    Args:
        x: Input activations in NCHW layout.
        fmaps: Number of output feature maps.
        kernel: Kernel size; must be odd and >= 1.
        up: Use upsample_conv_2d(). Mutually exclusive with down.
        down: Use conv_downsample_2d(). Mutually exclusive with up.
        resample_kernel: Passed as k to the resampling op.
        gain, use_wscale, lrmul, weight_var: Passed on to get_weight().

    Returns:
        The convolved tensor.
    """
    assert not (up and down)
    assert kernel >= 1 and kernel % 2 == 1
    weight = get_weight([kernel, kernel, x.shape[1].value, fmaps], gain=gain, use_wscale=use_wscale, lrmul=lrmul, weight_var=weight_var)
    weight = tf.cast(weight, x.dtype)
    if up:
        return upsample_conv_2d(x, weight, data_format='NCHW', k=resample_kernel)
    if down:
        return conv_downsample_2d(x, weight, data_format='NCHW', k=resample_kernel)
    return tf.nn.conv2d(x, weight, data_format='NCHW', strides=[1,1,1,1], padding='SAME')

#----------------------------------------------------------------------------
# Apply bias and activation func.

def apply_bias_act(x, act='linear', alpha=None, gain=None, lrmul=1, bias_var='bias'): b = tf.get_variable(bias_var, shape=[x.shape[1]], initializer=tf.initializers.zeros()) * lrmul return fused_bias_act(x, b=tf.cast(b, x.dtype), act=act, alpha=alpha, gain=gain) #---------------------------------------------------------------------------- # Naive upsampling (nearest neighbor) and downsampling (average pooling). def naive_upsample_2d(x, factor=2): with tf.variable_scope('NaiveUpsample'): _N, C, H, W = x.shape.as_list() x = tf.reshape(x, [-1, C, H, 1, W, 1]) x = tf.tile(x, [1, 1, 1, factor, 1, factor]) return tf.reshape(x, [-1, C, H * factor, W * factor]) def naive_downsample_2d(x, factor=2): with tf.variable_scope('NaiveDownsample'): _N, C, H, W = x.shape.as_list() x = tf.reshape(x, [-1, C, H // factor, factor, W // factor, factor]) return tf.reduce_mean(x, axis=[3,5]) #---------------------------------------------------------------------------- # Modulated convolution layer. def modulated_conv2d_layer(x, y, fmaps, kernel, up=False, down=False, demodulate=True, resample_kernel=None, gain=1, use_wscale=True, lrmul=1, fused_modconv=True, weight_var='weight', mod_weight_var='mod_weight', mod_bias_var='mod_bias'): assert not (up and down) assert kernel >= 1 and kernel % 2 == 1 # Get weight. w = get_weight([kernel, kernel, x.shape[1].value, fmaps], gain=gain, use_wscale=use_wscale, lrmul=lrmul, weight_var=weight_var) ww = w[np.newaxis] # [BkkIO] Introduce minibatch dimension. # Modulate. s = dense_layer(y, fmaps=x.shape[1].value, weight_var=mod_weight_var) # [BI] Transform incoming W to style. s = apply_bias_act(s, bias_var=mod_bias_var) + 1 # [BI] Add bias (initially 1). ww *= tf.cast(s[:, np.newaxis, np.newaxis, :, np.newaxis], w.dtype) # [BkkIO] Scale input feature maps. # Demodulate. if demodulate: d = tf.rsqrt(tf.reduce_sum(tf.square(ww), axis=[1,2,3]) + 1e-8) # [BO] Scaling factor. 
ww *= d[:, np.newaxis, np.newaxis, np.newaxis, :] # [BkkIO] Scale output feature maps. # Reshape/scale input. if fused_modconv: x = tf.reshape(x, [1, -1, x.shape[2], x.shape[3]]) # Fused => reshape minibatch to convolution groups. w = tf.reshape(tf.transpose(ww, [1, 2, 3, 0, 4]), [ww.shape[1], ww.shape[2], ww.shape[3], -1]) else: x *= tf.cast(s[:, :, np.newaxis, np.newaxis], x.dtype) # [BIhw] Not fused => scale input activations. # Convolution with optional up/downsampling. if up: x = upsample_conv_2d(x, tf.cast(w, x.dtype), data_format='NCHW', k=resample_kernel) elif down: x = conv_downsample_2d(x, tf.cast(w, x.dtype), data_format='NCHW', k=resample_kernel) else: x = tf.nn.conv2d(x, tf.cast(w, x.dtype), data_format='NCHW', strides=[1,1,1,1], padding='SAME') # Reshape/scale output. if fused_modconv: x = tf.reshape(x, [-1, fmaps, x.shape[2], x.shape[3]]) # Fused => reshape convolution groups back to minibatch. elif demodulate: x *= tf.cast(d[:, :, np.newaxis, np.newaxis], x.dtype) # [BOhw] Not fused => scale output activations. return x #---------------------------------------------------------------------------- # Minibatch standard deviation layer. def minibatch_stddev_layer(x, group_size=4, num_new_features=1): group_size = tf.minimum(group_size, tf.shape(x)[0]) # Minibatch must be divisible by (or smaller than) group_size. s = x.shape # [NCHW] Input shape. y = tf.reshape(x, [group_size, -1, num_new_features, s[1]//num_new_features, s[2], s[3]]) # [GMncHW] Split minibatch into M groups of size G. Split channels into n channel groups c. y = tf.cast(y, tf.float32) # [GMncHW] Cast to FP32. y -= tf.reduce_mean(y, axis=0, keepdims=True) # [GMncHW] Subtract mean over group. y = tf.reduce_mean(tf.square(y), axis=0) # [MncHW] Calc variance over group. y = tf.sqrt(y + 1e-8) # [MncHW] Calc stddev over group. y = tf.reduce_mean(y, axis=[2,3,4], keepdims=True) # [Mn111] Take average over fmaps and pixels. 
y = tf.reduce_mean(y, axis=[2]) # [Mn11] Split channels into c channel groups y = tf.cast(y, x.dtype) # [Mn11] Cast back to original data type. y = tf.tile(y, [group_size, 1, s[2], s[3]]) # [NnHW] Replicate over group and pixels. return tf.concat([x, y], axis=1) # [NCHW] Append as new fmap. #---------------------------------------------------------------------------- # Main generator network. # Composed of two sub-networks (mapping and synthesis) that are defined below. # Used in configs B-F (Table 1). def G_main( latents_in, # First input: Latent vectors (Z) [minibatch, latent_size]. labels_in, # Second input: Conditioning labels [minibatch, label_size]. truncation_psi = 0.5, # Style strength multiplier for the truncation trick. None = disable. truncation_cutoff = None, # Number of layers for which to apply the truncation trick. None = disable. truncation_psi_val = None, # Value for truncation_psi to use during validation. truncation_cutoff_val = None, # Value for truncation_cutoff to use during validation. dlatent_avg_beta = 0.995, # Decay for tracking the moving average of W during training. None = disable. style_mixing_prob = 0.9, # Probability of mixing styles during training. None = disable. is_training = False, # Network is under training? Enables and disables specific features. is_validation = False, # Network is under validation? Chooses which value to use for truncation_psi. return_dlatents = False, # Return dlatents in addition to the images? is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation. components = dnnlib.EasyDict(), # Container for sub-networks. Retained between calls. mapping_func = 'G_mapping', # Build func name for the mapping network. synthesis_func = 'G_synthesis_stylegan2', # Build func name for the synthesis network. **kwargs): # Arguments for sub-networks (mapping and synthesis). # Validate arguments. 
assert not is_training or not is_validation assert isinstance(components, dnnlib.EasyDict) if is_validation: truncation_psi = truncation_psi_val truncation_cutoff = truncation_cutoff_val if is_training or (truncation_psi is not None and not tflib.is_tf_expression(truncation_psi) and truncation_psi == 1): truncation_psi = None if is_training: truncation_cutoff = None if not is_training or (dlatent_avg_beta is not None and not tflib.is_tf_expression(dlatent_avg_beta) and dlatent_avg_beta == 1): dlatent_avg_beta = None if not is_training or (style_mixing_prob is not None and not tflib.is_tf_expression(style_mixing_prob) and style_mixing_prob <= 0): style_mixing_prob = None # Setup components. if 'synthesis' not in components: components.synthesis = tflib.Network('G_synthesis', func_name=globals()[synthesis_func], **kwargs) num_layers = components.synthesis.input_shape[1] dlatent_size = components.synthesis.input_shape[2] if 'mapping' not in components: components.mapping = tflib.Network('G_mapping', func_name=globals()[mapping_func], dlatent_broadcast=num_layers, **kwargs) # Setup variables. lod_in = tf.get_variable('lod', initializer=np.float32(0), trainable=False) dlatent_avg = tf.get_variable('dlatent_avg', shape=[dlatent_size], initializer=tf.initializers.zeros(), trainable=False) # Evaluate mapping network. dlatents = components.mapping.get_output_for(latents_in, labels_in, is_training=is_training, **kwargs) dlatents = tf.cast(dlatents, tf.float32) # Update moving average of W. if dlatent_avg_beta is not None: with tf.variable_scope('DlatentAvg'): batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0) update_op = tf.assign(dlatent_avg, tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta)) with tf.control_dependencies([update_op]): dlatents = tf.identity(dlatents) # Perform style mixing regularization. 
def G_mapping(
    latents_in,                             # First input: Latent vectors (Z) [minibatch, latent_size].
    labels_in,                              # Second input: Conditioning labels [minibatch, label_size].
    latent_size             = 512,          # Latent vector (Z) dimensionality.
    label_size              = 0,            # Label dimensionality, 0 if no labels.
    dlatent_size            = 512,          # Disentangled latent (W) dimensionality.
    dlatent_broadcast       = None,         # Output disentangled latent (W) as [minibatch, dlatent_size] or [minibatch, dlatent_broadcast, dlatent_size].
    mapping_layers          = 8,            # Number of mapping layers.
    mapping_fmaps           = 512,          # Number of activations in the mapping layers.
    mapping_lrmul           = 0.01,         # Learning rate multiplier for the mapping layers.
    mapping_nonlinearity    = 'lrelu',      # Activation function: 'relu', 'lrelu', etc.
    normalize_latents       = True,         # Normalize latent vectors (Z) before feeding them to the mapping layers?
    dtype                   = 'float32',    # Data type to use for activations and outputs.
    **_kwargs):                             # Ignore unrecognized keyword args.
    """Build the mapping network graph that turns latents (Z) into dlatents (W).

    The latents are optionally concatenated with a learned label embedding,
    optionally normalized, pushed through `mapping_layers` dense layers and
    optionally tiled along a new axis of length `dlatent_broadcast`.
    The result is returned as a tensor named 'dlatents_out'.
    """
    # Pin the static shapes and move both inputs to the working dtype.
    latents_in.set_shape([None, latent_size])
    labels_in.set_shape([None, label_size])
    latents_in = tf.cast(latents_in, dtype)
    labels_in = tf.cast(labels_in, dtype)
    hidden = latents_in

    # Conditional case: project the labels through a learned matrix and append them.
    if label_size:
        with tf.variable_scope('LabelConcat'):
            label_weight = tf.get_variable(
                'weight',
                shape=[label_size, latent_size],
                initializer=tf.initializers.random_normal())
            label_embed = tf.matmul(labels_in, tf.cast(label_weight, dtype))
            hidden = tf.concat([hidden, label_embed], axis=1)

    # Rescale every row by the reciprocal of its root-mean-square.
    if normalize_latents:
        with tf.variable_scope('Normalize'):
            mean_square = tf.reduce_mean(tf.square(hidden), axis=1, keepdims=True)
            hidden = hidden * tf.rsqrt(mean_square + 1e-8)

    # Dense stack; only the final layer uses the dlatent width.
    final_idx = mapping_layers - 1
    for idx in range(mapping_layers):
        with tf.variable_scope('Dense%d' % idx):
            width = dlatent_size if idx == final_idx else mapping_fmaps
            projected = dense_layer(hidden, fmaps=width, lrmul=mapping_lrmul)
            hidden = apply_bias_act(projected, act=mapping_nonlinearity, lrmul=mapping_lrmul)

    # Repeat the same W vector once per requested layer.
    if dlatent_broadcast is not None:
        with tf.variable_scope('Broadcast'):
            hidden = tf.tile(hidden[:, np.newaxis], [1, dlatent_broadcast, 1])

    assert hidden.dtype == tf.as_dtype(dtype)
    return tf.identity(hidden, name='dlatents_out')
#----------------------------------------------------------------------------
# StyleGAN synthesis network with revised architecture (Figure 2d).
# Implements progressive growing, but no skip connections or residual nets (Figure 7).
# Used in configs B-D (Table 1).

def G_synthesis_stylegan_revised(
    dlatents_in,                        # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size].
    dlatent_size        = 512,          # Disentangled latent (W) dimensionality.
    num_channels        = 3,            # Number of output color channels.
    resolution          = 1024,         # Output resolution.
    fmap_base           = 16 << 10,     # Overall multiplier for the number of feature maps.
    fmap_decay          = 1.0,          # log2 feature map reduction when doubling the resolution.
    fmap_min            = 1,            # Minimum number of feature maps in any layer.
    fmap_max            = 512,          # Maximum number of feature maps in any layer.
    randomize_noise     = True,         # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables.
    nonlinearity        = 'lrelu',      # Activation function: 'relu', 'lrelu', etc.
    dtype               = 'float32',    # Data type to use for activations and outputs.
    resample_kernel     = [1,3,3,1],    # Low-pass filter to apply when resampling activations. None = no filtering.
    fused_modconv       = True,         # Implement modulated_conv2d_layer() as a single fused op?
    structure           = 'auto',       # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
    is_template_graph   = False,        # True = template graph constructed by the Network class, False = actual evaluation.
    force_clean_graph   = False,        # True = construct a clean graph that looks nice in TensorBoard, False = default behavior.
    **_kwargs):                         # Ignore unrecognized keyword args.
    """Build the progressive-growing synthesis graph and return the tensor named 'images_out'.

    `resolution` must be a power of two and at least 4 (asserted below).
    The current level of detail is read from a non-trainable variable 'lod'
    created inside this function. `structure` selects one of three ways of
    wiring the per-resolution blocks together; 'auto' resolves to 'linear'
    when a clean graph is requested and to 'recursive' otherwise.
    """

    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4
    # Feature-map count for a stage: fmap_base / 2**(stage * fmap_decay), clipped to [fmap_min, fmap_max].
    def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max)
    # A template graph is always built clean, and a clean graph never uses random noise ops.
    if is_template_graph: force_clean_graph = True
    if force_clean_graph: randomize_noise = False
    if structure == 'auto': structure = 'linear' if force_clean_graph else 'recursive'
    act = nonlinearity
    num_layers = resolution_log2 * 2 - 2
    images_out = None

    # Primary inputs.
    dlatents_in.set_shape([None, num_layers, dlatent_size])
    dlatents_in = tf.cast(dlatents_in, dtype)
    lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0), trainable=False), dtype)

    # Noise inputs.
    # One non-trainable [1, 1, 2**res, 2**res] variable per conv layer; layer(), defined next, reads them only when randomize_noise is False.
    noise_inputs = []
    for layer_idx in range(num_layers - 1):
        res = (layer_idx + 5) // 2
        shape = [1, 1, 2**res, 2**res]
        noise_inputs.append(tf.get_variable('noise%d' % layer_idx, shape=shape, initializer=tf.initializers.random_normal(), trainable=False))

    # Single convolution layer with all the bells and whistles.
    # Modulated conv driven by dlatents_in[:, layer_idx], then noise scaled by a learned scalar, then bias + activation.
    def layer(x, layer_idx, fmaps, kernel, up=False):
        x = modulated_conv2d_layer(x, dlatents_in[:, layer_idx], fmaps=fmaps, kernel=kernel, up=up, resample_kernel=resample_kernel, fused_modconv=fused_modconv)
        if randomize_noise:
            noise = tf.random_normal([tf.shape(x)[0], 1, x.shape[2], x.shape[3]], dtype=x.dtype)
        else:
            noise = tf.cast(noise_inputs[layer_idx], x.dtype)
        # noise_strength is initialized to zero, so the noise contributes nothing until it is trained.
        noise_strength = tf.get_variable('noise_strength', shape=[], initializer=tf.initializers.zeros())
        x += noise * tf.cast(noise_strength, x.dtype)
        return apply_bias_act(x, act=act)

    # Early layers.
    # The network starts from a learned [1, nf(1), 4, 4] constant tiled over the minibatch.
    with tf.variable_scope('4x4'):
        with tf.variable_scope('Const'):
            x = tf.get_variable('const', shape=[1, nf(1), 4, 4], initializer=tf.initializers.random_normal())
            x = tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1])
        with tf.variable_scope('Conv'):
            x = layer(x, layer_idx=0, fmaps=nf(1), kernel=3)

    # Building blocks for remaining layers.
    def block(res, x): # res = 3..resolution_log2
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            with tf.variable_scope('Conv0_up'):
                x = layer(x, layer_idx=res*2-5, fmaps=nf(res-1), kernel=3, up=True)
            with tf.variable_scope('Conv1'):
                x = layer(x, layer_idx=res*2-4, fmaps=nf(res-1), kernel=3)
            return x
    def torgb(res, x): # res = 2..resolution_log2
        with tf.variable_scope('ToRGB_lod%d' % (resolution_log2 - res)):
            return apply_bias_act(modulated_conv2d_layer(x, dlatents_in[:, res*2-3], fmaps=num_channels, kernel=1, demodulate=False, fused_modconv=fused_modconv))

    # Fixed structure: simple and efficient, but does not support progressive growing.
    if structure == 'fixed':
        for res in range(3, resolution_log2 + 1):
            x = block(res, x)
        images_out = torgb(resolution_log2, x)

    # Linear structure: simple but inefficient.
    # Every resolution is evaluated, and each ToRGB output is blended with the upsampled image so far according to lod_in - lod.
    if structure == 'linear':
        images_out = torgb(2, x)
        for res in range(3, resolution_log2 + 1):
            lod = resolution_log2 - res
            x = block(res, x)
            img = torgb(res, x)
            with tf.variable_scope('Upsample_lod%d' % lod):
                images_out = upsample_2d(images_out)
            with tf.variable_scope('Grow_lod%d' % lod):
                images_out = tflib.lerp_clip(img, images_out, lod_in - lod)

    # Recursive structure: complex but efficient.
    if structure == 'recursive':
        # Wrap cur_lambda in a tf.cond: the returned callable yields new_lambda() when new_cond holds, else cur_lambda().
        def cset(cur_lambda, new_cond, new_lambda):
            return lambda: tf.cond(new_cond, new_lambda, cur_lambda)
        def grow(x, res, lod):
            y = block(res, x)
            # Default branch: this resolution's image, naively upsampled by 2**lod.
            img = lambda: naive_upsample_2d(torgb(res, y), factor=2**lod)
            # lod_in > lod: blend this resolution with the upsampled previous one.
            img = cset(img, (lod_in > lod), lambda: naive_upsample_2d(tflib.lerp(torgb(res, y), upsample_2d(torgb(res - 1, x)), lod_in - lod), factor=2**lod))
            # lod_in < lod: recurse into the next higher resolution. This cond is added last, so it is the outermost one.
            if lod > 0: img = cset(img, (lod_in < lod), lambda: grow(y, res + 1, lod - 1))
            return img()
        images_out = grow(x, 3, resolution_log2 - 3)

    assert images_out.dtype == tf.as_dtype(dtype)
    return tf.identity(images_out, name='images_out')
#----------------------------------------------------------------------------
# StyleGAN2 synthesis network (Figure 7).
# Implements skip connections and residual nets (Figure 7), but no progressive growing.
# Used in configs E-F (Table 1).

def G_synthesis_stylegan2(
    dlatents_in,                        # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size].
    dlatent_size        = 512,          # Disentangled latent (W) dimensionality.
    num_channels        = 3,            # Number of output color channels.
    resolution          = 1024,         # Output resolution.
    fmap_base           = 16 << 10,     # Overall multiplier for the number of feature maps.
    fmap_decay          = 1.0,          # log2 feature map reduction when doubling the resolution.
    fmap_min            = 1,            # Minimum number of feature maps in any layer.
    fmap_max            = 512,          # Maximum number of feature maps in any layer.
    randomize_noise     = True,         # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables.
    architecture        = 'skip',       # Architecture: 'orig', 'skip', 'resnet'.
    nonlinearity        = 'lrelu',      # Activation function: 'relu', 'lrelu', etc.
    dtype               = 'float32',    # Data type to use for activations and outputs.
    resample_kernel     = [1,3,3,1],    # Low-pass filter to apply when resampling activations. None = no filtering.
    fused_modconv       = True,         # Implement modulated_conv2d_layer() as a single fused op?
    **_kwargs):                         # Ignore unrecognized keyword args.
    """Build the fixed-resolution synthesis graph and return the tensor named 'images_out'.

    Starting from a learned 4x4 constant, one block per doubling of the
    resolution is applied. With architecture 'skip' an RGB image is produced
    at every resolution and accumulated after upsampling; otherwise only the
    final resolution is converted to RGB. 'resnet' additionally adds a
    1x1 upsampling shortcut around every block.
    """
    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4

    def nf(stage):
        # Feature maps for a stage, clipped to [fmap_min, fmap_max].
        unclipped = int(fmap_base / (2.0 ** (stage * fmap_decay)))
        return np.clip(unclipped, fmap_min, fmap_max)

    assert architecture in ['orig', 'skip', 'resnet']
    use_skip = architecture == 'skip'
    use_resnet = architecture == 'resnet'
    num_layers = resolution_log2 * 2 - 2

    # Primary inputs.
    dlatents_in.set_shape([None, num_layers, dlatent_size])
    dlatents_in = tf.cast(dlatents_in, dtype)

    # Stored noise maps, one per conv layer, used when randomize_noise is off.
    def stored_noise(idx):
        side = 2 ** ((idx + 5) // 2)
        return tf.get_variable(
            'noise%d' % idx,
            shape=[1, 1, side, side],
            initializer=tf.initializers.random_normal(),
            trainable=False)

    noise_inputs = [stored_noise(idx) for idx in range(num_layers - 1)]

    def layer(feat, layer_idx, fmaps, kernel, up=False):
        # Modulated conv, then learned-strength noise, then bias + activation.
        feat = modulated_conv2d_layer(
            feat, dlatents_in[:, layer_idx],
            fmaps=fmaps, kernel=kernel, up=up,
            resample_kernel=resample_kernel, fused_modconv=fused_modconv)
        if randomize_noise:
            noise = tf.random_normal([tf.shape(feat)[0], 1, feat.shape[2], feat.shape[3]], dtype=feat.dtype)
        else:
            noise = tf.cast(noise_inputs[layer_idx], feat.dtype)
        strength = tf.get_variable('noise_strength', shape=[], initializer=tf.initializers.zeros())
        feat = feat + noise * tf.cast(strength, feat.dtype)
        return apply_bias_act(feat, act=nonlinearity)

    def block(feat, res): # res = 3..resolution_log2
        shortcut = feat
        with tf.variable_scope('Conv0_up'):
            feat = layer(feat, layer_idx=res*2-5, fmaps=nf(res-1), kernel=3, up=True)
        with tf.variable_scope('Conv1'):
            feat = layer(feat, layer_idx=res*2-4, fmaps=nf(res-1), kernel=3)
        if use_resnet:
            with tf.variable_scope('Skip'):
                shortcut = conv2d_layer(shortcut, fmaps=nf(res-1), kernel=1, up=True, resample_kernel=resample_kernel)
                feat = (feat + shortcut) * (1 / np.sqrt(2))
        return feat

    def upsample(rgb):
        with tf.variable_scope('Upsample'):
            return upsample_2d(rgb, k=resample_kernel)

    def torgb(feat, rgb, res): # res = 2..resolution_log2
        with tf.variable_scope('ToRGB'):
            contribution = apply_bias_act(modulated_conv2d_layer(
                feat, dlatents_in[:, res*2-3],
                fmaps=num_channels, kernel=1, demodulate=False, fused_modconv=fused_modconv))
            if rgb is None:
                return contribution
            return rgb + contribution

    # 4x4 stage: learned constant, one conv, and (for 'skip') the first RGB output.
    rgb = None
    with tf.variable_scope('4x4'):
        with tf.variable_scope('Const'):
            const = tf.get_variable('const', shape=[1, nf(1), 4, 4], initializer=tf.initializers.random_normal())
            feat = tf.tile(tf.cast(const, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1])
        with tf.variable_scope('Conv'):
            feat = layer(feat, layer_idx=0, fmaps=nf(1), kernel=3)
        if use_skip:
            rgb = torgb(feat, rgb, 2)

    # Remaining stages, 8x8 up to the output resolution.
    for res in range(3, resolution_log2 + 1):
        side = 2**res
        with tf.variable_scope('%dx%d' % (side, side)):
            feat = block(feat, res)
            if use_skip:
                rgb = upsample(rgb)
            if use_skip or res == resolution_log2:
                rgb = torgb(feat, rgb, res)
    images_out = rgb

    assert images_out.dtype == tf.as_dtype(dtype)
    return tf.identity(images_out, name='images_out')
#----------------------------------------------------------------------------
# Define a VectorQuantize function

def VectorQuantizerEMA(inputs, is_training=True, embedding_dim=512, num_embeddings=2**8, decay=0.8, commitment_cost=1.0,
                       epsilon=1e-5, **_kwargs):
    """Quantize `inputs` against a codebook that is updated by moving averages.

    Args:
        inputs: Rank-4 tensor whose last axis has size `embedding_dim`
            (enforced by `set_shape` and a runtime `tf.Assert`).
        is_training: Python bool evaluated at graph-construction time. When
            true, the returned loss depends on ops that update the codebook.
        embedding_dim: Size of each codebook vector.
        num_embeddings: Number of codebook vectors.
        decay: Decay passed to `moving_averages.assign_moving_average`.
        commitment_cost: Multiplier applied to the latent loss.
        epsilon: Smoothing constant used when normalizing the cluster sizes.
        **_kwargs: Ignored.

    Returns:
        Tuple `(loss, perplexity, quantized)`. `loss` is reduced over axes
        1-3 only, so it keeps the leading axis of `inputs`. `perplexity` is
        the exponentiated entropy of the average code usage. `quantized` is
        transposed with perm (0, 3, 1, 2), i.e. the last axis moves to
        position 1.
    """
    _embedding_dim = embedding_dim
    _num_embeddings = num_embeddings
    _decay = decay
    _commitment_cost = commitment_cost
    _epsilon = epsilon

    # w is a matrix with an embedding in each column. When training, the
    # embedding is assigned to be the average of all inputs assigned to that
    # embedding.
    embedding_shape = [embedding_dim, num_embeddings]
    _w = tf.get_variable(
        'embedding', embedding_shape,
        initializer=tf.variance_scaling_initializer(), use_resource=True)
    # Moving average of how many inputs were assigned to each embedding.
    _ema_cluster_size = tf.get_variable(
        'ema_cluster_size', [num_embeddings],
        initializer=tf.constant_initializer(0), use_resource=True)
    # Moving average of the per-embedding input sums; starts from the initial value of the codebook.
    _ema_w = tf.get_variable(
        'ema_dw', initializer=_w.initialized_value(), use_resource=True)
    inputs.set_shape([None, None, None, embedding_dim])

    # Look up the codebook vector for every index; the codebook is transposed so that rows are embeddings.
    def quantize(encoding_indices):
        with tf.control_dependencies([encoding_indices]):
            w = tf.transpose(_w.read_value(), [1, 0])
        return tf.nn.embedding_lookup(w, encoding_indices, validate_indices=False)

    # Read the codebook only after `inputs` has been computed.
    with tf.control_dependencies([inputs]):
        w = _w.read_value()
    input_shape = tf.shape(inputs)
    with tf.control_dependencies([
            tf.Assert(tf.equal(input_shape[-1], _embedding_dim), [input_shape])]):
        flat_inputs = tf.reshape(inputs, [-1, _embedding_dim])

    # Squared Euclidean distance between every input row and every codebook column,
    # expanded as |a|^2 - 2ab + |b|^2.
    distances = (tf.reduce_sum(flat_inputs ** 2, 1, keepdims=True)
                 - 2 * tf.matmul(flat_inputs, w)
                 + tf.reduce_sum(w ** 2, 0, keepdims=True))

    # Nearest code per input row (argmax of the negated distance).
    encoding_indices = tf.argmax(- distances, 1)
    encodings = tf.one_hot(encoding_indices, _num_embeddings)
    encoding_indices = tf.reshape(encoding_indices, tf.shape(inputs)[:-1])
    quantized = quantize(encoding_indices)
    # stop_gradient on the codes: this loss produces gradients for `inputs` only.
    e_latent_loss = tf.reduce_mean((tf.stop_gradient(quantized) - inputs) ** 2, axis=[1, 2, 3])

    if is_training:
        updated_ema_cluster_size = moving_averages.assign_moving_average(
            _ema_cluster_size, tf.reduce_sum(encodings, 0), _decay)
        # Sum of the inputs assigned to each code, laid out like the codebook.
        dw = tf.matmul(flat_inputs, encodings, transpose_a=True)
        updated_ema_w = moving_averages.assign_moving_average(_ema_w, dw, _decay)
        n = tf.reduce_sum(updated_ema_cluster_size)
        # Smooth the counts with epsilon so that an unused code does not cause a division by zero below.
        updated_ema_cluster_size = (
                (updated_ema_cluster_size + _epsilon)
                / (n + _num_embeddings * _epsilon) * n)
        normalised_updated_ema_w = (
                updated_ema_w / tf.reshape(updated_ema_cluster_size, [1, -1]))
        # Chain the dependencies: the loss is computed first, then the codebook is
        # overwritten, and the returned loss tensor depends on that overwrite.
        with tf.control_dependencies([e_latent_loss]):
            update_w = tf.assign(_w, normalised_updated_ema_w)
            with tf.control_dependencies([update_w]):
                loss = _commitment_cost * e_latent_loss
    else:
        loss = _commitment_cost * e_latent_loss
    # Straight-through form: the forward value equals `quantized`, the gradient goes to `inputs`.
    quantized = inputs + tf.stop_gradient(quantized - inputs)
    avg_probs = tf.reduce_mean(encodings, 0)
    perplexity = tf.exp(- tf.reduce_sum(avg_probs * tf.log(avg_probs + 1e-10)))
    return loss, perplexity, tf.transpose(quantized, perm=(0, 3, 1, 2))
#----------------------------------------------------------------------------
# Original StyleGAN discriminator.
# Used in configs B-D (Table 1).

def D_stylegan(
    images_in,                          # First input: Images [minibatch, channel, height, width].
    labels_in,                          # Second input: Labels [minibatch, label_size].
    num_channels        = 3,            # Number of input color channels. Overridden based on dataset.
    resolution          = 1024,         # Input resolution. Overridden based on dataset.
    label_size          = 0,            # Dimensionality of the labels, 0 if no labels. Overridden based on dataset.
    fmap_base           = 16 << 10,     # Overall multiplier for the number of feature maps.
    fmap_decay          = 1.0,          # log2 feature map reduction when doubling the resolution.
    fmap_min            = 1,            # Minimum number of feature maps in any layer.
    fmap_max            = 512,          # Maximum number of feature maps in any layer.
    nonlinearity        = 'lrelu',      # Activation function: 'relu', 'lrelu', etc.
    mbstd_group_size    = 4,            # Group size for the minibatch standard deviation layer, 0 = disable.
    mbstd_num_features  = 1,            # Number of features for the minibatch standard deviation layer.
    dtype               = 'float32',    # Data type to use for activations and outputs.
    resample_kernel     = [1,3,3,1],    # Low-pass filter to apply when resampling activations. None = no filtering.
    structure           = 'auto',       # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
    is_template_graph   = False,        # True = template graph constructed by the Network class, False = actual evaluation.
    **_kwargs):                         # Ignore unrecognized keyword args.
    """Build the progressive-growing discriminator graph and return the tensor named 'scores_out'.

    `resolution` must be a power of two and at least 4 (asserted below).
    The current level of detail is read from a non-trainable variable 'lod'
    created inside this function. With labels, the output layer produces one
    score per label entry and the scores are reduced with the labels as
    weights.
    """

    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4
    # Feature-map count for a stage: fmap_base / 2**(stage * fmap_decay), clipped to [fmap_min, fmap_max].
    def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max)
    if structure == 'auto': structure = 'linear' if is_template_graph else 'recursive'
    act = nonlinearity

    images_in.set_shape([None, num_channels, resolution, resolution])
    labels_in.set_shape([None, label_size])
    images_in = tf.cast(images_in, dtype)
    labels_in = tf.cast(labels_in, dtype)
    lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0.0), trainable=False), dtype)

    # Building blocks for spatial layers.
    def fromrgb(x, res): # res = 2..resolution_log2
        with tf.variable_scope('FromRGB_lod%d' % (resolution_log2 - res)):
            return apply_bias_act(conv2d_layer(x, fmaps=nf(res-1), kernel=1), act=act)
    def block(x, res): # res = 2..resolution_log2
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            with tf.variable_scope('Conv0'):
                x = apply_bias_act(conv2d_layer(x, fmaps=nf(res-1), kernel=3), act=act)
            with tf.variable_scope('Conv1_down'):
                x = apply_bias_act(conv2d_layer(x, fmaps=nf(res-2), kernel=3, down=True, resample_kernel=resample_kernel), act=act)
            return x

    # Fixed structure: simple and efficient, but does not support progressive growing.
    if structure == 'fixed':
        x = fromrgb(images_in, resolution_log2)
        for res in range(resolution_log2, 2, -1):
            x = block(x, res)

    # Linear structure: simple but inefficient.
    # Every resolution is evaluated; after each block the features are blended with
    # FromRGB of the downsampled image according to lod_in - lod.
    if structure == 'linear':
        img = images_in
        x = fromrgb(img, resolution_log2)
        for res in range(resolution_log2, 2, -1):
            lod = resolution_log2 - res
            x = block(x, res)
            with tf.variable_scope('Downsample_lod%d' % lod):
                img = downsample_2d(img)
            y = fromrgb(img, res - 1)
            with tf.variable_scope('Grow_lod%d' % lod):
                x = tflib.lerp_clip(x, y, lod_in - lod)

    # Recursive structure: complex but efficient.
    if structure == 'recursive':
        # Wrap cur_lambda in a tf.cond: the returned callable yields new_lambda() when new_cond holds, else cur_lambda().
        def cset(cur_lambda, new_cond, new_lambda):
            return lambda: tf.cond(new_cond, new_lambda, cur_lambda)
        def grow(res, lod):
            # Default input: FromRGB of the image downsampled by 2**lod.
            x = lambda: fromrgb(naive_downsample_2d(images_in, factor=2**lod), res)
            # lod_in < lod: take the features from the next higher resolution instead.
            if lod > 0: x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1))
            # From here on x is a tensor again; y defaults to returning it unchanged.
            x = block(x(), res); y = lambda: x
            # lod_in > lod: blend with FromRGB of the image at the next lower resolution.
            y = cset(y, (lod_in > lod), lambda: tflib.lerp(x, fromrgb(naive_downsample_2d(images_in, factor=2**(lod+1)), res - 1), lod_in - lod))
            return y()
        x = grow(3, resolution_log2 - 3)

    # Final layers at 4x4 resolution.
    with tf.variable_scope('4x4'):
        if mbstd_group_size > 1:
            with tf.variable_scope('MinibatchStddev'):
                x = minibatch_stddev_layer(x, mbstd_group_size, mbstd_num_features)
        with tf.variable_scope('Conv'):
            x = apply_bias_act(conv2d_layer(x, fmaps=nf(1), kernel=3), act=act)
        with tf.variable_scope('Dense0'):
            x = apply_bias_act(dense_layer(x, fmaps=nf(0)), act=act)

    # Output layer with label conditioning from "Which Training Methods for GANs do actually Converge?"
    # One output per label entry (at least one); with labels, reduce to a single score weighted by labels_in.
    with tf.variable_scope('Output'):
        x = apply_bias_act(dense_layer(x, fmaps=max(labels_in.shape[1], 1)))
        if labels_in.shape[1] > 0:
            x = tf.reduce_sum(x * labels_in, axis=1, keepdims=True)
    scores_out = x

    # Output.
    assert scores_out.dtype == tf.as_dtype(dtype)
    scores_out = tf.identity(scores_out, name='scores_out')
    return scores_out
#----------------------------------------------------------------------------
# StyleGAN2 discriminator (Figure 7).
# Implements skip connections and residual nets (Figure 7), but no progressive growing.
# Used in configs E-F (Table 1).

def D_stylegan2(
    images_in,                          # First input: Images [minibatch, channel, height, width].
    labels_in,                          # Second input: Labels [minibatch, label_size].
    num_channels        = 3,            # Number of input color channels. Overridden based on dataset.
    resolution          = 1024,         # Input resolution. Overridden based on dataset.
    label_size          = 0,            # Dimensionality of the labels, 0 if no labels. Overridden based on dataset.
    fmap_base           = 16 << 10,     # Overall multiplier for the number of feature maps.
    fmap_decay          = 1.0,          # log2 feature map reduction when doubling the resolution.
    fmap_min            = 1,            # Minimum number of feature maps in any layer.
    fmap_max            = 512,          # Maximum number of feature maps in any layer.
    architecture        = 'resnet',     # Architecture: 'orig', 'skip', 'resnet'.
    nonlinearity        = 'lrelu',      # Activation function: 'relu', 'lrelu', etc.
    mbstd_group_size    = 4,            # Group size for the minibatch standard deviation layer, 0 = disable.
    mbstd_num_features  = 1,            # Number of features for the minibatch standard deviation layer.
    dtype               = 'float32',    # Data type to use for activations and outputs.
    resample_kernel     = [1,3,3,1],    # Low-pass filter to apply when resampling activations. None = no filtering.
    **_kwargs):                         # Ignore unrecognized keyword args.
    """Build the fixed-resolution discriminator graph and return the tensor named 'scores_out'.

    One downsampling block is applied per halving of the resolution down to
    4x4. With architecture 'skip' the (downsampled) image is re-injected
    through FromRGB at every resolution; otherwise only at the input
    resolution. 'resnet' adds a 1x1 downsampling shortcut around every block.
    """
    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4

    def nf(stage):
        # Feature maps for a stage, clipped to [fmap_min, fmap_max].
        unclipped = int(fmap_base / (2.0 ** (stage * fmap_decay)))
        return np.clip(unclipped, fmap_min, fmap_max)

    assert architecture in ['orig', 'skip', 'resnet']
    use_skip = architecture == 'skip'
    use_resnet = architecture == 'resnet'

    images_in.set_shape([None, num_channels, resolution, resolution])
    labels_in.set_shape([None, label_size])
    images_in = tf.cast(images_in, dtype)
    labels_in = tf.cast(labels_in, dtype)

    def fromrgb(feat, img, res): # res = 2..resolution_log2
        with tf.variable_scope('FromRGB'):
            lifted = apply_bias_act(conv2d_layer(img, fmaps=nf(res-1), kernel=1), act=nonlinearity)
            if feat is None:
                return lifted
            return feat + lifted

    def block(feat, res): # res = 2..resolution_log2
        shortcut = feat
        with tf.variable_scope('Conv0'):
            feat = apply_bias_act(conv2d_layer(feat, fmaps=nf(res-1), kernel=3), act=nonlinearity)
        with tf.variable_scope('Conv1_down'):
            feat = apply_bias_act(
                conv2d_layer(feat, fmaps=nf(res-2), kernel=3, down=True, resample_kernel=resample_kernel),
                act=nonlinearity)
        if use_resnet:
            with tf.variable_scope('Skip'):
                shortcut = conv2d_layer(shortcut, fmaps=nf(res-2), kernel=1, down=True, resample_kernel=resample_kernel)
                feat = (feat + shortcut) * (1 / np.sqrt(2))
        return feat

    def downsample(img):
        with tf.variable_scope('Downsample'):
            return downsample_2d(img, k=resample_kernel)

    # Stages from the input resolution down to 8x8.
    feat = None
    img = images_in
    for res in range(resolution_log2, 2, -1):
        side = 2**res
        with tf.variable_scope('%dx%d' % (side, side)):
            if use_skip or res == resolution_log2:
                feat = fromrgb(feat, img, res)
            feat = block(feat, res)
            if use_skip:
                img = downsample(img)

    # 4x4 stage: optional minibatch stddev, one conv and one dense layer.
    with tf.variable_scope('4x4'):
        if use_skip:
            feat = fromrgb(feat, img, 2)
        if mbstd_group_size > 1:
            with tf.variable_scope('MinibatchStddev'):
                feat = minibatch_stddev_layer(feat, mbstd_group_size, mbstd_num_features)
        with tf.variable_scope('Conv'):
            feat = apply_bias_act(conv2d_layer(feat, fmaps=nf(1), kernel=3), act=nonlinearity)
        with tf.variable_scope('Dense0'):
            feat = apply_bias_act(dense_layer(feat, fmaps=nf(0)), act=nonlinearity)

    # Output layer with label conditioning from "Which Training Methods for GANs do actually Converge?"
    with tf.variable_scope('Output'):
        scores = apply_bias_act(dense_layer(feat, fmaps=max(labels_in.shape[1], 1)))
        if labels_in.shape[1] > 0:
            scores = tf.reduce_sum(scores * labels_in, axis=1, keepdims=True)

    assert scores.dtype == tf.as_dtype(dtype)
    return tf.identity(scores, name='scores_out')
#----------------------------------------------------------------------------
# StyleGAN2 discriminator with vector-quantization sub-networks attached to
# selected resolutions.

def D_stylegan2_quant(
    images_in,                          # First input: Images [minibatch, channel, height, width].
    labels_in,                          # Second input: Labels [minibatch, label_size].
    num_channels        = 3,            # Number of input color channels. Overridden based on dataset.
    resolution          = 1024,         # Input resolution. Overridden based on dataset.
    label_size          = 0,            # Dimensionality of the labels, 0 if no labels. Overridden based on dataset.
    fmap_base           = 16 << 10,     # Overall multiplier for the number of feature maps.
    fmap_decay          = 1.0,          # log2 feature map reduction when doubling the resolution.
    fmap_min            = 1,            # Minimum number of feature maps in any layer.
    fmap_max            = 512,          # Maximum number of feature maps in any layer.
    architecture        = 'resnet',     # Architecture: 'orig', 'skip', 'resnet'.
    nonlinearity        = 'lrelu',      # Activation function: 'relu', 'lrelu', etc.
    mbstd_group_size    = 4,            # Group size for the minibatch standard deviation layer, 0 = disable.
    mbstd_num_features  = 1,            # Number of features for the minibatch standard deviation layer.
    dtype               = 'float32',    # Data type to use for activations and outputs.
    resample_kernel     = [1,3,3,1],    # Low-pass filter to apply when resampling activations. None = no filtering.
    commitment_cost     = 1.0,          # Forwarded to VectorQuantizerEMA.
    decay               = 0.8,          # Forwarded to VectorQuantizerEMA.
    discrete_layer      = '2',          # String of digits; each digit is the log2 resolution of a block to quantize.
    components          = dnnlib.EasyDict(),    # Container for sub-networks. Retained between calls.
    **_kwargs):                         # Ignore unrecognized keyword args.
    """Build the StyleGAN2 discriminator graph with quantizers on selected blocks.

    The discriminator path is the same as in `D_stylegan2`. In addition, the
    output of every block whose log2 resolution is listed in
    `discrete_layer` is fed (channels-last) to a `VectorQuantizerEMA`
    sub-network and the returned loss is accumulated.

    Returns:
        Tuple `(scores_out, quant_loss, ppl)`. `quant_loss` is the sum of the
        quantizer losses (the integer 0 if no quantizer was applied). `ppl`
        is the perplexity reported by the last quantizer that was applied, or
        a zero constant if none was.

    Raises:
        ValueError: If `discrete_layer` names a log2 resolution that has no
            entry in the size tables below (only 3..10 are defined). The
            default '2' is such a value, so callers have to pass
            `discrete_layer` explicitly.
    """
    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4

    # Each character is parsed on its own, e.g. '34' -> [3, 4].
    q_layer = [int(ch) for ch in discrete_layer]

    # Codebook size and embedding width per log2 resolution.
    # NOTE(review): embedding_dim comes from this fixed table rather than from
    # nf(); confirm that it matches the channel count of the block output for
    # the configured fmap_base.
    res_dictsz_mapping = {10: 2**6, 9: 2**6, 8: 2**6, 7: 2**6, 6: 2**7, 5: 2**7, 4: 2**7, 3: 2**7}
    res_ch_mapping = {10: 2**5, 9: 2**6, 8: 2**7, 7: 2**8, 6: 2**9, 5: 2**9, 4: 2**9, 3: 2**9}

    # Fail with a readable message instead of a bare KeyError further down.
    unsupported = [res for res in q_layer if res not in res_dictsz_mapping or res not in res_ch_mapping]
    if unsupported:
        raise ValueError(
            'discrete_layer=%r selects unsupported log2 resolution(s) %s; supported values are %s'
            % (discrete_layer, unsupported, sorted(res_dictsz_mapping)))

    def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max)
    assert architecture in ['orig', 'skip', 'resnet']
    act = nonlinearity

    images_in.set_shape([None, num_channels, resolution, resolution])
    labels_in.set_shape([None, label_size])
    images_in = tf.cast(images_in, dtype)
    labels_in = tf.cast(labels_in, dtype)

    # Create each quantizer sub-network once; `components` keeps it for later calls.
    for res in q_layer:
        key = 'discrete_mapping_%s' % str(res)
        if key not in components:
            components[key] = tflib.Network(
                'Discrete_mapping_%s' % str(res),
                num_embeddings=res_dictsz_mapping[res],
                decay=decay,
                embedding_dim=res_ch_mapping[res],
                commitment_cost=commitment_cost,
                func_name=VectorQuantizerEMA,
                **_kwargs)

    # Building blocks for main layers.
    def fromrgb(x, y, res): # res = 2..resolution_log2
        with tf.variable_scope('FromRGB'):
            t = apply_bias_act(conv2d_layer(y, fmaps=nf(res-1), kernel=1), act=act)
            return t if x is None else x + t
    def block(x, res): # res = 2..resolution_log2
        t = x
        with tf.variable_scope('Conv0'):
            x = apply_bias_act(conv2d_layer(x, fmaps=nf(res-1), kernel=3), act=act)
        with tf.variable_scope('Conv1_down'):
            x = apply_bias_act(conv2d_layer(x, fmaps=nf(res-2), kernel=3, down=True, resample_kernel=resample_kernel), act=act)
        if architecture == 'resnet':
            with tf.variable_scope('Skip'):
                t = conv2d_layer(t, fmaps=nf(res-2), kernel=1, down=True, resample_kernel=resample_kernel)
                x = (x + t) * (1 / np.sqrt(2))
        return x
    def downsample(y):
        with tf.variable_scope('Downsample'):
            return downsample_2d(y, k=resample_kernel)

    # Main layers.
    x = None
    y = images_in
    quant_loss = 0
    ppl = None  # Stays None when no requested layer lies in 3..resolution_log2.
    for res in range(resolution_log2, 2, -1):
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            if architecture == 'skip' or res == resolution_log2:
                x = fromrgb(x, y, res)
            x = block(x, res)
            if res in q_layer:
                # The quantizer expects channels-last input.
                # NOTE(review): the quantized features are not fed back into
                # `x`, only the loss is used, and is_training is fixed to
                # True; confirm that both are intended.
                diff, ppl, _quantized = components['discrete_mapping_%s' % str(res)].get_output_for(
                    tf.transpose(x, perm=(0, 2, 3, 1)), is_training=True)
                quant_loss += diff
            if architecture == 'skip':
                y = downsample(y)

    # Previously `ppl` was unbound here when no quantizer had been applied,
    # which made the return statement raise UnboundLocalError.
    if ppl is None:
        ppl = tf.constant(0.0, dtype=tf.float32)

    # Final layers.
    with tf.variable_scope('4x4'):
        if architecture == 'skip':
            x = fromrgb(x, y, 2)
        if mbstd_group_size > 1:
            with tf.variable_scope('MinibatchStddev'):
                x = minibatch_stddev_layer(x, mbstd_group_size, mbstd_num_features)
        with tf.variable_scope('Conv'):
            x = apply_bias_act(conv2d_layer(x, fmaps=nf(1), kernel=3), act=act)
        with tf.variable_scope('Dense0'):
            x = apply_bias_act(dense_layer(x, fmaps=nf(0)), act=act)

    # Output layer with label conditioning from "Which Training Methods for GANs do actually Converge?"
    with tf.variable_scope('Output'):
        x = apply_bias_act(dense_layer(x, fmaps=max(labels_in.shape[1], 1)))
        if labels_in.shape[1] > 0:
            x = tf.reduce_sum(x * labels_in, axis=1, keepdims=True)
    scores_out = x

    # Output.
    assert scores_out.dtype == tf.as_dtype(dtype)
    scores_out = tf.identity(scores_out, name='scores_out')
    return scores_out, quant_loss, ppl
#----------------------------------------------------------------------------
# Just-in-time processing of training images before feeding them to the networks.

def process_reals(x, labels, lod, mirror_augment, drange_data, drange_net):
    """Prepare a minibatch of real images for the networks.

    The images are cast to float32 and remapped from `drange_data` to
    `drange_net`, optionally mirrored at random along axis 3, cross-faded
    with a 2x2-averaged copy by the fractional part of `lod`, and finally
    repeated along axes 2 and 3 by a factor of 2**floor(lod).
    `labels` is returned unchanged.
    """
    with tf.name_scope('DynamicRange'):
        images = tf.cast(x, tf.float32)
        images = misc.adjust_dynamic_range(images, drange_data, drange_net)

    if mirror_augment:
        with tf.name_scope('MirrorAugment'):
            # One uniform draw per image decides whether it is kept or reversed.
            keep_mask = tf.random_uniform([tf.shape(images)[0]]) < 0.5
            images = tf.where(keep_mask, images, tf.reverse(images, [3]))

    with tf.name_scope('FadeLOD'):
        # Smooth crossfade between consecutive levels-of-detail.
        dims = tf.shape(images)
        coarse = tf.reshape(images, [-1, dims[1], dims[2]//2, 2, dims[3]//2, 2])
        coarse = tf.reduce_mean(coarse, axis=[3, 5], keepdims=True)
        coarse = tf.tile(coarse, [1, 1, 1, 2, 1, 2])
        coarse = tf.reshape(coarse, [-1, dims[1], dims[2], dims[3]])
        images = tflib.lerp(images, coarse, lod - tf.floor(lod))

    with tf.name_scope('UpscaleLOD'):
        # Upscale to match the expected input/output size of the networks.
        dims = tf.shape(images)
        repeat = tf.cast(2 ** tf.floor(lod), tf.int32)
        images = tf.reshape(images, [-1, dims[1], dims[2], 1, dims[3], 1])
        images = tf.tile(images, [1, 1, 1, repeat, 1, repeat])
        images = tf.reshape(images, [-1, dims[1], dims[2] * repeat, dims[3] * repeat])

    return images, labels
#----------------------------------------------------------------------------
# Evaluate time-varying training parameters.

def training_schedule(
    cur_nimg,
    training_set,
    lod_initial_resolution  = None,     # Image resolution used at the beginning.
    lod_training_kimg       = 600,      # Thousands of real images to show before doubling the resolution.
    lod_transition_kimg     = 600,      # Thousands of real images to show when fading in new layers.
    minibatch_size_base     = 32,       # Global minibatch size.
    minibatch_size_dict     = {},       # Resolution-specific overrides.
    minibatch_gpu_base      = 4,        # Number of samples processed at a time by one GPU.
    minibatch_gpu_dict      = {},       # Resolution-specific overrides.
    G_lrate_base            = 0.002,    # Learning rate for the generator.
    G_lrate_dict            = {},       # Resolution-specific overrides.
    D_lrate_base            = 0.002,    # Learning rate for the discriminator.
    D_lrate_dict            = {},       # Resolution-specific overrides.
    lrate_rampup_kimg       = 0,        # Duration of learning rate ramp-up.
    tick_kimg_base          = 4,        # Default interval of progress snapshots.
    tick_kimg_dict          = {8:28, 16:24, 32:20, 64:16, 128:12, 256:8, 512:6, 1024:4}): # Resolution-specific overrides.
    """Compute the training parameters that apply after `cur_nimg` images.

    Returns a `dnnlib.EasyDict` with the fields kimg, lod, resolution,
    minibatch_size, minibatch_gpu, G_lrate, D_lrate and tick_kimg. The
    per-resolution dicts are only read, never modified.
    """
    kimg = cur_nimg / 1000.0

    # Which training/transition phase are we in, and how far into it?
    phase_dur = lod_training_kimg + lod_transition_kimg
    if phase_dur > 0:
        phase_idx = int(np.floor(kimg / phase_dur))
    else:
        phase_idx = 0
    phase_kimg = kimg - phase_idx * phase_dur

    # Level-of-detail: one whole step per completed phase, plus the
    # fraction of the current transition, never below zero.
    if lod_initial_resolution is None:
        lod = 0.0
    else:
        lod = training_set.resolution_log2
        lod -= np.floor(np.log2(lod_initial_resolution))
        lod -= phase_idx
        if lod_transition_kimg > 0:
            lod -= max(phase_kimg - lod_training_kimg, 0.0) / lod_transition_kimg
        lod = max(lod, 0.0)
    resolution = 2 ** (training_set.resolution_log2 - int(np.floor(lod)))

    # Learning rates, with an optional linear ramp-up from zero.
    G_lrate = G_lrate_dict.get(resolution, G_lrate_base)
    D_lrate = D_lrate_dict.get(resolution, D_lrate_base)
    if lrate_rampup_kimg > 0:
        rampup = min(kimg / lrate_rampup_kimg, 1.0)
        G_lrate *= rampup
        D_lrate *= rampup

    # Assemble the result in the usual field order.
    s = dnnlib.EasyDict()
    s.kimg = kimg
    s.lod = lod
    s.resolution = resolution
    s.minibatch_size = minibatch_size_dict.get(resolution, minibatch_size_base)
    s.minibatch_gpu = minibatch_gpu_dict.get(resolution, minibatch_gpu_base)
    s.G_lrate = G_lrate
    s.D_lrate = D_lrate
    s.tick_kimg = tick_kimg_dict.get(resolution, tick_kimg_base)
    return s
if lod_initial_resolution is None: s.lod = 0.0 else: s.lod = training_set.resolution_log2 s.lod -= np.floor(np.log2(lod_initial_resolution)) s.lod -= phase_idx if lod_transition_kimg > 0: s.lod -= max(phase_kimg - lod_training_kimg, 0.0) / lod_transition_kimg s.lod = max(s.lod, 0.0) s.resolution = 2 ** (training_set.resolution_log2 - int(np.floor(s.lod))) # Minibatch size. s.minibatch_size = minibatch_size_dict.get(s.resolution, minibatch_size_base) s.minibatch_gpu = minibatch_gpu_dict.get(s.resolution, minibatch_gpu_base) # Learning rate. s.G_lrate = G_lrate_dict.get(s.resolution, G_lrate_base) s.D_lrate = D_lrate_dict.get(s.resolution, D_lrate_base) if lrate_rampup_kimg > 0: rampup = min(s.kimg / lrate_rampup_kimg, 1.0) s.G_lrate *= rampup s.D_lrate *= rampup # Other parameters. s.tick_kimg = tick_kimg_dict.get(s.resolution, tick_kimg_base) return s #---------------------------------------------------------------------------- # Main training script. def training_loop( G_args = {}, # Options for generator network. D_args = {}, # Options for discriminator network. G_opt_args = {}, # Options for generator optimizer. D_opt_args = {}, # Options for discriminator optimizer. G_loss_args = {}, # Options for generator loss. D_loss_args = {}, # Options for discriminator loss. dataset_args = {}, # Options for dataset.load_dataset(). sched_args = {}, # Options for train.TrainingSchedule. grid_args = {}, # Options for train.setup_snapshot_image_grid(). metric_arg_list = [], # Options for MetricGroup. tf_config = {}, # Options for tflib.init_tf(). data_dir = None, # Directory to load datasets from. G_smoothing_kimg = 10.0, # Half-life of the running average of generator weights. minibatch_repeats = 4, # Number of minibatches to run before adjusting training parameters. lazy_regularization = True, # Perform regularization as a separate training step? G_reg_interval = 4, # How often the perform regularization for G? Ignored if lazy_regularization=False. 
D_reg_interval = 16, # How often the perform regularization for D? Ignored if lazy_regularization=False. reset_opt_for_new_lod = True, # Reset optimizer internal state (e.g. Adam moments) when new layers are introduced? total_kimg = 25000, # Total length of the training, measured in thousands of real images. mirror_augment = False, # Enable mirror augment? drange_net = [-1,1], # Dynamic range used when feeding image data to the networks. image_snapshot_ticks = 50, # How often to save image snapshots? None = only save 'reals.png' and 'fakes-init.png'. network_snapshot_ticks = 50, # How often to save network snapshots? None = only save 'networks-final.pkl'. save_tf_graph = False, # Include full TensorFlow computation graph in the tfevents file? save_weight_histograms = False, # Include weight histograms in the tfevents file? resume_pkl = None, # Network pickle to resume training from, None = train from scratch. resume_kimg = 0.0, # Assumed training progress at the beginning. Affects reporting and training schedule. resume_time = 0.0, # Assumed wallclock time at the beginning. Affects reporting. resume_with_new_nets = False): # Construct new networks according to G_args and D_args before resuming training? # Initialize dnnlib and TensorFlow. tflib.init_tf(tf_config) num_gpus = dnnlib.submit_config.num_gpus # Load training set. training_set = dataset.load_dataset(data_dir=dnnlib.convert_path(data_dir), verbose=True, **dataset_args) grid_size, grid_reals, grid_labels = misc.setup_snapshot_image_grid(training_set, **grid_args) misc.save_image_grid(grid_reals, dnnlib.make_run_dir_path('reals.png'), drange=training_set.dynamic_range, grid_size=grid_size) # Construct or load networks. 
with tf.device('/gpu:0'): if resume_pkl is None or resume_with_new_nets: print('Constructing networks...') G = tflib.Network('G', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **G_args) D = tflib.Network('D', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **D_args) Gs = G.clone('Gs') if resume_pkl is not None: print('Loading networks from "%s"...' % resume_pkl) rG, rD, rGs = misc.load_pkl(resume_pkl) if resume_with_new_nets: G.copy_vars_from(rG); D.copy_vars_from(rD); Gs.copy_vars_from(rGs) else: G = rG; D = rD; Gs = rGs # Print layers and generate initial image snapshot. G.print_layers(); D.print_layers() sched = training_schedule(cur_nimg=total_kimg*1000, training_set=training_set, **sched_args) grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:]) grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched.minibatch_gpu) misc.save_image_grid(grid_fakes, dnnlib.make_run_dir_path('fakes_init.png'), drange=drange_net, grid_size=grid_size) # Setup training inputs. print('Building TensorFlow graph...') with tf.name_scope('Inputs'), tf.device('/cpu:0'): lod_in = tf.placeholder(tf.float32, name='lod_in', shape=[]) lrate_in = tf.placeholder(tf.float32, name='lrate_in', shape=[]) minibatch_size_in = tf.placeholder(tf.int32, name='minibatch_size_in', shape=[]) minibatch_gpu_in = tf.placeholder(tf.int32, name='minibatch_gpu_in', shape=[]) minibatch_multiplier = minibatch_size_in // (minibatch_gpu_in * num_gpus) Gs_beta = 0.5 ** tf.div(tf.cast(minibatch_size_in, tf.float32), G_smoothing_kimg * 1000.0) if G_smoothing_kimg > 0.0 else 0.0 # Setup optimizers. 
G_opt_args = dict(G_opt_args) D_opt_args = dict(D_opt_args) for args, reg_interval in [(G_opt_args, G_reg_interval), (D_opt_args, D_reg_interval)]: args['minibatch_multiplier'] = minibatch_multiplier args['learning_rate'] = lrate_in if lazy_regularization: mb_ratio = reg_interval / (reg_interval + 1) args['learning_rate'] *= mb_ratio if 'beta1' in args: args['beta1'] **= mb_ratio if 'beta2' in args: args['beta2'] **= mb_ratio G_opt = tflib.Optimizer(name='TrainG', **G_opt_args) D_opt = tflib.Optimizer(name='TrainD', **D_opt_args) G_reg_opt = tflib.Optimizer(name='RegG', share=G_opt, **G_opt_args) D_reg_opt = tflib.Optimizer(name='RegD', share=D_opt, **D_opt_args) # Build training graph for each GPU. data_fetch_ops = [] for gpu in range(num_gpus): with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu): # Create GPU-specific shadow copies of G and D. G_gpu = G if gpu == 0 else G.clone(G.name + '_shadow') D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow') # Fetch training data via temporary variables. with tf.name_scope('DataFetch'): sched = training_schedule(cur_nimg=int(resume_kimg*1000), training_set=training_set, **sched_args) reals_var = tf.Variable(name='reals', trainable=False, initial_value=tf.zeros([sched.minibatch_gpu] + training_set.shape)) labels_var = tf.Variable(name='labels', trainable=False, initial_value=tf.zeros([sched.minibatch_gpu, training_set.label_size])) reals_write, labels_write = training_set.get_minibatch_tf() reals_write, labels_write = process_reals(reals_write, labels_write, lod_in, mirror_augment, training_set.dynamic_range, drange_net) reals_write = tf.concat([reals_write, reals_var[minibatch_gpu_in:]], axis=0) labels_write = tf.concat([labels_write, labels_var[minibatch_gpu_in:]], axis=0) data_fetch_ops += [tf.assign(reals_var, reals_write)] data_fetch_ops += [tf.assign(labels_var, labels_write)] reals_read = reals_var[:minibatch_gpu_in] labels_read = labels_var[:minibatch_gpu_in] # Evaluate loss functions. 
lod_assign_ops = [] if 'lod' in G_gpu.vars: lod_assign_ops += [tf.assign(G_gpu.vars['lod'], lod_in)] if 'lod' in D_gpu.vars: lod_assign_ops += [tf.assign(D_gpu.vars['lod'], lod_in)] with tf.control_dependencies(lod_assign_ops): with tf.name_scope('G_loss'): G_loss, G_reg = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=G_opt, training_set=training_set, minibatch_size=minibatch_gpu_in, **G_loss_args) with tf.name_scope('D_loss'): D_loss, D_reg, perplexity = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=D_opt, training_set=training_set, minibatch_size=minibatch_gpu_in, reals=reals_read, labels=labels_read, **D_loss_args) # Register gradients. if not lazy_regularization: if G_reg is not None: G_loss += G_reg if D_reg is not None: D_loss += D_reg else: if G_reg is not None: G_reg_opt.register_gradients(tf.reduce_mean(G_reg * G_reg_interval), G_gpu.trainables) if D_reg is not None: D_reg_opt.register_gradients(tf.reduce_mean(D_reg * D_reg_interval), D_gpu.trainables) G_opt.register_gradients(tf.reduce_mean(G_loss), G_gpu.trainables) D_opt.register_gradients(tf.reduce_mean(D_loss), D_gpu.trainables) # Setup training ops. data_fetch_op = tf.group(*data_fetch_ops) G_train_op = G_opt.apply_updates() D_train_op = D_opt.apply_updates() G_reg_op = G_reg_opt.apply_updates(allow_no_op=True) D_reg_op = D_reg_opt.apply_updates(allow_no_op=True) Gs_update_op = Gs.setup_as_moving_average_of(G, beta=Gs_beta) # Finalize graph. 
with tf.device('/gpu:0'): try: peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse() except tf.errors.NotFoundError: peak_gpu_mem_op = tf.constant(0) tflib.init_uninitialized_vars() print('Initializing logs...') summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path()) if save_tf_graph: summary_log.add_graph(tf.get_default_graph()) if save_weight_histograms: G.setup_weight_histograms(); D.setup_weight_histograms() metrics = metric_base.MetricGroup(metric_arg_list) print('Training for %d kimg...\n' % total_kimg) dnnlib.RunContext.get().update('', cur_epoch=resume_kimg, max_epoch=total_kimg) maintenance_time = dnnlib.RunContext.get().get_last_update_interval() cur_nimg = int(resume_kimg * 1000) cur_tick = -1 tick_start_nimg = cur_nimg prev_lod = -1.0 running_mb_counter = 0 while cur_nimg < total_kimg * 1000: if dnnlib.RunContext.get().should_stop(): break # Choose training parameters and configure training ops. sched = training_schedule(cur_nimg=cur_nimg, training_set=training_set, **sched_args) assert sched.minibatch_size % (sched.minibatch_gpu * num_gpus) == 0 training_set.configure(sched.minibatch_gpu, sched.lod) if reset_opt_for_new_lod: if np.floor(sched.lod) != np.floor(prev_lod) or np.ceil(sched.lod) != np.ceil(prev_lod): G_opt.reset_optimizer_state(); D_opt.reset_optimizer_state() prev_lod = sched.lod ppl = 0.0 # Run training ops. feed_dict = {lod_in: sched.lod, lrate_in: sched.G_lrate, minibatch_size_in: sched.minibatch_size, minibatch_gpu_in: sched.minibatch_gpu} for _repeat in range(minibatch_repeats): rounds = range(0, sched.minibatch_size, sched.minibatch_gpu * num_gpus) run_G_reg = (lazy_regularization and running_mb_counter % G_reg_interval == 0) run_D_reg = (lazy_regularization and running_mb_counter % D_reg_interval == 0) cur_nimg += sched.minibatch_size running_mb_counter += 1 # Fast path without gradient accumulation. 
if len(rounds) == 1: tflib.run([G_train_op, data_fetch_op], feed_dict) if run_G_reg: tflib.run(G_reg_op, feed_dict) tflib.run([D_train_op, Gs_update_op], feed_dict) if run_D_reg: _, ppl = tflib.run([D_reg_op, perplexity], feed_dict) # Slow path with gradient accumulation. else: for _round in rounds: tflib.run(G_train_op, feed_dict) if run_G_reg: for _round in rounds: tflib.run(G_reg_op, feed_dict) tflib.run(Gs_update_op, feed_dict) for _round in rounds: tflib.run(data_fetch_op, feed_dict) _, ppl = tflib.run([D_train_op, perplexity], feed_dict) if run_D_reg: for _round in rounds: _, ppl = tflib.run([D_reg_op, perplexity], feed_dict) # Perform maintenance tasks once per tick. done = (cur_nimg >= total_kimg * 1000) if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done: cur_tick += 1 tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0 tick_start_nimg = cur_nimg tick_time = dnnlib.RunContext.get().get_time_since_last_update() total_time = dnnlib.RunContext.get().get_time_since_start() + resume_time # Report progress. print('tick %-5d kimg %-8.1f lod %-5.2f minibatch %-4d time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %-6.1f gpumem %.1f' % ( autosummary('Progress/tick', cur_tick), autosummary('Progress/kimg', cur_nimg / 1000.0), autosummary('Progress/lod', sched.lod), autosummary('Progress/minibatch', sched.minibatch_size), dnnlib.util.format_time(autosummary('Timing/total_sec', total_time)), autosummary('Timing/sec_per_tick', tick_time), autosummary('Timing/sec_per_kimg', tick_time / tick_kimg), autosummary('Timing/maintenance_sec', maintenance_time), autosummary('Resources/peak_gpu_mem_gb', peak_gpu_mem_op.eval() / 2**30)), autosummary('Perplexity', ppl), ) autosummary('Timing/total_hours', total_time / (60.0 * 60.0)) autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0)) # Save snapshots. 
if image_snapshot_ticks is not None and (cur_tick % image_snapshot_ticks == 0 or done): grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched.minibatch_gpu) misc.save_image_grid(grid_fakes, dnnlib.make_run_dir_path('fakes%06d.png' % (cur_nimg // 1000)), drange=drange_net, grid_size=grid_size) if network_snapshot_ticks is not None and (cur_tick % network_snapshot_ticks == 0 or done): pkl = dnnlib.make_run_dir_path('network-snapshot-%06d.pkl' % (cur_nimg // 1000)) misc.save_pkl((G, D, Gs), pkl) metrics.run(pkl, run_dir=dnnlib.make_run_dir_path(), data_dir=dnnlib.convert_path(data_dir), num_gpus=num_gpus, tf_config=tf_config) # Update summaries and RunContext. metrics.update_autosummaries() tflib.autosummary.save_summaries(summary_log, cur_nimg) dnnlib.RunContext.get().update('%.2f' % sched.lod, cur_epoch=cur_nimg // 1000, max_epoch=total_kimg) maintenance_time = dnnlib.RunContext.get().get_last_update_interval() - tick_time # Save final snapshot. misc.save_pkl((G, D, Gs), dnnlib.make_run_dir_path('network-final.pkl')) # All done. summary_log.close() training_set.close() #---------------------------------------------------------------------------- ================================================ FILE: FQ-U-GAT-IT/LICENSE ================================================ MIT License Copyright (c) 2019 Junho Kim Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: FQ-U-GAT-IT/UGATIT.py ================================================
from ops import *
from utils import *
from glob import glob
import time
from tensorflow.contrib.data import prefetch_to_device, shuffle_and_repeat, map_and_batch
import numpy as np
import tensorflow as tf
from tensorflow.python.training import moving_averages
from vq_layer import VectorQuantizerEMA
import shutil

class UGATIT(object) :
    # U-GAT-IT model wrapper; optionally attaches VectorQuantizerEMA modules
    # to selected discriminator layers (see self.quantize below).
    def __init__(self, sess, args):
        """Copy hyper-parameters from `args`, create output folders, list the
        training images and build one quantizer per requested layer.

        `sess` is stored as-is; `args` is read attribute by attribute
        (presumably an argparse namespace -- confirm against main.py).
        """
        self.light = args.light
        self.if_quant = args.quant

        if self.light :
            self.model_name = 'UGATIT_light'
        else :
            self.model_name = 'UGATIT'

        self.sess = sess
        self.phase = args.phase
        self.checkpoint_dir = args.checkpoint_dir
        self.result_dir = args.result_dir
        self.log_dir = args.log_dir
        self.dataset_name = args.dataset
        self.augment_flag = args.augment_flag

        self.epoch = args.epoch
        self.iteration = args.iteration
        self.decay_flag = args.decay_flag
        self.decay_epoch = args.decay_epoch

        self.gan_type = args.gan_type

        self.batch_size = args.batch_size
        self.print_freq = args.print_freq
        self.save_freq = args.save_freq

        self.init_lr = args.lr
        self.ch = args.ch

        """ Weight """
        self.adv_weight = args.adv_weight
        self.cycle_weight = args.cycle_weight
        self.identity_weight = args.identity_weight
        self.cam_weight = args.cam_weight
        self.ld = args.GP_ld
        self.smoothing = args.smoothing

        """ Generator """
        self.n_res = args.n_res

        """ Discriminator """
        self.n_dis = args.n_dis
        self.n_critic = args.n_critic
        self.sn = args.sn

        self.img_size = args.img_size
        self.img_ch = args.img_ch

        self.test_train = args.test_train
        # The commitment cost only applies when quantization is enabled.
        if self.if_quant:
            self.commitment_cost = args.commitment_cost
        else:
            self.commitment_cost = 0.0

        # NOTE(review): 2028 looks like a typo for 2048 -- confirm against the
        # channel widths actually produced by discriminator_global.
        layerwise_channel = [64, 128, 256, 512, 1024, 2028]
        # NOTE(review): both `num_embed` definitions below are commented out, yet
        # `num_embed[layer]` is read in the quantizer loop further down. Unless a
        # star-import (ops / utils) provides `num_embed`, that loop raises
        # NameError as soon as quant_layers is non-empty -- verify.
        # num_embed = [5, 6, 7, 7, 7, 7]
        # num_embed = [5, 6, 7, 8, 9, 10]
        self.quantization_layer = args.quantization_layer
        # Iterates the argument element by element, so a string such as '123'
        # yields the layer indices [1, 2, 3].
        self.quant_layers = [int(x) for x in args.quantization_layer]
        self.decay = args.decay

        # self.model_dir is a property that reads the fields assigned above.
        self.sample_dir = os.path.join(args.sample_dir, self.model_dir)
        check_folder(self.sample_dir)
        self.result_dir = os.path.join(self.result_dir, self.model_dir)
        check_folder(self.result_dir)

        # self.trainA, self.trainB = prepare_data(dataset_name=self.dataset_name, size=self.img_size
        self.trainA_dataset = glob('./dataset/{}/*.*'.format(self.dataset_name + '/trainA'))
        self.trainB_dataset = glob('./dataset/{}/*.*'.format(self.dataset_name + '/trainB'))
        self.dataset_num = max(len(self.trainA_dataset), len(self.trainB_dataset))

        # One quantizer per requested discriminator layer, keyed by layer index.
        self.quantize = {}
        for layer in self.quant_layers:
            self.quantize[layer] = VectorQuantizerEMA(embedding_dim=layerwise_channel[layer], num_embeddings=2**num_embed[layer], commitment_cost=self.commitment_cost, decay=self.decay)

        print()

        print("##### Information #####")
        print("# light : ", self.light)
        print("# gan type : ", self.gan_type)
        print("# dataset : ", self.dataset_name)
        print("# max dataset number : ", self.dataset_num)
        print("# batch_size : ", self.batch_size)
        print("# epoch : ", self.epoch)
        print("# iteration per epoch : ", self.iteration)
        print("# smoothing : ", self.smoothing)

        print()

        print("##### Generator #####")
        print("# residual blocks : ", self.n_res)

        print()

        print("##### Discriminator #####")
        print("# discriminator layer : ", self.n_dis)
        print("# the number of critic : ", self.n_critic)
        print("# spectral normalization : ", self.sn)

        print()

        print("##### Weight #####")
        print("# adv_weight : ", self.adv_weight)
        print("# cycle_weight : ", self.cycle_weight)
        print("# identity_weight : ", self.identity_weight)
        print("# cam_weight : ", self.cam_weight)
@property
def model_dir(self):
    """Return the run-specific directory name derived from the hyper-parameters.

    Quantized runs get a `_q_` marker plus the quantization settings appended.
    """
    n_res = str(self.n_res) + 'resblock'
    n_dis = str(self.n_dis) + 'dis'
    smoothing = '_smoothing' if self.smoothing else ''
    sn = '_sn' if self.sn else ''

    if not self.if_quant:
        return "{}_{}_{}_{}_{}_{}_{}_{}_{}_{}{}{}".format(self.model_name, self.dataset_name,
                                                          self.gan_type, n_res, n_dis,
                                                          self.n_critic,
                                                          self.adv_weight, self.cycle_weight, self.identity_weight, self.cam_weight, sn, smoothing)
    return "{}_q_{}_{}_{}_{}_{}_{}_{}_{}_{}{}{}_{}_{}_{}".format(self.model_name, self.dataset_name,
                                                                 self.gan_type, n_res, n_dis,
                                                                 self.n_critic,
                                                                 self.adv_weight, self.cycle_weight,
                                                                 self.identity_weight, self.cam_weight, sn,
                                                                 smoothing, self.quantization_layer,
                                                                 self.commitment_cost, self.decay)

##################################################################################
# Generator
##################################################################################

def generator(self, x_init, reuse=False, scope="generator"):
    """Build the generator graph under `scope`.

    Returns a tuple `(image, cam_logit, heatmap)`: the tanh output, the
    concatenated CAM logits (avg-pool and max-pool branches) and the
    channel-summed attention map.
    """
    channel = self.ch
    with tf.variable_scope(scope, reuse=reuse) :
        x = conv(x_init, channel, kernel=7, stride=1, pad=3, pad_type='reflect', scope='conv')
        x = instance_norm(x, scope='ins_norm')
        x = relu(x)

        # Down-Sampling
        for i in range(2) :
            x = conv(x, channel*2, kernel=3, stride=2, pad=1, pad_type='reflect', scope='conv_'+str(i))
            x = instance_norm(x, scope='ins_norm_'+str(i))
            x = relu(x)

            channel = channel * 2

        # Down-Sampling Bottleneck
        for i in range(self.n_res):
            x = resblock(x, channel, scope='resblock_' + str(i))

        # Class Activation Map
        cam_x = global_avg_pooling(x)
        cam_gap_logit, cam_x_weight = fully_connected_with_w(cam_x, scope='CAM_logit')
        x_gap = tf.multiply(x, cam_x_weight)

        # The max-pool branch shares the 'CAM_logit' weights (reuse=True).
        cam_x = global_max_pooling(x)
        cam_gmp_logit, cam_x_weight = fully_connected_with_w(cam_x, reuse=True, scope='CAM_logit')
        x_gmp = tf.multiply(x, cam_x_weight)

        cam_logit = tf.concat([cam_gap_logit, cam_gmp_logit], axis=-1)
        x = tf.concat([x_gap, x_gmp], axis=-1)

        x = conv(x, channel, kernel=1, stride=1, scope='conv_1x1')
        x = relu(x)

        heatmap = tf.squeeze(tf.reduce_sum(x, axis=-1))

        # Gamma, Beta block
        gamma, beta = self.MLP(x, reuse=reuse)

        # Up-Sampling Bottleneck
        for i in range(self.n_res):
            x = adaptive_ins_layer_resblock(x, channel, gamma, beta, smoothing=self.smoothing, scope='adaptive_resblock' + str(i))

        # Up-Sampling
        for i in range(2) :
            x = up_sample(x, scale_factor=2)
            x = conv(x, channel//2, kernel=3, stride=1, pad=1, pad_type='reflect', scope='up_conv_'+str(i))
            x = layer_instance_norm(x, scope='layer_ins_norm_'+str(i))
            x = relu(x)

            channel = channel // 2

        x = conv(x, channels=3, kernel=7, stride=1, pad=3, pad_type='reflect', scope='G_logit')
        x = tanh(x)

        return x, cam_logit, heatmap

def MLP(self, x, use_bias=True, reuse=False, scope='MLP'):
    """Predict `(gamma, beta)`, each reshaped to [batch_size, 1, 1, ch * n_res]."""
    channel = self.ch * self.n_res

    # The light model pools the features before the fully connected layers.
    if self.light :
        x = global_avg_pooling(x)

    with tf.variable_scope(scope, reuse=reuse):
        for i in range(2) :
            x = fully_connected(x, channel, use_bias, scope='linear_' + str(i))
            x = relu(x)

        gamma = fully_connected(x, channel, use_bias, scope='gamma')
        beta = fully_connected(x, channel, use_bias, scope='beta')

        gamma = tf.reshape(gamma, shape=[self.batch_size, 1, 1, channel])
        beta = tf.reshape(beta, shape=[self.batch_size, 1, 1, channel])

        return gamma, beta

##################################################################################
# Discriminator
##################################################################################

def discriminator(self, x_init, reuse=False, scope="discriminator"):
    """Run the local and global discriminators on `x_init`.

    Returns `(D_logit, D_CAM_logit, local_heatmap, global_heatmap, quant_loss,
    ppl)`; the two lists hold the [local, global] outputs, while `quant_loss`
    and `ppl` come from the global discriminator only.
    """
    D_logit = []
    D_CAM_logit = []
    with tf.variable_scope(scope, reuse=reuse) :
        local_x, local_cam, local_heatmap = self.discriminator_local(x_init, reuse=reuse, scope='local')
        global_x, global_cam, global_heatmap, quant_loss, ppl = self.discriminator_global(
            x_init, reuse=reuse, scope='global')

        D_logit.extend([local_x, global_x])
        D_CAM_logit.extend([local_cam, global_cam])

        return D_logit, D_CAM_logit, local_heatmap, global_heatmap, quant_loss, ppl

def discriminator_global(self, x_init, reuse=False, scope='discriminator_global'):
    """Build the global discriminator with optional feature quantization.

    Returns `(logit, cam_logit, heatmap, quant_loss, ppl)`. `quant_loss` sums
    the loss returned by every quantizer that was applied; `ppl` is the
    perplexity of the last applied quantizer, or a constant 0 when none was.
    """
    with tf.variable_scope(scope, reuse=reuse):
        quant_loss = 0
        # Fix: `ppl` used to be assigned only inside the quantization branch, so
        # the return raised UnboundLocalError whenever no layer index matched.
        # A tensor (not a Python float) keeps `sess.run` on the summed
        # perplexity valid in that case.
        ppl = tf.constant(0.0)
        channel = self.ch
        x = conv(x_init, channel, kernel=4, stride=2, pad=1, pad_type='reflect', sn=self.sn, scope='conv_0')
        x = lrelu(x, 0.2)

        for i in range(1, self.n_dis - 1):
            x = conv(x, channel * 2, kernel=4, stride=2, pad=1, pad_type='reflect', sn=self.sn, scope='conv_' + str(i))
            x = lrelu(x, 0.2)
            if i in self.quant_layers:
                diff, ppl = self.quantize[i](x, reuse, layer=i)
                quant_loss += diff
            channel = channel * 2

        x = conv(x, channel * 2, kernel=4, stride=1, pad=1, pad_type='reflect', sn=self.sn, scope='conv_last')
        x = lrelu(x, 0.2)

        channel = channel * 2

        cam_x = global_avg_pooling(x)
        cam_gap_logit, cam_x_weight = fully_connected_with_w(cam_x, sn=self.sn, scope='CAM_logit')
        x_gap = tf.multiply(x, cam_x_weight)

        cam_x = global_max_pooling(x)
        cam_gmp_logit, cam_x_weight = fully_connected_with_w(cam_x, sn=self.sn, reuse=True, scope='CAM_logit')
        x_gmp = tf.multiply(x, cam_x_weight)

        cam_logit = tf.concat([cam_gap_logit, cam_gmp_logit], axis=-1)
        x = tf.concat([x_gap, x_gmp], axis=-1)

        x = conv(x, channel, kernel=1, stride=1, scope='conv_1x1')
        x = lrelu(x, 0.2)

        heatmap = tf.squeeze(tf.reduce_sum(x, axis=-1))

        x = conv(x, channels=1, kernel=4, stride=1, pad=1, pad_type='reflect', sn=self.sn, scope='D_logit')

        return x, cam_logit, heatmap, quant_loss, ppl

def discriminator_local(self, x_init, reuse=False, scope='discriminator_local'):
    """Build the local discriminator (two fewer strided conv layers than the
    global one, no quantization). Returns `(logit, cam_logit, heatmap)`."""
    with tf.variable_scope(scope, reuse=reuse) :
        channel = self.ch
        x = conv(x_init, channel, kernel=4, stride=2, pad=1, pad_type='reflect', sn=self.sn, scope='conv_0')
        x = lrelu(x, 0.2)

        for i in range(1, self.n_dis - 2 - 1):
            x = conv(x, channel * 2, kernel=4, stride=2, pad=1, pad_type='reflect', sn=self.sn, scope='conv_' + str(i))
            x = lrelu(x, 0.2)

            channel = channel * 2

        x = conv(x, channel * 2, kernel=4, stride=1, pad=1, pad_type='reflect', sn=self.sn, scope='conv_last')
        x = lrelu(x, 0.2)

        channel = channel * 2

        cam_x = global_avg_pooling(x)
        cam_gap_logit, cam_x_weight = fully_connected_with_w(cam_x, sn=self.sn, scope='CAM_logit')
        x_gap = tf.multiply(x, cam_x_weight)

        cam_x = global_max_pooling(x)
        cam_gmp_logit, cam_x_weight = fully_connected_with_w(cam_x, sn=self.sn, reuse=True, scope='CAM_logit')
        x_gmp = tf.multiply(x, cam_x_weight)

        cam_logit = tf.concat([cam_gap_logit, cam_gmp_logit], axis=-1)
        x = tf.concat([x_gap, x_gmp], axis=-1)

        x = conv(x, channel, kernel=1, stride=1, scope='conv_1x1')
        x = lrelu(x, 0.2)

        heatmap = tf.squeeze(tf.reduce_sum(x, axis=-1))

        x = conv(x, channels=1, kernel=4, stride=1, pad=1, pad_type='reflect', sn=self.sn, scope='D_logit')

        return x, cam_logit, heatmap

##################################################################################
# Model
##################################################################################

def generate_a2b(self, x_A, reuse=False):
    """Translate A -> B with "generator_B"; returns `(image, cam_logit)`."""
    out, cam, _ = self.generator(x_A, reuse=reuse, scope="generator_B")

    return out, cam

def generate_b2a(self, x_B, reuse=False):
    """Translate B -> A with "generator_A"; returns `(image, cam_logit)`."""
    out, cam, _ = self.generator(x_B, reuse=reuse, scope="generator_A")

    return out, cam

def discriminate_real(self, x_A, x_B):
    """Score real images of both domains (creates the discriminator variables).

    Returns `(A_logit, A_cam_logit, B_logit, B_cam_logit, quant_loss, ppl)`.
    """
    real_A_logit, real_A_cam_logit, _, _, quant_loss_A, ppl_A = self.discriminator(x_A, scope="discriminator_A")
    real_B_logit, real_B_cam_logit, _, _, quant_loss_B, ppl_B = self.discriminator(x_B, scope="discriminator_B")

    # NOTE(review): perplexities are summed here but averaged in
    # discriminate_fake -- confirm which of the two is intended.
    return real_A_logit, real_A_cam_logit, real_B_logit, real_B_cam_logit, \
           quant_loss_A+quant_loss_B, ppl_A+ppl_B

def discriminate_fake(self, x_ba, x_ab):
    """Score generated images of both domains, reusing discriminator variables.

    Returns `(A_logit, A_cam_logit, B_logit, B_cam_logit, quant_loss, ppl)`.
    """
    fake_A_logit, fake_A_cam_logit, _, _, quant_loss_A, ppl_A = self.discriminator(x_ba, reuse=True, scope="discriminator_A")
    fake_B_logit, fake_B_cam_logit, _, _, quant_loss_B, ppl_B = self.discriminator(x_ab, reuse=True, scope="discriminator_B")

    return fake_A_logit, fake_A_cam_logit, fake_B_logit, fake_B_cam_logit, \
           quant_loss_A+quant_loss_B, (ppl_A+ppl_B)/2

def gradient_panalty(self, real, fake, scope="discriminator_A"):
    """Return `(GP, cam_GP)` gradient penalties at points between real and fake.

    For gan types containing 'dragan', `fake` is replaced by a noisy copy of
    `real`. Both results are 0 when `gan_type` is none of 'wgan-lp',
    'wgan-gp' or 'dragan'.
    """
    if 'dragan' in self.gan_type:
        eps = tf.random_uniform(shape=tf.shape(real), minval=0., maxval=1.)
        _, x_var = tf.nn.moments(real, axes=[0, 1, 2, 3])
        x_std = tf.sqrt(x_var)  # magnitude of noise decides the size of local region

        fake = real + 0.5 * x_std * eps

    alpha = tf.random_uniform(shape=[self.batch_size, 1, 1, 1], minval=0., maxval=1.)
    interpolated = real + alpha * (fake - real)

    logit, cam_logit, _, _, _, _ = self.discriminator(interpolated, reuse=True, scope=scope)

    def _penalties(logits):
        # One penalty term per discriminator head (index 0 = local, 1 = global).
        terms = []
        for i in range(2):
            grad = tf.gradients(logits[i], interpolated)[0]  # gradient of D(interpolated)
            grad_norm = tf.norm(flatten(grad), axis=1)  # l2 norm

            if self.gan_type == 'wgan-lp':
                # WGAN-LP: one-sided penalty.
                terms.append(self.ld * tf.reduce_mean(tf.square(tf.maximum(0.0, grad_norm - 1.))))
            elif self.gan_type == 'wgan-gp' or self.gan_type == 'dragan':
                terms.append(self.ld * tf.reduce_mean(tf.square(grad_norm - 1.)))
        return terms

    GP = _penalties(logit)
    cam_GP = _penalties(cam_logit)

    return sum(GP), sum(cam_GP)

def build_model(self):
    """Build the training graph (phase 'train') or the test graph (phase 'test').

    In the train phase this defines the input pipelines, losses, optimizers and
    summaries; if `self.test_train` is set it also adds single-image test
    placeholders that reuse the generator variables.
    """
    if self.phase == 'train' :
        self.lr = tf.placeholder(tf.float32, name='learning_rate')

        """ Input Image"""
        Image_Data_Class = ImageData(self.img_size, self.img_ch, self.augment_flag)

        trainA = tf.data.Dataset.from_tensor_slices(self.trainA_dataset)
        trainB = tf.data.Dataset.from_tensor_slices(self.trainB_dataset)

        gpu_device = '/gpu:0'
        trainA = trainA.apply(shuffle_and_repeat(self.dataset_num)).apply(map_and_batch(Image_Data_Class.image_processing, self.batch_size, num_parallel_batches=16, drop_remainder=True)).apply(prefetch_to_device(gpu_device, None))
        trainB = trainB.apply(shuffle_and_repeat(self.dataset_num)).apply(map_and_batch(Image_Data_Class.image_processing, self.batch_size, num_parallel_batches=16, drop_remainder=True)).apply(prefetch_to_device(gpu_device, None))

        trainA_iterator = trainA.make_one_shot_iterator()
        trainB_iterator = trainB.make_one_shot_iterator()

        self.domain_A = trainA_iterator.get_next()
        self.domain_B = trainB_iterator.get_next()

        """ Define Generator, Discriminator """
        x_ab, cam_ab = self.generate_a2b(self.domain_A)  # real a
        x_ba, cam_ba = self.generate_b2a(self.domain_B)  # real b

        x_aba, _ = self.generate_b2a(x_ab, reuse=True)  # real b
        x_bab, _ = self.generate_a2b(x_ba, reuse=True)  # real a

        x_aa, cam_aa = self.generate_b2a(self.domain_A, reuse=True)  # fake b
        x_bb, cam_bb = self.generate_a2b(self.domain_B, reuse=True)  # fake a

        real_A_logit, real_A_cam_logit, real_B_logit, real_B_cam_logit, real_quant_loss,\
        real_ppl = self.discriminate_real(self.domain_A, self.domain_B)
        fake_A_logit, fake_A_cam_logit, fake_B_logit, fake_B_cam_logit, fake_quant_loss, \
        fake_ppl = self.discriminate_fake(x_ba, x_ab)
        self.ppl = real_ppl + fake_ppl

        """ Define Loss """
        if 'wgan' in self.gan_type or self.gan_type == 'dragan' :
            GP_A, GP_CAM_A = self.gradient_panalty(real=self.domain_A, fake=x_ba, scope="discriminator_A")
            GP_B, GP_CAM_B = self.gradient_panalty(real=self.domain_B, fake=x_ab, scope="discriminator_B")
        else :
            GP_A, GP_CAM_A = 0, 0
            GP_B, GP_CAM_B = 0, 0

        G_ad_loss_A = (generator_loss(self.gan_type, fake_A_logit) + generator_loss(self.gan_type, fake_A_cam_logit))
        G_ad_loss_B = (generator_loss(self.gan_type, fake_B_logit) + generator_loss(self.gan_type, fake_B_cam_logit))

        D_ad_loss_A = (discriminator_loss(self.gan_type, real_A_logit, fake_A_logit) + discriminator_loss(self.gan_type, real_A_cam_logit, fake_A_cam_logit) + GP_A + GP_CAM_A)
        D_ad_loss_B = (discriminator_loss(self.gan_type, real_B_logit, fake_B_logit) + discriminator_loss(self.gan_type, real_B_cam_logit, fake_B_cam_logit) + GP_B + GP_CAM_B)

        reconstruction_A = L1_loss(x_aba, self.domain_A)  # reconstruction
        reconstruction_B = L1_loss(x_bab, self.domain_B)  # reconstruction

        identity_A = L1_loss(x_aa, self.domain_A)
        identity_B = L1_loss(x_bb, self.domain_B)

        cam_A = cam_loss(source=cam_ba, non_source=cam_aa)
        cam_B = cam_loss(source=cam_ab, non_source=cam_bb)

        Generator_A_gan = self.adv_weight * G_ad_loss_A
        Generator_A_cycle = self.cycle_weight * reconstruction_B
        Generator_A_identity = self.identity_weight * identity_A
        Generator_A_cam = self.cam_weight * cam_A

        Generator_B_gan = self.adv_weight * G_ad_loss_B
        Generator_B_cycle = self.cycle_weight * reconstruction_A
        Generator_B_identity = self.identity_weight * identity_B
        Generator_B_cam = self.cam_weight * cam_B

        Generator_A_loss = Generator_A_gan + Generator_A_cycle + Generator_A_identity + Generator_A_cam
        Generator_B_loss = Generator_B_gan + Generator_B_cycle + Generator_B_identity + Generator_B_cam

        Discriminator_A_loss = self.adv_weight * D_ad_loss_A
        Discriminator_B_loss = self.adv_weight * D_ad_loss_B

        self.Generator_loss = Generator_A_loss + Generator_B_loss + regularization_loss(
            'generator') + fake_quant_loss
        self.Discriminator_loss = Discriminator_A_loss + Discriminator_B_loss + \
                                  regularization_loss('discriminator') + real_quant_loss + fake_quant_loss

        """ Result Image """
        self.fake_A = x_ba
        self.fake_B = x_ab

        self.real_A = self.domain_A
        self.real_B = self.domain_B

        """ Training """
        t_vars = tf.trainable_variables()
        G_vars = [var for var in t_vars if 'generator' in var.name]
        D_vars = [var for var in t_vars if 'discriminator' in var.name]

        self.G_optim = tf.train.AdamOptimizer(self.lr, beta1=0.5, beta2=0.999).minimize(self.Generator_loss, var_list=G_vars)
        self.D_optim = tf.train.AdamOptimizer(self.lr, beta1=0.5, beta2=0.999).minimize(self.Discriminator_loss, var_list=D_vars)

        """" Summary """
        self.all_G_loss = tf.summary.scalar("Generator_loss", self.Generator_loss)
        self.all_D_loss = tf.summary.scalar("Discriminator_loss", self.Discriminator_loss)

        self.G_A_loss = tf.summary.scalar("G_A_loss", Generator_A_loss)
        self.G_A_gan = tf.summary.scalar("G_A_gan", Generator_A_gan)
        self.G_A_cycle = tf.summary.scalar("G_A_cycle", Generator_A_cycle)
        self.G_A_identity = tf.summary.scalar("G_A_identity", Generator_A_identity)
        self.G_A_cam = tf.summary.scalar("G_A_cam", Generator_A_cam)

        self.G_B_loss = tf.summary.scalar("G_B_loss", Generator_B_loss)
        self.G_B_gan = tf.summary.scalar("G_B_gan", Generator_B_gan)
        self.G_B_cycle = tf.summary.scalar("G_B_cycle", Generator_B_cycle)
        self.G_B_identity = tf.summary.scalar("G_B_identity", Generator_B_identity)
        self.G_B_cam = tf.summary.scalar("G_B_cam", Generator_B_cam)

        self.D_A_loss = tf.summary.scalar("D_A_loss", Discriminator_A_loss)
        self.D_B_loss = tf.summary.scalar("D_B_loss", Discriminator_B_loss)

        self.rho_var = []
        for var in tf.trainable_variables():
            if 'rho' in var.name:
                self.rho_var.append(tf.summary.histogram(var.name, var))
                self.rho_var.append(tf.summary.scalar(var.name + "_min", tf.reduce_min(var)))
                self.rho_var.append(tf.summary.scalar(var.name + "_max", tf.reduce_max(var)))
                self.rho_var.append(tf.summary.scalar(var.name + "_mean", tf.reduce_mean(var)))

        g_summary_list = [self.G_A_loss, self.G_A_gan, self.G_A_cycle, self.G_A_identity, self.G_A_cam,
                          self.G_B_loss, self.G_B_gan, self.G_B_cycle, self.G_B_identity, self.G_B_cam,
                          self.all_G_loss]

        g_summary_list.extend(self.rho_var)
        d_summary_list = [self.D_A_loss, self.D_B_loss, self.all_D_loss]

        self.G_loss = tf.summary.merge(g_summary_list)
        self.D_loss = tf.summary.merge(d_summary_list)
        # self.ppl = tf.summary.scalar('Perplexity', self.ppl)

        if self.test_train:
            """ Test """
            self.test_domain_A = tf.placeholder(tf.float32, [1, self.img_size, self.img_size, self.img_ch], name='test_domain_A')
            self.test_domain_B = tf.placeholder(tf.float32, [1, self.img_size, self.img_size, self.img_ch], name='test_domain_B')

            self.test_fake_B, _ = self.generate_a2b(self.test_domain_A, reuse=True)
            self.test_fake_A, _ = self.generate_b2a(self.test_domain_B, reuse=True)

    elif self.phase == 'test':
        self.test_domain_A = tf.placeholder(tf.float32, [1, self.img_size, self.img_size, self.img_ch], name='test_domain_A')
        self.test_domain_B = tf.placeholder(tf.float32, [1, self.img_size, self.img_size, self.img_ch], name='test_domain_B')

        self.test_fake_B, _ = self.generate_a2b(self.test_domain_A)
        self.test_fake_A, _ = self.generate_b2a(self.test_domain_B)

def train(self):
    """Run the training loop, resuming from the latest checkpoint if one exists.

    The discriminator is updated every iteration and the generator every
    `n_critic` iterations. Sample images are written every `print_freq`
    iterations and a checkpoint every 5 epochs.
    """
    # initialize all variables
    tf.global_variables_initializer().run()

    # saver to save model
    self.saver = tf.train.Saver()

    # summary writer
    self.writer = tf.summary.FileWriter(self.log_dir + '/' + self.model_dir, self.sess.graph)

    # restore check-point if it exists
    could_load, checkpoint_counter = self.load(self.checkpoint_dir)
    if could_load:
        start_epoch = int(checkpoint_counter / self.iteration)
        start_batch_id = checkpoint_counter - start_epoch * self.iteration
        counter = checkpoint_counter
        print(" [*] Load SUCCESS")
    else:
        start_epoch = 0
        start_batch_id = 0
        counter = 1
        print(" [!] Load failed...")

    # loop for epoch
    start_time = time.time()
    past_g_loss = -1.
    lr = self.init_lr
    # Fix: the sample images only exist after the first generator step, which
    # is not guaranteed to precede the first dump after resuming a checkpoint.
    batch_A_images = None
    fake_B = None
    for epoch in range(start_epoch, self.epoch):
        # lr = self.init_lr if epoch < self.decay_epoch else self.init_lr * (self.epoch - epoch) / (self.epoch - self.decay_epoch)
        if self.decay_flag :
            #lr = self.init_lr * pow(0.5, epoch // self.decay_epoch)
            # Constant until decay_epoch, then linear decay to zero at self.epoch.
            lr = self.init_lr if epoch < self.decay_epoch else self.init_lr * (self.epoch - epoch) / (self.epoch - self.decay_epoch)
        for idx in range(start_batch_id, self.iteration):
            train_feed_dict = {
                self.lr : lr
            }

            # Update D
            _, d_loss, summary_str, ppl = self.sess.run([self.D_optim,
                                                         self.Discriminator_loss, self.D_loss, self.ppl], feed_dict = train_feed_dict)
            self.writer.add_summary(summary_str, counter)

            # Update G
            g_loss = None
            if (counter - 1) % self.n_critic == 0 :
                batch_A_images, batch_B_images, fake_A, fake_B, _, g_loss, summary_str = self.sess.run([self.real_A, self.real_B,
                                                                                                        self.fake_A, self.fake_B,
                                                                                                        self.G_optim,
                                                                                                        self.Generator_loss, self.G_loss], feed_dict = train_feed_dict)
                self.writer.add_summary(summary_str, counter)
                past_g_loss = g_loss

            # display training status
            counter += 1
            if g_loss is None :
                # No generator step this iteration: report the last known loss.
                g_loss = past_g_loss
            if idx % 1000==0:
                print("Epoch: [%2d] [%5d/%5d] time: %4.4f d_loss: %.8f, g_loss: %.8f, ppl: %.4f" % (epoch, idx, self.iteration, time.time() - start_time, d_loss, g_loss, ppl))

            if np.mod(idx+1, self.print_freq) == 0 and batch_A_images is not None :
                save_images(batch_A_images, [self.batch_size, 1],
                            './{}/real_A_{:03d}_{:05d}.png'.format(self.sample_dir, epoch, idx+1))
                # save_images(batch_B_images, [self.batch_size, 1],
                #             './{}/real_B_{:03d}_{:05d}.png'.format(self.sample_dir, epoch, idx+1))

                # save_images(fake_A, [self.batch_size, 1],
                #             './{}/fake_A_{:03d}_{:05d}.png'.format(self.sample_dir, epoch, idx+1))
                save_images(fake_B, [self.batch_size, 1],
                            './{}/fake_B_{:03d}_{:05d}.png'.format(self.sample_dir, epoch, idx+1))

            # if np.mod(idx + 1, self.save_freq) == 0:
            #     self.save(self.checkpoint_dir, counter)

        # After an epoch, start_batch_id is set to zero
        # non-zero value is only for the first epoch after loading pre-trained model
        start_batch_id = 0
        # if epoch % 2 == 0:
        # NOTE(review): confirm the per-epoch test call is meant to be active; it
        # needs the test placeholders that build_model only creates when
        # `test_train` is set.
        self.test(epoch)
        # save model for final step
        if np.mod(epoch+1, 5) == 0:
            self.save(self.checkpoint_dir, counter)

def save(self, checkpoint_dir, step, max_retries=5, retry_delay=1.0):
    """Write a checkpoint for `step` under `checkpoint_dir/model_dir`.

    Fix: the previous implementation retried forever behind a bare `except`,
    which hid the real error and could not be interrupted. Failures are now
    reported, retried at most `max_retries` times with `retry_delay` seconds
    in between, and the last exception is re-raised.

    Raises:
        Exception: whatever `self.saver.save` raised on the final attempt.
    """
    checkpoint_dir = os.path.join(checkpoint_dir, self.model_dir)

    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    save_path = os.path.join(checkpoint_dir, self.model_name + '.model')
    attempts = max(1, int(max_retries))
    for attempt in range(1, attempts + 1):
        try:
            self.saver.save(self.sess, save_path, global_step=step)
            return
        except Exception as err:
            print(" [!] Checkpoint save failed (attempt %d/%d): %s" % (attempt, attempts, err))
            if attempt == attempts:
                raise
            time.sleep(retry_delay)

def load(self, checkpoint_dir):
    """Restore the latest checkpoint from `checkpoint_dir/model_dir`.

    Returns `(True, step)` when a checkpoint was restored, where `step` is
    parsed from the text after the last '-' in the checkpoint file name,
    otherwise `(False, 0)`.
    """
    print(" [*] Reading checkpoints...")
    checkpoint_dir = os.path.join(checkpoint_dir, self.model_dir)

    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
        ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
        self.saver.restore(self.sess, os.path.join(checkpoint_dir, ckpt_name))
        counter = int(ckpt_name.split('-')[-1])
        print(" [*] Success to read {}".format(ckpt_name))
        return True, counter

    print(" [*] Failed to find a checkpoint")
    return False, 0
def test(self, epoch): if not self.test_train: tf.global_variables_initializer().run() self.saver = tf.train.Saver() could_load, checkpoint_counter = self.load(self.checkpoint_dir) if could_load : print(" [*] Load SUCCESS") else : print(" [!] Load failed...") test_A_root = './dataset/{}'.format(self.dataset_name+'/testA') test_B_root = './dataset/{}'.format(self.dataset_name+'/testB') train_A_root = './dataset/{}'.format(self.dataset_name + '/trainA') train_B_root = './dataset/{}'.format(self.dataset_name + '/trainB') test_A_files = glob('./dataset/{}/*.*'.format(self.dataset_name + '/testA')) test_B_files = glob('./dataset/{}/*.*'.format(self.dataset_name + '/testB')) A2B_root = os.path.join(self.result_dir, '{:03d}-{}'.format(epoch, 'A-B')) B2A_root = os.path.join(self.result_dir, '{:03d}-{}'.format(epoch, 'B-A')) # check_folder(self.result_dir) check_folder(A2B_root) check_folder(B2A_root) for sample_file in test_A_files : # A -> B # print('Processing A image: ' + sample_file) sample_image = np.asarray(load_test_data(sample_file, size=self.img_size)) image_path = os.path.join(A2B_root, os.path.basename(sample_file)) fake_img = self.sess.run(self.test_fake_B, feed_dict = {self.test_domain_A : sample_image}) save_images(fake_img, [1, 1], image_path) for sample_file in test_B_files : # B -> A sample_image = np.asarray(load_test_data(sample_file, size=self.img_size)) image_path = os.path.join(B2A_root, os.path.basename(sample_file)) fake_img = self.sess.run(self.test_fake_A, feed_dict = {self.test_domain_B : sample_image}) save_images(fake_img, [1, 1], image_path) ================================================ FILE: FQ-U-GAT-IT/dataset/download_dataset_1.sh ================================================ DATASET=$1 if [[$DATASET != "portrait" && $DATASET != "cat2dog"]]; then echo "dataset not available" exit fi URL=http://vllab.ucmerced.edu/hylee/DRIT/datasets/$DATASET.zip wget -N $URL -O ../dataset/$DATASET.zip unzip ../dataset/$DATASET.zip -d ../dataset rm 
../dataset/$DATASET.zip ================================================ FILE: FQ-U-GAT-IT/download_dataset_2.sh ================================================ #!/bin/bash # https://github.com/junyanz/CycleGAN/blob/master/datasets/download_dataset.sh FILE=$1 if [[ $FILE != "ae_photos" && $FILE != "apple2orange" && $FILE != "summer2winter_yosemite" && $FILE != "horse2zebra" && $FILE != "monet2photo" && $FILE != "cezanne2photo" && $FILE != "ukiyoe2photo" && $FILE != "vangogh2photo" && $FILE != "maps" && $FILE != "cityscapes" && $FILE != "facades" && $FILE != "iphone2dslr_flower" && $FILE != "ae_photos" ]]; then echo "Available datasets are: apple2orange, summer2winter_yosemite, horse2zebra, monet2photo, cezanne2photo, ukiyoe2photo, vangogh2photo, maps, cityscapes, facades, iphone2dslr_flower, ae_photos" exit 1 fi URL=https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/$FILE.zip ZIP_FILE=./dataset/$FILE.zip TARGET_DIR=./dataset/$FILE/ wget -N $URL -O $ZIP_FILE mkdir -p $TARGET_DIR unzip $ZIP_FILE -d ./dataset/ rm $ZIP_FILE ================================================ FILE: FQ-U-GAT-IT/logger.py ================================================ import sys class Logger(object): def __init__(self, output_file): self.terminal = sys.stdout self.log = open(output_file, "w") def write(self, message): print(message, end="", file=self.terminal, flush=True) print(message, end="", file=self.log, flush=True) def flush(self): self.terminal.flush() self.log.flush() ================================================ FILE: FQ-U-GAT-IT/main.py ================================================ from UGATIT import UGATIT import argparse from utils import * from logger import Logger import sys """parsing and configuration""" def parse_args(): desc = "Tensorflow implementation of U-GAT-IT" parser = argparse.ArgumentParser(description=desc) parser.add_argument('--phase', type=str, default='test', help='[train / test]') parser.add_argument('--light', type=str2bool, 
default=False, help='[U-GAT-IT full version / ' 'U-GAT-IT light version]') parser.add_argument('--dataset', type=str, default='selfie2anime', help='dataset_name') parser.add_argument('--epoch', type=int, default=101, help='The number of epochs to run') parser.add_argument('--iteration', type=int, default=10000, help='The number of training ' 'iterations') parser.add_argument('--batch_size', type=int, default=1, help='The size of batch size') parser.add_argument('--print_freq', type=int, default=1000, help='The number of ' 'image_print_freq') parser.add_argument('--save_freq', type=int, default=10, help='The number of ckpt_save_freq') parser.add_argument('--decay_flag', type=str2bool, default=True, help='The decay_flag') parser.add_argument('--decay_epoch', type=int, default=50, help='decay epoch') parser.add_argument('--lr', type=float, default=0.0001, help='The learning rate') parser.add_argument('--GP_ld', type=int, default=10, help='The gradient penalty lambda') parser.add_argument('--adv_weight', type=int, default=1, help='Weight about GAN') parser.add_argument('--cycle_weight', type=int, default=10, help='Weight about Cycle') parser.add_argument('--identity_weight', type=int, default=10, help='Weight about Identity') parser.add_argument('--cam_weight', type=int, default=1000, help='Weight about CAM') parser.add_argument('--gan_type', type=str, default='lsgan', help='[gan / lsgan / wgan-gp / wgan-lp / dragan / hinge]') parser.add_argument('--smoothing', type=str2bool, default=True, help='AdaLIN smoothing effect') parser.add_argument('--ch', type=int, default=64, help='base channel number per layer') parser.add_argument('--n_res', type=int, default=4, help='The number of resblock') parser.add_argument('--n_dis', type=int, default=6, help='The number of discriminator layer') parser.add_argument('--n_critic', type=int, default=1, help='The number of critic') parser.add_argument('--sn', type=str2bool, default=True, help='using spectral norm') 
parser.add_argument('--img_size', type=int, default=256, help='The size of image') parser.add_argument('--img_ch', type=int, default=3, help='The size of image channel') parser.add_argument('--augment_flag', type=str2bool, default=True, help='Image augmentation use or not') parser.add_argument('--checkpoint_dir', type=str, default='checkpoint', help='Directory name to save the checkpoints') parser.add_argument('--result_dir', type=str, default='results', help='Directory name to save the generated images') parser.add_argument('--log_dir', type=str, default='logs', help='Directory name to save training logs') parser.add_argument('--sample_dir', type=str, default='samples', help='Directory name to save the samples on training') # Quantization argument parser.add_argument('--quant', type=str2bool, default=True, help='quantization or not?') parser.add_argument('--commitment_cost', type=float, default=2.0, help='commitment cost') parser.add_argument('--quantization_layer', type=str, default='123', help='which layer?') parser.add_argument('--decay', type=float, default=0.85, help='dictionary learning decay') parser.add_argument('--test_train', type=str2bool, default=True, help='if test while training') return check_args(parser.parse_args()) """checking arguments""" def check_args(args): # --checkpoint_dir if args.quant: args.checkpoint_dir += '_quant' args.result_dir += '_quant' args.log_dir += '_quant' args.sample_dir += '_quant' check_folder(args.checkpoint_dir) # --result_dir check_folder(args.result_dir) # --result_dir check_folder(args.log_dir) # --sample_dir check_folder(args.sample_dir) # --epoch try: assert args.epoch >= 1 except: print('number of epochs must be larger than or equal to one') # --batch_size try: assert args.batch_size >= 1 except: print('batch size must be larger than or equal to one') return args """main""" def main(): # parse arguments args = parse_args() if args is None: exit() # open session with 
tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: gan = UGATIT(sess, args) # build graph gan.build_model() # show network architecture show_all_variables() # check_folder(gan.model_dir) # sys.stdout = Logger(os.path.join(gan.model_dir, 'log.txt')) if args.phase == 'train' : gan.train() print(" [*] Training finished!") if args.phase == 'test' : gan.test(epoch=0) print(" [*] Test finished!") if __name__ == '__main__': main() ================================================ FILE: FQ-U-GAT-IT/ops.py ================================================ import tensorflow as tf import tensorflow.contrib as tf_contrib # Xavier : tf_contrib.layers.xavier_initializer() # He : tf_contrib.layers.variance_scaling_initializer() # Normal : tf.random_normal_initializer(mean=0.0, stddev=0.02) # l2_decay : tf_contrib.layers.l2_regularizer(0.0001) weight_init = tf.random_normal_initializer(mean=0.0, stddev=0.02) weight_regularizer = tf_contrib.layers.l2_regularizer(scale=0.0001) ################################################################################## # Layer ################################################################################## def conv(x, channels, kernel=4, stride=2, pad=0, pad_type='zero', use_bias=True, sn=False, scope='conv_0'): with tf.variable_scope(scope): if pad > 0 : if (kernel - stride) % 2 == 0: pad_top = pad pad_bottom = pad pad_left = pad pad_right = pad else: pad_top = pad pad_bottom = kernel - stride - pad_top pad_left = pad pad_right = kernel - stride - pad_left if pad_type == 'zero': x = tf.pad(x, [[0, 0], [pad_top, pad_bottom], [pad_left, pad_right], [0, 0]]) if pad_type == 'reflect': x = tf.pad(x, [[0, 0], [pad_top, pad_bottom], [pad_left, pad_right], [0, 0]], mode='REFLECT') if sn : w = tf.get_variable("kernel", shape=[kernel, kernel, x.get_shape()[-1], channels], initializer=weight_init, regularizer=weight_regularizer) x = tf.nn.conv2d(input=x, filter=spectral_norm(w), strides=[1, stride, stride, 1], padding='VALID') if 
use_bias : bias = tf.get_variable("bias", [channels], initializer=tf.constant_initializer(0.0)) x = tf.nn.bias_add(x, bias) else : x = tf.layers.conv2d(inputs=x, filters=channels, kernel_size=kernel, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, strides=stride, use_bias=use_bias) return x def fully_connected_with_w(x, use_bias=True, sn=False, reuse=False, scope='linear'): with tf.variable_scope(scope, reuse=reuse): x = flatten(x) bias = 0.0 shape = x.get_shape().as_list() channels = shape[-1] w = tf.get_variable("kernel", [channels, 1], tf.float32, initializer=weight_init, regularizer=weight_regularizer) if sn : w = spectral_norm(w) if use_bias : bias = tf.get_variable("bias", [1], initializer=tf.constant_initializer(0.0)) x = tf.matmul(x, w) + bias else : x = tf.matmul(x, w) if use_bias : weights = tf.gather(tf.transpose(tf.nn.bias_add(w, bias)), 0) else : weights = tf.gather(tf.transpose(w), 0) return x, weights def fully_connected(x, units, use_bias=True, sn=False, scope='linear'): with tf.variable_scope(scope): x = flatten(x) shape = x.get_shape().as_list() channels = shape[-1] if sn: w = tf.get_variable("kernel", [channels, units], tf.float32, initializer=weight_init, regularizer=weight_regularizer) if use_bias: bias = tf.get_variable("bias", [units], initializer=tf.constant_initializer(0.0)) x = tf.matmul(x, spectral_norm(w)) + bias else: x = tf.matmul(x, spectral_norm(w)) else : x = tf.layers.dense(x, units=units, kernel_initializer=weight_init, kernel_regularizer=weight_regularizer, use_bias=use_bias) return x def flatten(x) : return tf.layers.flatten(x) ################################################################################## # Residual-block ################################################################################## def resblock(x_init, channels, use_bias=True, scope='resblock_0'): with tf.variable_scope(scope): with tf.variable_scope('res1'): x = conv(x_init, channels, kernel=3, stride=1, pad=1, 
pad_type='reflect', use_bias=use_bias) x = instance_norm(x) x = relu(x) with tf.variable_scope('res2'): x = conv(x, channels, kernel=3, stride=1, pad=1, pad_type='reflect', use_bias=use_bias) x = instance_norm(x) return x + x_init def adaptive_ins_layer_resblock(x_init, channels, gamma, beta, use_bias=True, smoothing=True, scope='adaptive_resblock') : with tf.variable_scope(scope): with tf.variable_scope('res1'): x = conv(x_init, channels, kernel=3, stride=1, pad=1, pad_type='reflect', use_bias=use_bias) x = adaptive_instance_layer_norm(x, gamma, beta, smoothing) x = relu(x) with tf.variable_scope('res2'): x = conv(x, channels, kernel=3, stride=1, pad=1, pad_type='reflect', use_bias=use_bias) x = adaptive_instance_layer_norm(x, gamma, beta, smoothing) return x + x_init ################################################################################## # Sampling ################################################################################## def up_sample(x, scale_factor=2): _, h, w, _ = x.get_shape().as_list() new_size = [h * scale_factor, w * scale_factor] return tf.image.resize_nearest_neighbor(x, size=new_size) def global_avg_pooling(x): gap = tf.reduce_mean(x, axis=[1, 2]) return gap def global_max_pooling(x): gmp = tf.reduce_max(x, axis=[1, 2]) return gmp ################################################################################## # Activation function ################################################################################## def lrelu(x, alpha=0.01): # pytorch alpha is 0.01 return tf.nn.leaky_relu(x, alpha) def relu(x): return tf.nn.relu(x) def tanh(x): return tf.tanh(x) def sigmoid(x) : return tf.sigmoid(x) ################################################################################## # Normalization function ################################################################################## def adaptive_instance_layer_norm(x, gamma, beta, smoothing=True, scope='instance_layer_norm') : with tf.variable_scope(scope): ch = x.shape[-1] eps = 
1e-5 ins_mean, ins_sigma = tf.nn.moments(x, axes=[1, 2], keep_dims=True) x_ins = (x - ins_mean) / (tf.sqrt(ins_sigma + eps)) ln_mean, ln_sigma = tf.nn.moments(x, axes=[1, 2, 3], keep_dims=True) x_ln = (x - ln_mean) / (tf.sqrt(ln_sigma + eps)) rho = tf.get_variable("rho", [ch], initializer=tf.constant_initializer(1.0), constraint=lambda x: tf.clip_by_value(x, clip_value_min=0.0, clip_value_max=1.0)) if smoothing : rho = tf.clip_by_value(rho - tf.constant(0.1), 0.0, 1.0) x_hat = rho * x_ins + (1 - rho) * x_ln x_hat = x_hat * gamma + beta return x_hat def instance_norm(x, scope='instance_norm'): return tf_contrib.layers.instance_norm(x, epsilon=1e-05, center=True, scale=True, scope=scope) def layer_norm(x, scope='layer_norm') : return tf_contrib.layers.layer_norm(x, center=True, scale=True, scope=scope) def layer_instance_norm(x, scope='layer_instance_norm') : with tf.variable_scope(scope): ch = x.shape[-1] eps = 1e-5 ins_mean, ins_sigma = tf.nn.moments(x, axes=[1, 2], keep_dims=True) x_ins = (x - ins_mean) / (tf.sqrt(ins_sigma + eps)) ln_mean, ln_sigma = tf.nn.moments(x, axes=[1, 2, 3], keep_dims=True) x_ln = (x - ln_mean) / (tf.sqrt(ln_sigma + eps)) rho = tf.get_variable("rho", [ch], initializer=tf.constant_initializer(0.0), constraint=lambda x: tf.clip_by_value(x, clip_value_min=0.0, clip_value_max=1.0)) gamma = tf.get_variable("gamma", [ch], initializer=tf.constant_initializer(1.0)) beta = tf.get_variable("beta", [ch], initializer=tf.constant_initializer(0.0)) x_hat = rho * x_ins + (1 - rho) * x_ln x_hat = x_hat * gamma + beta return x_hat def spectral_norm(w, iteration=1): w_shape = w.shape.as_list() w = tf.reshape(w, [-1, w_shape[-1]]) u = tf.get_variable("u", [1, w_shape[-1]], initializer=tf.random_normal_initializer(), trainable=False) u_hat = u v_hat = None for i in range(iteration): """ power iteration Usually iteration = 1 will be enough """ v_ = tf.matmul(u_hat, tf.transpose(w)) v_hat = tf.nn.l2_normalize(v_) u_ = tf.matmul(v_hat, w) u_hat = 
tf.nn.l2_normalize(u_) u_hat = tf.stop_gradient(u_hat) v_hat = tf.stop_gradient(v_hat) sigma = tf.matmul(tf.matmul(v_hat, w), tf.transpose(u_hat)) with tf.control_dependencies([u.assign(u_hat)]): w_norm = w / sigma w_norm = tf.reshape(w_norm, w_shape) return w_norm ################################################################################## # Loss function ################################################################################## def L1_loss(x, y): loss = tf.reduce_mean(tf.abs(x - y)) return loss def cam_loss(source, non_source) : identity_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(source), logits=source)) non_identity_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(non_source), logits=non_source)) loss = identity_loss + non_identity_loss return loss def regularization_loss(scope_name) : """ If you want to use "Regularization" g_loss += regularization_loss('generator') d_loss += regularization_loss('discriminator') """ collection_regularization = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) loss = [] for item in collection_regularization : if scope_name in item.name : loss.append(item) return tf.reduce_sum(loss) def discriminator_loss(loss_func, real, fake): loss = [] real_loss = 0 fake_loss = 0 for i in range(2) : if loss_func.__contains__('wgan') : real_loss = -tf.reduce_mean(real[i]) fake_loss = tf.reduce_mean(fake[i]) if loss_func == 'lsgan' : real_loss = tf.reduce_mean(tf.squared_difference(real[i], 1.0)) fake_loss = tf.reduce_mean(tf.square(fake[i])) if loss_func == 'gan' or loss_func == 'dragan' : real_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(real[i]), logits=real[i])) fake_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.zeros_like(fake[i]), logits=fake[i])) if loss_func == 'hinge' : real_loss = tf.reduce_mean(relu(1.0 - real[i])) fake_loss = tf.reduce_mean(relu(1.0 + fake[i])) loss.append(real_loss 
+ fake_loss) return sum(loss) def generator_loss(loss_func, fake): loss = [] fake_loss = 0 for i in range(2) : if loss_func.__contains__('wgan') : fake_loss = -tf.reduce_mean(fake[i]) if loss_func == 'lsgan' : fake_loss = tf.reduce_mean(tf.squared_difference(fake[i], 1.0)) if loss_func == 'gan' or loss_func == 'dragan' : fake_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.ones_like(fake[i]), logits=fake[i])) if loss_func == 'hinge' : fake_loss = -tf.reduce_mean(fake[i]) loss.append(fake_loss) return sum(loss) ================================================ FILE: FQ-U-GAT-IT/utils.py ================================================ import tensorflow as tf from tensorflow.contrib import slim import cv2 import os, random import numpy as np class ImageData: def __init__(self, load_size, channels, augment_flag): self.load_size = load_size self.channels = channels self.augment_flag = augment_flag def image_processing(self, filename): x = tf.read_file(filename) x_decode = tf.image.decode_jpeg(x, channels=self.channels) img = tf.image.resize_images(x_decode, [self.load_size, self.load_size]) img = tf.cast(img, tf.float32) / 127.5 - 1 if self.augment_flag : augment_size = self.load_size + (30 if self.load_size == 256 else 15) p = random.random() if p > 0.5: img = augmentation(img, augment_size) return img def load_test_data(image_path, size=256): img = cv2.imread(image_path, flags=cv2.IMREAD_COLOR) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img = cv2.resize(img, dsize=(size, size)) img = np.expand_dims(img, axis=0) img = img/127.5 - 1 return img def augmentation(image, augment_size): seed = random.randint(0, 2 ** 31 - 1) ori_image_shape = tf.shape(image) image = tf.image.random_flip_left_right(image, seed=seed) image = tf.image.resize_images(image, [augment_size, augment_size]) image = tf.random_crop(image, ori_image_shape, seed=seed) return image def save_images(images, size, image_path): return imsave(inverse_transform(images), size, image_path) 
def inverse_transform(images): return ((images+1.) / 2) * 255.0 def imsave(images, size, path): images = merge(images, size) images = cv2.cvtColor(images.astype('uint8'), cv2.COLOR_RGB2BGR) return cv2.imwrite(path, images) def merge(images, size): h, w = images.shape[1], images.shape[2] img = np.zeros((h * size[0], w * size[1], 3)) for idx, image in enumerate(images): i = idx % size[1] j = idx // size[1] img[h*j:h*(j+1), w*i:w*(i+1), :] = image return img def show_all_variables(): model_vars = tf.trainable_variables() slim.model_analyzer.analyze_vars(model_vars, print_info=True) def check_folder(log_dir): if not os.path.exists(log_dir): os.makedirs(log_dir) return log_dir def str2bool(x): return x.lower() in ('true') ================================================ FILE: FQ-U-GAT-IT/vq_layer.py ================================================ import tensorflow as tf from tensorflow.python.training import moving_averages class VectorQuantizerEMA: """Sonnet module representing the VQ-VAE layer. Args: embedding_dim: integer representing the dimensionality of the tensors in the quantized space. Inputs to the modules must be in this format as well. num_embeddings: integer, the number of vectors in the quantized space. commitment_cost: scalar which controls the weighting of the loss terms (see equation 4 in the paper). decay: float, decay for the moving averages. epsilon: small float constant to avoid numerical instability. """ def __init__(self, embedding_dim, num_embeddings, commitment_cost, decay, epsilon=1e-5, name='VectorQuantizerEMA'): # super(VectorQuantizerEMA, self).__init__(name=name) self._embedding_dim = embedding_dim self._num_embeddings = num_embeddings self._decay = decay self._commitment_cost = commitment_cost self._epsilon = epsilon def __call__(self, inputs, reuse=False, layer=None, is_training=True): """Connects the module to some inputs. Args: inputs: Tensor, final dimension must be equal to embedding_dim. 
All other leading dimensions will be flattened and treated as a large batch. is_training: boolean, whether this connection is to training data. When this is set to False, the internal moving average statistics will not be updated. Returns: dict containing the following keys and values: quantize: Tensor containing the quantized version of the input. loss: Tensor containing the loss to optimize. perplexity: Tensor containing the perplexity of the encodings. encodings: Tensor containing the discrete encodings, ie which element of the quantized space each input element was mapped to. encoding_indices: Tensor containing the discrete encoding indices, ie which element of the quantized space each input element was mapped to. """ # Ensure that the weights are read fresh for each timestep, which otherwise # would not be guaranteed in an RNN setup. Note that this relies on inputs # having a data dependency with the output of the previous timestep - if # this is not the case, there is no way to serialize the order of weight # updates within the module, so explicit external dependencies must be used. with tf.variable_scope('vq_layer%d'%layer, reuse=reuse): initializer = tf.random_normal_initializer() # w is a matrix with an embedding in each column. When training, the # embedding is assigned to be the average of all inputs assigned to that # embedding. 
self._w = tf.get_variable( 'embedding', [self._embedding_dim, self._num_embeddings], initializer=initializer, use_resource=True) self._ema_cluster_size = tf.get_variable( 'ema_cluster_size', [self._num_embeddings], initializer=tf.constant_initializer(0), use_resource=True) self._ema_w = tf.get_variable( 'ema_dw', initializer=self._w.initialized_value(), use_resource=True) with tf.control_dependencies([inputs]): w = self._w.read_value() input_shape = tf.shape(inputs) with tf.control_dependencies([ tf.Assert(tf.equal(input_shape[-1], self._embedding_dim), [input_shape])]): flat_inputs = tf.reshape(inputs, [-1, self._embedding_dim]) distances = (tf.reduce_sum(flat_inputs**2, 1, keepdims=True) - 2 * tf.matmul(flat_inputs, w) + tf.reduce_sum(w ** 2, 0, keepdims=True)) encoding_indices = tf.argmax(- distances, 1) encodings = tf.one_hot(encoding_indices, self._num_embeddings) encoding_indices = tf.reshape(encoding_indices, tf.shape(inputs)[:-1]) quantized = self.quantize(encoding_indices) e_latent_loss = tf.reduce_mean((tf.stop_gradient(quantized) - inputs) ** 2) if is_training: updated_ema_cluster_size = moving_averages.assign_moving_average( self._ema_cluster_size, tf.reduce_sum(encodings, 0), self._decay) dw = tf.matmul(flat_inputs, encodings, transpose_a=True) updated_ema_w = moving_averages.assign_moving_average(self._ema_w, dw, self._decay) n = tf.reduce_sum(updated_ema_cluster_size) updated_ema_cluster_size = ( (updated_ema_cluster_size + self._epsilon) / (n + self._num_embeddings * self._epsilon) * n) normalised_updated_ema_w = ( updated_ema_w / tf.reshape(updated_ema_cluster_size, [1, -1])) with tf.control_dependencies([e_latent_loss]): update_w = tf.assign(self._w, normalised_updated_ema_w) with tf.control_dependencies([update_w]): loss = self._commitment_cost * e_latent_loss else: loss = self._commitment_cost * e_latent_loss quantized = inputs + tf.stop_gradient(quantized - inputs) avg_probs = tf.reduce_mean(encodings, 0) perplexity = tf.exp(- 
tf.reduce_sum(avg_probs * tf.log(avg_probs + 1e-10))) return loss, perplexity @property def embeddings(self): return self._w def quantize(self, encoding_indices): with tf.control_dependencies([encoding_indices]): w = tf.transpose(self.embeddings.read_value(), [1, 0]) return tf.nn.embedding_lookup(w, encoding_indices, validate_indices=False) ================================================ FILE: README.md ================================================ # FQ-GAN ### Recent Update * May 22, 2020 Releasing the pre-trained FQ-BigGAN/BigGAN at resolution 64x64 and their training logs at the [link](https://textae.blob.core.windows.net/qgan/qgan/ibm_ckpt.zip) (10.34G): * May 22, 2020 [`Selfie2Anime Demo`](http://40.71.23.172:8888/) is released. Try it out. * [Colab](https://colab.research.google.com/drive/1XdhEBen8vBlqIE-XPuu8j7FHYMs-x83z?usp=sharing) file for training and testing. Put it into```FQ-GAN/FQ-U-GAT-IT``` and follow the training/testing instruction. * Selfie2Anime pretrained models are available now!! [Halfway checkpoint](https://drive.google.com/drive/folders/1okZAuNYSZvhXtOuHXJOkcMQW4aIWar_M?usp=sharing) and [Final checkpoint](https://drive.google.com/drive/folders/1UIcC6OLa7aEXQjKI8CU3ZfTT3PpuhXGW?usp=sharing). * [Photo2Portrait](https://drive.google.com/drive/folders/1hE8p0CcsQOvOtbVzoBql0wsdtsMgFvEZ?usp=sharing) pretrained model is released! *** This repository contains source code to reproduce the results presented in the paper: [Feature Quantization Improves GAN Training](https://arxiv.org/abs/2004.02088), ICML 2020
[Yang Zhao*](https://sites.google.com/view/zhao-yang/), [Chunyuan Li*](http://chunyuan.li/), [Ping Yu](http://irisyu.me/), Jianfeng Gao, [Changyou Chen](https://cse.buffalo.edu/~changyou/)

## Contents 1. [FQ-BigGAN](#FQ-BigGAN) 2. [FQ-U-GAT-IT](#FQ-U-GAT-IT) 3. [FQ-StyleGAN](#FQ-StyleGAN) ## FQ-BigGAN This code is based on [PyTorchGAN](https://github.com/ajbrock/BigGAN-PyTorch). Here we give more details of the code usage. You will need **python 3.x, pytorch 1.x, tqdm, h5py** ### Prepare datasets 1. CIFAR-10 or CIFAR-100 (change C10 to C100 to prepare CIFAR-100) ``` python make_hdf5.py --dataset C10 --batch_size 256 --data_root data python calculate_inception_moments.py --dataset C10 --data_root data --batch_size 128 ``` 2. ImageNet: first manually download ImageNet and put all image class folders into `./data/ImageNet`, then execute the following commands to prepare ImageNet (128×128) ``` python make_hdf5.py --dataset I128 --batch_size 256 --data_root data python calculate_inception_moments.py --dataset I128_hdf5 --data_root data --batch_size 128 ``` ### Training We have four bash scripts in FQ-BigGAN/scripts to train CIFAR-10, CIFAR-100, ImageNet (64×64) and ImageNet (128×128), respectively. For example, to train CIFAR-100, you may simply run ``` sh scripts/launch_C100.sh ``` To modify the FQ hyper-parameters, we provide the following options in each script as arguments: 1. `--discrete_layer`: specifies the layers to which quantization is added, e.g. 0123 2. `--commitment`: the quantization loss coefficient, default=1.0 3. `--dict_size`: the size of the EMA dictionary, default=8, meaning there are 2^8 keys in the dictionary. 4. `--dict_decay`: the momentum when learning the dictionary, default=0.8. ### Experiment results Learning curves on CIFAR-100.

FID score comparison with BigGAN on ImageNet
| Model | 64×64 | 128×128| |:--------:|:-------:|:-------------:| | BigGAN |10.55 | 14.88 | | FQ-BigGAN | 9.67 | 13.77 |
## FQ-U-GAT-IT This experiment is based on the official codebase [U-GAT-IT](https://github.com/taki0112/UGATIT). Below we give more details on dataset preparation and code usage. You will need **python 3.6.x, tensorflow-gpu-1.14.0, opencv-python, tensorboardX**

### Prepare datasets We use selfie2anime, cat2dog, horse2zebra, photo2portrait, vangogh2photo. 1. selfie2anime: go to [U-GAT-IT](https://github.com/taki0112/UGATIT) to download the dataset and unzip it to `./dataset`. 2. cat2dog and photo2portrait: here we provide a bash script adapted from [DRIT](https://github.com/HsinYingLee/DRIT) to download the two datasets. ``` cd FQ-U-GAT-IT/dataset && sh download_dataset_1.sh [cat2dog, portrait] ``` 3. horse2zebra and vangogh2photo: here we provide a bash script adapted from [CycleGAN](https://github.com/junyanz/CycleGAN) to download the two datasets. ``` cd FQ-U-GAT-IT && bash download_dataset_2.sh [horse2zebra, vangogh2photo] ``` ### Training ``` python main.py --phase train --dataset [type=str, selfie2anime/portrait/cat2dog/horse2zebra/vangogh2photo] --quant [type=bool, True/False] --commitment_cost [type=float, default=2.0] --quantization_layer [type=str, i.e. 123] --decay [type=float, default=0.85] ``` By default, the training procedure will output checkpoints and intermediate translations from (testA, testB) to `checkpoints (checkpoints_quant)` and `results (results_quant)` respectively. ### Testing ``` python main.py --phase test --test_train False --dataset [type=str, selfie2anime/portrait/cat2dog/horse2zebra/vangogh2photo] --quant [type=bool, True/False] --commitment_cost [type=float, default=2.0] --quantization_layer [type=str, i.e. 123] --decay [type=float, default=0.85] ``` If you use the pretrained models shared above, remember to put them into ```checkpoint_quant/UGATIT_q_selfie2anime_lsgan_4resblock_6dis_1_1_10_10_1000_sn_smoothing_123_2.0_0.85``` by default and modify the file ```checkpoint``` accordingly. This structure is inherited from the official U-GAT-IT. Please feel free to modify it for convenience. 
### Usage ``` ├── FQ-GAN    └── FQ-U-GAT-IT    ├── dataset           ├── selfie2anime ├── portrait ├── vangogh2photo ├── horse2zebra └── cat2dog    ├── checkpoint_quant ├── UGATIT_q_selfie2anime_lsgan_4resblock_6dis_1_1_10_10_1000_sn_smoothing_123_2.0_0.85 ├── checkpoint ├── UGATIT.model-480000.data-00000-of-00001 ├── UGATIT.model-480000.index ├── UGATIT.model-480000.meta ├── UGATIT_q_portrait_lsgan_4resblock_6dis_1_1_10_10_1000_sn_smoothing_123_2.0_0.85 └── ... ``` If you choose the halfway pretrained model, contents in ```checkpoint``` should be ``` model_checkpoint_path: "UGATIT.model-480000" all_model_checkpoint_paths: "UGATIT.model-480000" ``` ## FQ-StyleGAN This experiment is based on the official codebase [StyleGAN2](https://github.com/NVlabs/stylegan2). The original [Flicker-Faces](https://arxiv.org/abs/1812.04948) dataset includes multi-resolution data. You will need **python 3.6.x, tensorflow-gpu 1.14.0, numpy** ### Prepare datasets To obtain the FFHQ dataset, please refer to [FFHQ repository](https://github.com/NVlabs/ffhq-dataset) and download the tfrecords dataset [FFHQ-tfrecords](https://drive.google.com/drive/folders/1LTBpJ0W_WLjqza3zdayligS8Dh1V1gA6) into `datasets/ffhq`. ### Training ``` python run_training.py --num-gpus=8 --data-dir=datasets --config=config-e --dataset=ffhq --mirror-augment=true --total-kimg 25000 --gamma=100 --D_type=1 --discrete_layer [type=string, default=45] --commitment_cost [type=float, default=0.25] --decay [type=float, default=0.8] ```
| Model | 32×32| 64×64 | 128×128| 1024×1024| |:--------:|:-------:|:-------------:|:-------:|:-------------:| | StyleGAN |3.28 | 4.82 | 6.33 | 5.24 | | FQ-StyleGAN |3.01 | 4.36 | 5.98 | 4.89 |
## Acknowledgements We thank the authors of the official open-source implementations of [BigGAN](https://arxiv.org/abs/1809.11096), [StyleGAN](https://arxiv.org/abs/1812.04948), [StyleGAN2](https://arxiv.org/abs/1912.04958) and [U-GAT-IT](https://arxiv.org/abs/1907.10830).