[
  {
    "path": "BigGAN_utils/BigGAN.py",
    "content": "import numpy as np\nimport math\nimport functools\n\nimport torch\nimport torch.nn as nn\nfrom torch.nn import init\nimport torch.optim as optim\nimport torch.nn.functional as F\nfrom torch.nn import Parameter as P\n\nimport layers\nfrom sync_batchnorm import SynchronizedBatchNorm2d as SyncBatchNorm2d\n\n\n# Architectures for G\n# Attention is passed in in the format '32_64' to mean applying an attention\n# block at both resolution 32x32 and 64x64. Just '64' will apply at 64x64.\ndef G_arch(ch=64, attention='64', ksize='333333', dilation='111111'):\n  arch = {}\n  arch[512] = {'in_channels' :  [ch * item for item in [16, 16, 8, 8, 4, 2, 1]],\n               'out_channels' : [ch * item for item in [16,  8, 8, 4, 2, 1, 1]],\n               'upsample' : [True] * 7,\n               'resolution' : [8, 16, 32, 64, 128, 256, 512],\n               'attention' : {2**i: (2**i in [int(item) for item in attention.split('_')])\n                              for i in range(3,10)}}\n  arch[256] = {'in_channels' :  [ch * item for item in [16, 16, 8, 8, 4, 2]],\n               'out_channels' : [ch * item for item in [16,  8, 8, 4, 2, 1]],\n               'upsample' : [True] * 6,\n               'resolution' : [8, 16, 32, 64, 128, 256],\n               'attention' : {2**i: (2**i in [int(item) for item in attention.split('_')])\n                              for i in range(3,9)}}\n  arch[128] = {'in_channels' :  [ch * item for item in [16, 16, 8, 4, 2]],\n               'out_channels' : [ch * item for item in [16, 8, 4, 2, 1]],\n               'upsample' : [True] * 5,\n               'resolution' : [8, 16, 32, 64, 128],\n               'attention' : {2**i: (2**i in [int(item) for item in attention.split('_')])\n                              for i in range(3,8)}}\n  arch[64]  = {'in_channels' :  [ch * item for item in [16, 16, 8, 4]],\n               'out_channels' : [ch * item for item in [16, 8, 4, 2]],\n               'upsample' : [True] * 4,\n               
'resolution' : [8, 16, 32, 64],\n               'attention' : {2**i: (2**i in [int(item) for item in attention.split('_')])\n                              for i in range(3,7)}}\n  arch[32]  = {'in_channels' :  [ch * item for item in [4, 4, 4]],\n               'out_channels' : [ch * item for item in [4, 4, 4]],\n               'upsample' : [True] * 3,\n               'resolution' : [8, 16, 32],\n               'attention' : {2**i: (2**i in [int(item) for item in attention.split('_')])\n                              for i in range(3,6)}}\n\n  return arch\n\nclass Generator(nn.Module):\n  def __init__(self, G_ch=64, dim_z=128, bottom_width=4, resolution=128,\n               G_kernel_size=3, G_attn='64', n_classes=1000,\n               num_G_SVs=1, num_G_SV_itrs=1,\n               G_shared=True, shared_dim=0, hier=False,\n               cross_replica=False, mybn=False,\n               G_activation=nn.ReLU(inplace=False),\n               G_lr=5e-5, G_B1=0.0, G_B2=0.999, adam_eps=1e-8,\n               BN_eps=1e-5, SN_eps=1e-12, G_mixed_precision=False, G_fp16=False,\n               G_init='ortho', skip_init=False, no_optim=False,\n               G_param='SN', norm_style='bn',\n               **kwargs):\n    super(Generator, self).__init__()\n    # Channel width multiplier\n    self.ch = G_ch\n    # Dimensionality of the latent space\n    self.dim_z = dim_z\n    # The initial spatial dimensions\n    self.bottom_width = bottom_width\n    # Resolution of the output\n    self.resolution = resolution\n    # Kernel size?\n    self.kernel_size = G_kernel_size\n    # Attention?\n    self.attention = G_attn\n    # number of classes, for use in categorical conditional generation\n    self.n_classes = n_classes\n    # Use shared embeddings?\n    self.G_shared = G_shared\n    # Dimensionality of the shared embedding? 
Unused if not using G_shared\n    self.shared_dim = shared_dim if shared_dim > 0 else dim_z\n    # Hierarchical latent space?\n    self.hier = hier\n    # Cross replica batchnorm?\n    self.cross_replica = cross_replica\n    # Use my batchnorm?\n    self.mybn = mybn\n    # nonlinearity for residual blocks\n    self.activation = G_activation\n    # Initialization style\n    self.init = G_init\n    # Parameterization style\n    self.G_param = G_param\n    # Normalization style\n    self.norm_style = norm_style\n    # Epsilon for BatchNorm?\n    self.BN_eps = BN_eps\n    # Epsilon for Spectral Norm?\n    self.SN_eps = SN_eps\n    # fp16?\n    self.fp16 = G_fp16\n    # Architecture dict\n    self.arch = G_arch(self.ch, self.attention)[resolution]\n\n    # If using hierarchical latents, adjust z\n    if self.hier:\n      # Number of places z slots into\n      self.num_slots = len(self.arch['in_channels']) + 1\n      self.z_chunk_size = (self.dim_z // self.num_slots)\n      # Recalculate latent dimensionality for even splitting into chunks\n      self.dim_z = self.z_chunk_size *  self.num_slots\n    else:\n      self.num_slots = 1\n      self.z_chunk_size = 0\n\n    # Which convs, batchnorms, and linear layers to use\n    if self.G_param == 'SN':\n      self.which_conv = functools.partial(layers.SNConv2d,\n                          kernel_size=3, padding=1,\n                          num_svs=num_G_SVs, num_itrs=num_G_SV_itrs,\n                          eps=self.SN_eps)\n      self.which_linear = functools.partial(layers.SNLinear,\n                          num_svs=num_G_SVs, num_itrs=num_G_SV_itrs,\n                          eps=self.SN_eps)\n    else:\n      self.which_conv = functools.partial(nn.Conv2d, kernel_size=3, padding=1)\n      self.which_linear = nn.Linear\n      \n    # We use a non-spectral-normed embedding here regardless;\n    # For some reason applying SN to G's embedding seems to randomly cripple G\n    self.which_embedding = nn.Embedding\n    bn_linear 
= (functools.partial(self.which_linear, bias=False) if self.G_shared\n                 else self.which_embedding)\n    self.which_bn = functools.partial(layers.ccbn,\n                          which_linear=bn_linear,\n                          cross_replica=self.cross_replica,\n                          mybn=self.mybn,\n                          input_size=(self.shared_dim + self.z_chunk_size if self.G_shared\n                                      else self.n_classes),\n                          norm_style=self.norm_style,\n                          eps=self.BN_eps)\n\n\n    # Prepare model\n    # If not using shared embeddings, self.shared is just a passthrough\n    self.shared = (self.which_embedding(n_classes, self.shared_dim) if G_shared \n                    else layers.identity())\n    # First linear layer\n    self.linear = self.which_linear(self.dim_z // self.num_slots,\n                                    self.arch['in_channels'][0] * (self.bottom_width **2))\n\n    # self.blocks is a doubly-nested list of modules, the outer loop intended\n    # to be over blocks at a given resolution (resblocks and/or self-attention)\n    # while the inner loop is over a given block\n    self.blocks = []\n    for index in range(len(self.arch['out_channels'])):\n      self.blocks += [[layers.GBlock(in_channels=self.arch['in_channels'][index],\n                             out_channels=self.arch['out_channels'][index],\n                             which_conv=self.which_conv,\n                             which_bn=self.which_bn,\n                             activation=self.activation,\n                             upsample=(functools.partial(F.interpolate, scale_factor=2)\n                                       if self.arch['upsample'][index] else None))]]\n\n      # If attention on this block, attach it to the end\n      if self.arch['attention'][self.arch['resolution'][index]]:\n        print(self.arch['resolution'], self.arch['attention'])\n        print('Adding 
attention layer in G at resolution %d' % self.arch['resolution'][index])\n        self.blocks[-1] += [layers.Attention(self.arch['out_channels'][index], self.which_conv)]\n\n    # Turn self.blocks into a ModuleList so that it's all properly registered.\n    self.blocks = nn.ModuleList([nn.ModuleList(block) for block in self.blocks])\n\n    # output layer: batchnorm-relu-conv.\n    # Consider using a non-spectral conv here\n    self.output_layer = nn.Sequential(layers.bn(self.arch['out_channels'][-1],\n                                                cross_replica=self.cross_replica,\n                                                mybn=self.mybn),\n                                    self.activation,\n                                    self.which_conv(self.arch['out_channels'][-1], 3))\n\n    # Initialize weights. Optionally skip init for testing.\n    if not skip_init:\n      self.init_weights()\n\n    # Set up optimizer\n    # If this is an EMA copy, no need for an optim, so just return now\n    if no_optim:\n      return\n    self.lr, self.B1, self.B2, self.adam_eps = G_lr, G_B1, G_B2, adam_eps\n    if G_mixed_precision:\n      print('Using fp16 adam in G...')\n      import utils\n      self.optim = utils.Adam16(params=self.parameters(), lr=self.lr,\n                           betas=(self.B1, self.B2), weight_decay=0,\n                           eps=self.adam_eps)\n    else:\n      self.optim = optim.Adam(params=self.parameters(), lr=self.lr,\n                           betas=(self.B1, self.B2), weight_decay=0,\n                           eps=self.adam_eps)\n\n    # LR scheduling, left here for forward compatibility\n    # self.lr_sched = {'itr' : 0}# if self.progressive else {}\n    # self.j = 0\n\n  # Initialize\n  def init_weights(self):\n    self.param_count = 0\n    for module in self.modules():\n      if (isinstance(module, nn.Conv2d) \n          or isinstance(module, nn.Linear) \n          or isinstance(module, nn.Embedding)):\n        if self.init == 
'ortho':\n          init.orthogonal_(module.weight)\n        elif self.init == 'N02':\n          init.normal_(module.weight, 0, 0.02)\n        elif self.init in ['glorot', 'xavier']:\n          init.xavier_uniform_(module.weight)\n        else:\n          print('Init style not recognized...')\n        self.param_count += sum([p.data.nelement() for p in module.parameters()])\n    print('Param count for G''s initialized parameters: %d' % self.param_count)\n\n  # Note on this forward function: we pass in a y vector which has\n  # already been passed through G.shared to enable easy class-wise\n  # interpolation later. If we passed in the one-hot and then ran it through\n  # G.shared in this forward function, it would be harder to handle.\n  def forward(self, z, y, w_y=None):\n    if w_y is not None:\n      s_y = torch.softmax(w_y, dim=1)\n\n      cur_y = s_y * y\n      y = cur_y.sum(dim=1, keepdim=False)\n\n    # If hierarchical, concatenate zs and ys\n    if self.hier:\n      zs = torch.split(z, self.z_chunk_size, 1)\n      z = zs[0]\n      ys = [torch.cat([y, item], 1) for item in zs[1:]]\n    else:\n      ys = [y] * len(self.blocks)\n      \n    # First linear layer\n    h = self.linear(z)\n    # Reshape\n    h = h.view(h.size(0), -1, self.bottom_width, self.bottom_width)\n\n    # Loop over blocks\n    for index, blocklist in enumerate(self.blocks):\n      # Second inner loop in case block has multiple layers\n      for block in blocklist:\n        h = block(h, ys[index])\n        \n    # Apply batchnorm-relu-conv-tanh at output\n    return torch.tanh(self.output_layer(h))\n\n  # Note on this forward function: we pass in a y vector which has\n  # already been passed through G.shared to enable easy class-wise\n  # interpolation later. 
If we passed in the one-hot and then ran it through\n  # G.shared in this forward function, it would be harder to handle.\n  def forward_org(self, z, y):\n    # If hierarchical, concatenate zs and ys\n    if self.hier:\n      zs = torch.split(z, self.z_chunk_size, 1)\n      z = zs[0]\n      ys = [torch.cat([y, item], 1) for item in zs[1:]]\n    else:\n      ys = [y] * len(self.blocks)\n      \n    # First linear layer\n    h = self.linear(z)\n    # Reshape\n    h = h.view(h.size(0), -1, self.bottom_width, self.bottom_width)\n\n    # Loop over blocks\n    for index, blocklist in enumerate(self.blocks):\n      # Second inner loop in case block has multiple layers\n      for block in blocklist:\n        h = block(h, ys[index])\n        \n    # Apply batchnorm-relu-conv-tanh at output\n    return torch.tanh(self.output_layer(h))\n\n\n# Discriminator architecture, same paradigm as G's above\ndef D_arch(ch=64, attention='64',ksize='333333', dilation='111111'):\n  arch = {}\n  arch[256] = {'in_channels' :  [3] + [ch*item for item in [1, 2, 4, 8, 8, 16]],\n               'out_channels' : [item * ch for item in [1, 2, 4, 8, 8, 16, 16]],\n               'downsample' : [True] * 6 + [False],\n               'resolution' : [128, 64, 32, 16, 8, 4, 4 ],\n               'attention' : {2**i: 2**i in [int(item) for item in attention.split('_')]\n                              for i in range(2,8)}}\n  arch[128] = {'in_channels' :  [3] + [ch*item for item in [1, 2, 4, 8, 16]],\n               'out_channels' : [item * ch for item in [1, 2, 4, 8, 16, 16]],\n               'downsample' : [True] * 5 + [False],\n               'resolution' : [64, 32, 16, 8, 4, 4],\n               'attention' : {2**i: 2**i in [int(item) for item in attention.split('_')]\n                              for i in range(2,8)}}\n  arch[64]  = {'in_channels' :  [3] + [ch*item for item in [1, 2, 4, 8]],\n               'out_channels' : [item * ch for item in [1, 2, 4, 8, 16]],\n               'downsample' : [True] * 
4 + [False],\n               'resolution' : [32, 16, 8, 4, 4],\n               'attention' : {2**i: 2**i in [int(item) for item in attention.split('_')]\n                              for i in range(2,7)}}\n  arch[32]  = {'in_channels' :  [3] + [item * ch for item in [4, 4, 4]],\n               'out_channels' : [item * ch for item in [4, 4, 4, 4]],\n               'downsample' : [True, True, False, False],\n               'resolution' : [16, 16, 16, 16],\n               'attention' : {2**i: 2**i in [int(item) for item in attention.split('_')]\n                              for i in range(2,6)}}\n  return arch\n\nclass Discriminator(nn.Module):\n\n  def __init__(self, D_ch=64, D_wide=True, resolution=128,\n               D_kernel_size=3, D_attn='64', n_classes=1000,\n               num_D_SVs=1, num_D_SV_itrs=1, D_activation=nn.ReLU(inplace=False),\n               D_lr=2e-4, D_B1=0.0, D_B2=0.999, adam_eps=1e-8,\n               SN_eps=1e-12, output_dim=1, D_mixed_precision=False, D_fp16=False,\n               D_init='ortho', skip_init=False, D_param='SN', **kwargs):\n    super(Discriminator, self).__init__()\n    # Width multiplier\n    self.ch = D_ch\n    # Use Wide D as in BigGAN and SA-GAN or skinny D as in SN-GAN?\n    self.D_wide = D_wide\n    # Resolution\n    self.resolution = resolution\n    # Kernel size\n    self.kernel_size = D_kernel_size\n    # Attention?\n    self.attention = D_attn\n    # Number of classes\n    self.n_classes = n_classes\n    # Activation\n    self.activation = D_activation\n    # Initialization style\n    self.init = D_init\n    # Parameterization style\n    self.D_param = D_param\n    # Epsilon for Spectral Norm?\n    self.SN_eps = SN_eps\n    # Fp16?\n    self.fp16 = D_fp16\n    # Architecture\n    self.arch = D_arch(self.ch, self.attention)[resolution]\n\n    # Which convs, batchnorms, and linear layers to use\n    # No option to turn off SN in D right now\n    if self.D_param == 'SN':\n      self.which_conv = 
functools.partial(layers.SNConv2d,\n                          kernel_size=3, padding=1,\n                          num_svs=num_D_SVs, num_itrs=num_D_SV_itrs,\n                          eps=self.SN_eps)\n      self.which_linear = functools.partial(layers.SNLinear,\n                          num_svs=num_D_SVs, num_itrs=num_D_SV_itrs,\n                          eps=self.SN_eps)\n      self.which_embedding = functools.partial(layers.SNEmbedding,\n                              num_svs=num_D_SVs, num_itrs=num_D_SV_itrs,\n                              eps=self.SN_eps)\n    # Prepare model\n    # self.blocks is a doubly-nested list of modules, the outer loop intended\n    # to be over blocks at a given resolution (resblocks and/or self-attention)\n    self.blocks = []\n    for index in range(len(self.arch['out_channels'])):\n      self.blocks += [[layers.DBlock(in_channels=self.arch['in_channels'][index],\n                       out_channels=self.arch['out_channels'][index],\n                       which_conv=self.which_conv,\n                       wide=self.D_wide,\n                       activation=self.activation,\n                       preactivation=(index > 0),\n                       downsample=(nn.AvgPool2d(2) if self.arch['downsample'][index] else None))]]\n      # If attention on this block, attach it to the end\n      if self.arch['attention'][self.arch['resolution'][index]]:\n        print('Adding attention layer in D at resolution %d' % self.arch['resolution'][index])\n        self.blocks[-1] += [layers.Attention(self.arch['out_channels'][index],\n                                             self.which_conv)]\n    # Turn self.blocks into a ModuleList so that it's all properly registered.\n    self.blocks = nn.ModuleList([nn.ModuleList(block) for block in self.blocks])\n    # Linear output layer. The output dimension is typically 1, but may be\n    # larger if we're e.g. 
turning this into a VAE with an inference output\n    self.linear = self.which_linear(self.arch['out_channels'][-1], output_dim)\n    # Embedding for projection discrimination\n    self.embed = self.which_embedding(self.n_classes, self.arch['out_channels'][-1])\n\n    # Initialize weights\n    if not skip_init:\n      self.init_weights()\n\n    # Set up optimizer\n    self.lr, self.B1, self.B2, self.adam_eps = D_lr, D_B1, D_B2, adam_eps\n    if D_mixed_precision:\n      print('Using fp16 adam in D...')\n      import utils\n      self.optim = utils.Adam16(params=self.parameters(), lr=self.lr,\n                             betas=(self.B1, self.B2), weight_decay=0, eps=self.adam_eps)\n    else:\n      self.optim = optim.Adam(params=self.parameters(), lr=self.lr,\n                             betas=(self.B1, self.B2), weight_decay=0, eps=self.adam_eps)\n    # LR scheduling, left here for forward compatibility\n    # self.lr_sched = {'itr' : 0}# if self.progressive else {}\n    # self.j = 0\n\n  # Initialize\n  def init_weights(self):\n    self.param_count = 0\n    for module in self.modules():\n      if (isinstance(module, nn.Conv2d)\n          or isinstance(module, nn.Linear)\n          or isinstance(module, nn.Embedding)):\n        if self.init == 'ortho':\n          init.orthogonal_(module.weight)\n        elif self.init == 'N02':\n          init.normal_(module.weight, 0, 0.02)\n        elif self.init in ['glorot', 'xavier']:\n          init.xavier_uniform_(module.weight)\n        else:\n          print('Init style not recognized...')\n        self.param_count += sum([p.data.nelement() for p in module.parameters()])\n    print('Param count for D''s initialized parameters: %d' % self.param_count)\n\n  def forward(self, x, y=None):\n    # Stick x into h for cleaner for loops without flow control\n    h = x\n    # Loop over blocks\n    for index, blocklist in enumerate(self.blocks):\n      for block in blocklist:\n        h = block(h)\n    # Apply global sum pooling as 
in SN-GAN\n    h = torch.sum(self.activation(h), [2, 3])\n    # Get initial class-unconditional output\n    out = self.linear(h)\n    # Get projection of final featureset onto class vectors and add to evidence\n    out = out + torch.sum(self.embed(y) * h, 1, keepdim=True)\n    return out\n\n# Parallelized G_D to minimize cross-gpu communication\n# Without this, Generator outputs would get all-gathered and then rebroadcast.\nclass G_D(nn.Module):\n  def __init__(self, G, D):\n    super(G_D, self).__init__()\n    self.G = G\n    self.D = D\n\n  def forward(self, z, gy, x=None, dy=None, train_G=False, return_G_z=False,\n              split_D=False):              \n    # If training G, enable grad tape\n    with torch.set_grad_enabled(train_G):\n      # Get Generator output given noise\n      G_z = self.G(z, self.G.shared(gy))\n      # Cast as necessary\n      if self.G.fp16 and not self.D.fp16:\n        G_z = G_z.float()\n      if self.D.fp16 and not self.G.fp16:\n        G_z = G_z.half()\n    # Split_D means to run D once with real data and once with fake,\n    # rather than concatenating along the batch dimension.\n    if split_D:\n      D_fake = self.D(G_z, gy)\n      if x is not None:\n        D_real = self.D(x, dy)\n        return D_fake, D_real\n      else:\n        if return_G_z:\n          return D_fake, G_z\n        else:\n          return D_fake\n    # If real data is provided, concatenate it with the Generator's output\n    # along the batch dimension for improved efficiency.\n    else:\n      D_input = torch.cat([G_z, x], 0) if x is not None else G_z\n      D_class = torch.cat([gy, dy], 0) if dy is not None else gy\n      # Get Discriminator output\n      D_out = self.D(D_input, D_class)\n      if x is not None:\n        return torch.split(D_out, [G_z.shape[0], x.shape[0]]) # D_fake, D_real\n      else:\n        if return_G_z:\n          return D_out, G_z\n        else:\n          return D_out\n"
  },
  {
    "path": "BigGAN_utils/BigGANdeep.py",
    "content": "import numpy as np\nimport math\nimport functools\n\nimport torch\nimport torch.nn as nn\nfrom torch.nn import init\nimport torch.optim as optim\nimport torch.nn.functional as F\nfrom torch.nn import Parameter as P\n\nimport layers\nfrom sync_batchnorm import SynchronizedBatchNorm2d as SyncBatchNorm2d\n\n# BigGAN-deep: uses a different resblock and pattern\n\n\n# Architectures for G\n# Attention is passed in in the format '32_64' to mean applying an attention\n# block at both resolution 32x32 and 64x64. Just '64' will apply at 64x64.\n\n# Channel ratio is the ratio of \nclass GBlock(nn.Module):\n  def __init__(self, in_channels, out_channels,\n               which_conv=nn.Conv2d, which_bn=layers.bn, activation=None,\n               upsample=None, channel_ratio=4):\n    super(GBlock, self).__init__()\n    \n    self.in_channels, self.out_channels = in_channels, out_channels\n    self.hidden_channels = self.in_channels // channel_ratio\n    self.which_conv, self.which_bn = which_conv, which_bn\n    self.activation = activation\n    # Conv layers\n    self.conv1 = self.which_conv(self.in_channels, self.hidden_channels, \n                                 kernel_size=1, padding=0)\n    self.conv2 = self.which_conv(self.hidden_channels, self.hidden_channels)\n    self.conv3 = self.which_conv(self.hidden_channels, self.hidden_channels)\n    self.conv4 = self.which_conv(self.hidden_channels, self.out_channels, \n                                 kernel_size=1, padding=0)\n    # Batchnorm layers\n    self.bn1 = self.which_bn(self.in_channels)\n    self.bn2 = self.which_bn(self.hidden_channels)\n    self.bn3 = self.which_bn(self.hidden_channels)\n    self.bn4 = self.which_bn(self.hidden_channels)\n    # upsample layers\n    self.upsample = upsample\n\n  def forward(self, x, y):\n    # Project down to channel ratio\n    h = self.conv1(self.activation(self.bn1(x, y)))\n    # Apply next BN-ReLU\n    h = self.activation(self.bn2(h, y))\n    # Drop channels in x if 
necessary\n    if self.in_channels != self.out_channels:\n      x = x[:, :self.out_channels]      \n    # Upsample both h and x at this point  \n    if self.upsample:\n      h = self.upsample(h)\n      x = self.upsample(x)\n    # 3x3 convs\n    h = self.conv2(h)\n    h = self.conv3(self.activation(self.bn3(h, y)))\n    # Final 1x1 conv\n    h = self.conv4(self.activation(self.bn4(h, y)))\n    return h + x\n\ndef G_arch(ch=64, attention='64', ksize='333333', dilation='111111'):\n  arch = {}\n  arch[256] = {'in_channels' :  [ch * item for item in [16, 16, 8, 8, 4, 2]],\n               'out_channels' : [ch * item for item in [16,  8, 8, 4, 2, 1]],\n               'upsample' : [True] * 6,\n               'resolution' : [8, 16, 32, 64, 128, 256],\n               'attention' : {2**i: (2**i in [int(item) for item in attention.split('_')])\n                              for i in range(3,9)}}\n  arch[128] = {'in_channels' :  [ch * item for item in [16, 16, 8, 4, 2]],\n               'out_channels' : [ch * item for item in [16, 8, 4,  2, 1]],\n               'upsample' : [True] * 5,\n               'resolution' : [8, 16, 32, 64, 128],\n               'attention' : {2**i: (2**i in [int(item) for item in attention.split('_')])\n                              for i in range(3,8)}}\n  arch[64]  = {'in_channels' :  [ch * item for item in [16, 16, 8, 4]],\n               'out_channels' : [ch * item for item in [16, 8, 4, 2]],\n               'upsample' : [True] * 4,\n               'resolution' : [8, 16, 32, 64],\n               'attention' : {2**i: (2**i in [int(item) for item in attention.split('_')])\n                              for i in range(3,7)}}\n  arch[32]  = {'in_channels' :  [ch * item for item in [4, 4, 4]],\n               'out_channels' : [ch * item for item in [4, 4, 4]],\n               'upsample' : [True] * 3,\n               'resolution' : [8, 16, 32],\n               'attention' : {2**i: (2**i in [int(item) for item in attention.split('_')])\n                   
           for i in range(3,6)}}\n\n  return arch\n\nclass Generator(nn.Module):\n  def __init__(self, G_ch=64, G_depth=2, dim_z=128, bottom_width=4, resolution=128,\n               G_kernel_size=3, G_attn='64', n_classes=1000,\n               num_G_SVs=1, num_G_SV_itrs=1,\n               G_shared=True, shared_dim=0, hier=False,\n               cross_replica=False, mybn=False,\n               G_activation=nn.ReLU(inplace=False),\n               G_lr=5e-5, G_B1=0.0, G_B2=0.999, adam_eps=1e-8,\n               BN_eps=1e-5, SN_eps=1e-12, G_mixed_precision=False, G_fp16=False,\n               G_init='ortho', skip_init=False, no_optim=False,\n               G_param='SN', norm_style='bn',\n               **kwargs):\n    super(Generator, self).__init__()\n    # Channel width multiplier\n    self.ch = G_ch\n    # Number of resblocks per stage\n    self.G_depth = G_depth\n    # Dimensionality of the latent space\n    self.dim_z = dim_z\n    # The initial spatial dimensions\n    self.bottom_width = bottom_width\n    # Resolution of the output\n    self.resolution = resolution\n    # Kernel size?\n    self.kernel_size = G_kernel_size\n    # Attention?\n    self.attention = G_attn\n    # number of classes, for use in categorical conditional generation\n    self.n_classes = n_classes\n    # Use shared embeddings?\n    self.G_shared = G_shared\n    # Dimensionality of the shared embedding? 
Unused if not using G_shared\n    self.shared_dim = shared_dim if shared_dim > 0 else dim_z\n    # Hierarchical latent space?\n    self.hier = hier\n    # Cross replica batchnorm?\n    self.cross_replica = cross_replica\n    # Use my batchnorm?\n    self.mybn = mybn\n    # nonlinearity for residual blocks\n    self.activation = G_activation\n    # Initialization style\n    self.init = G_init\n    # Parameterization style\n    self.G_param = G_param\n    # Normalization style\n    self.norm_style = norm_style\n    # Epsilon for BatchNorm?\n    self.BN_eps = BN_eps\n    # Epsilon for Spectral Norm?\n    self.SN_eps = SN_eps\n    # fp16?\n    self.fp16 = G_fp16\n    # Architecture dict\n    self.arch = G_arch(self.ch, self.attention)[resolution]\n\n\n    # Which convs, batchnorms, and linear layers to use\n    if self.G_param == 'SN':\n      self.which_conv = functools.partial(layers.SNConv2d,\n                          kernel_size=3, padding=1,\n                          num_svs=num_G_SVs, num_itrs=num_G_SV_itrs,\n                          eps=self.SN_eps)\n      self.which_linear = functools.partial(layers.SNLinear,\n                          num_svs=num_G_SVs, num_itrs=num_G_SV_itrs,\n                          eps=self.SN_eps)\n    else:\n      self.which_conv = functools.partial(nn.Conv2d, kernel_size=3, padding=1)\n      self.which_linear = nn.Linear\n      \n    # We use a non-spectral-normed embedding here regardless;\n    # For some reason applying SN to G's embedding seems to randomly cripple G\n    self.which_embedding = nn.Embedding\n    bn_linear = (functools.partial(self.which_linear, bias=False) if self.G_shared\n                 else self.which_embedding)\n    self.which_bn = functools.partial(layers.ccbn,\n                          which_linear=bn_linear,\n                          cross_replica=self.cross_replica,\n                          mybn=self.mybn,\n                          input_size=(self.shared_dim + self.dim_z if self.G_shared\n           
                           else self.n_classes),\n                          norm_style=self.norm_style,\n                          eps=self.BN_eps)\n\n\n    # Prepare model\n    # If not using shared embeddings, self.shared is just a passthrough\n    self.shared = (self.which_embedding(n_classes, self.shared_dim) if G_shared \n                    else layers.identity())\n    # First linear layer\n    self.linear = self.which_linear(self.dim_z + self.shared_dim, self.arch['in_channels'][0] * (self.bottom_width **2))\n\n    # self.blocks is a doubly-nested list of modules, the outer loop intended\n    # to be over blocks at a given resolution (resblocks and/or self-attention)\n    # while the inner loop is over a given block\n    self.blocks = []\n    for index in range(len(self.arch['out_channels'])):\n      self.blocks += [[GBlock(in_channels=self.arch['in_channels'][index],\n                             out_channels=self.arch['in_channels'][index] if g_index==0 else self.arch['out_channels'][index],\n                             which_conv=self.which_conv,\n                             which_bn=self.which_bn,\n                             activation=self.activation,\n                             upsample=(functools.partial(F.interpolate, scale_factor=2)\n                                       if self.arch['upsample'][index] and g_index == (self.G_depth-1) else None))]\n                       for g_index in range(self.G_depth)]\n\n      # If attention on this block, attach it to the end\n      if self.arch['attention'][self.arch['resolution'][index]]:\n        print('Adding attention layer in G at resolution %d' % self.arch['resolution'][index])\n        self.blocks[-1] += [layers.Attention(self.arch['out_channels'][index], self.which_conv)]\n\n    # Turn self.blocks into a ModuleList so that it's all properly registered.\n    self.blocks = nn.ModuleList([nn.ModuleList(block) for block in self.blocks])\n\n    # output layer: batchnorm-relu-conv.\n    # Consider 
using a non-spectral conv here\n    self.output_layer = nn.Sequential(layers.bn(self.arch['out_channels'][-1],\n                                                cross_replica=self.cross_replica,\n                                                mybn=self.mybn),\n                                    self.activation,\n                                    self.which_conv(self.arch['out_channels'][-1], 3))\n\n    # Initialize weights. Optionally skip init for testing.\n    if not skip_init:\n      self.init_weights()\n\n    # Set up optimizer\n    # If this is an EMA copy, no need for an optim, so just return now\n    if no_optim:\n      return\n    self.lr, self.B1, self.B2, self.adam_eps = G_lr, G_B1, G_B2, adam_eps\n    if G_mixed_precision:\n      print('Using fp16 adam in G...')\n      import utils\n      self.optim = utils.Adam16(params=self.parameters(), lr=self.lr,\n                           betas=(self.B1, self.B2), weight_decay=0,\n                           eps=self.adam_eps)\n    else:\n      self.optim = optim.Adam(params=self.parameters(), lr=self.lr,\n                           betas=(self.B1, self.B2), weight_decay=0,\n                           eps=self.adam_eps)\n\n    # LR scheduling, left here for forward compatibility\n    # self.lr_sched = {'itr' : 0}# if self.progressive else {}\n    # self.j = 0\n\n  # Initialize\n  def init_weights(self):\n    self.param_count = 0\n    for module in self.modules():\n      if (isinstance(module, nn.Conv2d) \n          or isinstance(module, nn.Linear) \n          or isinstance(module, nn.Embedding)):\n        if self.init == 'ortho':\n          init.orthogonal_(module.weight)\n        elif self.init == 'N02':\n          init.normal_(module.weight, 0, 0.02)\n        elif self.init in ['glorot', 'xavier']:\n          init.xavier_uniform_(module.weight)\n        else:\n          print('Init style not recognized...')\n        self.param_count += sum([p.data.nelement() for p in module.parameters()])\n    print('Param 
count for G''s initialized parameters: %d' % self.param_count)\n\n  # Note on this forward function: we pass in a y vector which has\n  # already been passed through G.shared to enable easy class-wise\n  # interpolation later. If we passed in the one-hot and then ran it through\n  # G.shared in this forward function, it would be harder to handle.\n  # NOTE: The z vs y dichotomy here is for compatibility with not-y\n  def forward(self, z, y):\n    # If hierarchical, concatenate zs and ys\n    if self.hier:\n      z = torch.cat([y, z], 1)      \n      y = z\n    # First linear layer\n    h = self.linear(z)\n    # Reshape\n    h = h.view(h.size(0), -1, self.bottom_width, self.bottom_width)    \n    # Loop over blocks\n    for index, blocklist in enumerate(self.blocks):\n      # Second inner loop in case block has multiple layers\n      for block in blocklist:\n        h = block(h, y)\n        \n    # Apply batchnorm-relu-conv-tanh at output\n    return torch.tanh(self.output_layer(h))\n\nclass DBlock(nn.Module):\n  def __init__(self, in_channels, out_channels, which_conv=layers.SNConv2d, wide=True,\n               preactivation=True, activation=None, downsample=None,\n               channel_ratio=4):\n    super(DBlock, self).__init__()\n    self.in_channels, self.out_channels = in_channels, out_channels\n    # If using wide D (as in SA-GAN and BigGAN), change the channel pattern\n    self.hidden_channels = self.out_channels // channel_ratio\n    self.which_conv = which_conv\n    self.preactivation = preactivation\n    self.activation = activation\n    self.downsample = downsample\n        \n    # Conv layers\n    self.conv1 = self.which_conv(self.in_channels, self.hidden_channels, \n                                 kernel_size=1, padding=0)\n    self.conv2 = self.which_conv(self.hidden_channels, self.hidden_channels)\n    self.conv3 = self.which_conv(self.hidden_channels, self.hidden_channels)\n    self.conv4 = self.which_conv(self.hidden_channels, self.out_channels, 
\n                                 kernel_size=1, padding=0)\n                                 \n    self.learnable_sc = True if (in_channels != out_channels) else False\n    if self.learnable_sc:\n      self.conv_sc = self.which_conv(in_channels, out_channels - in_channels, \n                                     kernel_size=1, padding=0)\n  def shortcut(self, x):\n    if self.downsample:\n      x = self.downsample(x)\n    if self.learnable_sc:\n      x = torch.cat([x, self.conv_sc(x)], 1)    \n    return x\n    \n  def forward(self, x):\n    # 1x1 bottleneck conv\n    h = self.conv1(F.relu(x))\n    # 3x3 convs\n    h = self.conv2(self.activation(h))\n    h = self.conv3(self.activation(h))\n    # relu before downsample\n    h = self.activation(h)\n    # downsample\n    if self.downsample:\n      h = self.downsample(h)     \n    # final 1x1 conv\n    h = self.conv4(h)\n    return h + self.shortcut(x)\n    \n# Discriminator architecture, same paradigm as G's above\ndef D_arch(ch=64, attention='64',ksize='333333', dilation='111111'):\n  arch = {}\n  arch[256] = {'in_channels' :  [item * ch for item in [1, 2, 4, 8, 8, 16]],\n               'out_channels' : [item * ch for item in [2, 4, 8, 8, 16, 16]],\n               'downsample' : [True] * 6 + [False],\n               'resolution' : [128, 64, 32, 16, 8, 4, 4 ],\n               'attention' : {2**i: 2**i in [int(item) for item in attention.split('_')]\n                              for i in range(2,8)}}\n  arch[128] = {'in_channels' :  [item * ch for item in [1, 2, 4,  8, 16]],\n               'out_channels' : [item * ch for item in [2, 4, 8, 16, 16]],\n               'downsample' : [True] * 5 + [False],\n               'resolution' : [64, 32, 16, 8, 4, 4],\n               'attention' : {2**i: 2**i in [int(item) for item in attention.split('_')]\n                              for i in range(2,8)}}\n  arch[64]  = {'in_channels' :  [item * ch for item in [1, 2, 4, 8]],\n               'out_channels' : [item * ch for item 
in [2, 4, 8, 16]],\n               'downsample' : [True] * 4 + [False],\n               'resolution' : [32, 16, 8, 4, 4],\n               'attention' : {2**i: 2**i in [int(item) for item in attention.split('_')]\n                              for i in range(2,7)}}\n  arch[32]  = {'in_channels' :  [item * ch for item in [4, 4, 4]],\n               'out_channels' : [item * ch for item in [4, 4, 4]],\n               'downsample' : [True, True, False, False],\n               'resolution' : [16, 16, 16, 16],\n               'attention' : {2**i: 2**i in [int(item) for item in attention.split('_')]\n                              for i in range(2,6)}}\n  return arch\n\nclass Discriminator(nn.Module):\n\n  def __init__(self, D_ch=64, D_wide=True, D_depth=2, resolution=128,\n               D_kernel_size=3, D_attn='64', n_classes=1000,\n               num_D_SVs=1, num_D_SV_itrs=1, D_activation=nn.ReLU(inplace=False),\n               D_lr=2e-4, D_B1=0.0, D_B2=0.999, adam_eps=1e-8,\n               SN_eps=1e-12, output_dim=1, D_mixed_precision=False, D_fp16=False,\n               D_init='ortho', skip_init=False, D_param='SN', **kwargs):\n    super(Discriminator, self).__init__()\n    # Width multiplier\n    self.ch = D_ch\n    # Use Wide D as in BigGAN and SA-GAN or skinny D as in SN-GAN?\n    self.D_wide = D_wide\n    # How many resblocks per stage?\n    self.D_depth = D_depth\n    # Resolution\n    self.resolution = resolution\n    # Kernel size\n    self.kernel_size = D_kernel_size\n    # Attention?\n    self.attention = D_attn\n    # Number of classes\n    self.n_classes = n_classes\n    # Activation\n    self.activation = D_activation\n    # Initialization style\n    self.init = D_init\n    # Parameterization style\n    self.D_param = D_param\n    # Epsilon for Spectral Norm?\n    self.SN_eps = SN_eps\n    # Fp16?\n    self.fp16 = D_fp16\n    # Architecture\n    self.arch = D_arch(self.ch, self.attention)[resolution]\n\n\n    # Which convs, batchnorms, and linear layers to 
use\n    # No option to turn off SN in D right now\n    if self.D_param == 'SN':\n      self.which_conv = functools.partial(layers.SNConv2d,\n                          kernel_size=3, padding=1,\n                          num_svs=num_D_SVs, num_itrs=num_D_SV_itrs,\n                          eps=self.SN_eps)\n      self.which_linear = functools.partial(layers.SNLinear,\n                          num_svs=num_D_SVs, num_itrs=num_D_SV_itrs,\n                          eps=self.SN_eps)\n      self.which_embedding = functools.partial(layers.SNEmbedding,\n                              num_svs=num_D_SVs, num_itrs=num_D_SV_itrs,\n                              eps=self.SN_eps)\n    \n    \n    # Prepare model\n    # Stem convolution\n    self.input_conv = self.which_conv(3, self.arch['in_channels'][0])\n    # self.blocks is a doubly-nested list of modules, the outer loop intended\n    # to be over blocks at a given resolution (resblocks and/or self-attention)\n    self.blocks = []\n    for index in range(len(self.arch['out_channels'])):\n      self.blocks += [[DBlock(in_channels=self.arch['in_channels'][index] if d_index==0 else self.arch['out_channels'][index],\n                       out_channels=self.arch['out_channels'][index],\n                       which_conv=self.which_conv,\n                       wide=self.D_wide,\n                       activation=self.activation,\n                       preactivation=True,\n                       downsample=(nn.AvgPool2d(2) if self.arch['downsample'][index] and d_index==0 else None))\n                       for d_index in range(self.D_depth)]]\n      # If attention on this block, attach it to the end\n      if self.arch['attention'][self.arch['resolution'][index]]:\n        print('Adding attention layer in D at resolution %d' % self.arch['resolution'][index])\n        self.blocks[-1] += [layers.Attention(self.arch['out_channels'][index],\n                                             self.which_conv)]\n    # Turn self.blocks into a 
ModuleList so that it's all properly registered.\n    self.blocks = nn.ModuleList([nn.ModuleList(block) for block in self.blocks])\n    # Linear output layer. The output dimension is typically 1, but may be\n    # larger if we're e.g. turning this into a VAE with an inference output\n    self.linear = self.which_linear(self.arch['out_channels'][-1], output_dim)\n    # Embedding for projection discrimination\n    self.embed = self.which_embedding(self.n_classes, self.arch['out_channels'][-1])\n\n    # Initialize weights\n    if not skip_init:\n      self.init_weights()\n\n    # Set up optimizer\n    self.lr, self.B1, self.B2, self.adam_eps = D_lr, D_B1, D_B2, adam_eps\n    if D_mixed_precision:\n      print('Using fp16 adam in D...')\n      import utils\n      self.optim = utils.Adam16(params=self.parameters(), lr=self.lr,\n                             betas=(self.B1, self.B2), weight_decay=0, eps=self.adam_eps)\n    else:\n      self.optim = optim.Adam(params=self.parameters(), lr=self.lr,\n                             betas=(self.B1, self.B2), weight_decay=0, eps=self.adam_eps)\n    # LR scheduling, left here for forward compatibility\n    # self.lr_sched = {'itr' : 0}# if self.progressive else {}\n    # self.j = 0\n\n  # Initialize\n  def init_weights(self):\n    self.param_count = 0\n    for module in self.modules():\n      if (isinstance(module, nn.Conv2d)\n          or isinstance(module, nn.Linear)\n          or isinstance(module, nn.Embedding)):\n        if self.init == 'ortho':\n          init.orthogonal_(module.weight)\n        elif self.init == 'N02':\n          init.normal_(module.weight, 0, 0.02)\n        elif self.init in ['glorot', 'xavier']:\n          init.xavier_uniform_(module.weight)\n        else:\n          print('Init style not recognized...')\n        self.param_count += sum([p.data.nelement() for p in module.parameters()])\n    print('Param count for D''s initialized parameters: %d' % self.param_count)\n\n  def forward(self, x, y=None):\n    
# Run input conv\n    h = self.input_conv(x)\n    # Loop over blocks\n    for index, blocklist in enumerate(self.blocks):\n      for block in blocklist:\n        h = block(h)\n    # Apply global sum pooling as in SN-GAN\n    h = torch.sum(self.activation(h), [2, 3])\n    # Get initial class-unconditional output\n    out = self.linear(h)\n    # Get projection of final featureset onto class vectors and add to evidence\n    out = out + torch.sum(self.embed(y) * h, 1, keepdim=True)\n    return out\n\n# Parallelized G_D to minimize cross-gpu communication\n# Without this, Generator outputs would get all-gathered and then rebroadcast.\nclass G_D(nn.Module):\n  def __init__(self, G, D):\n    super(G_D, self).__init__()\n    self.G = G\n    self.D = D\n\n  def forward(self, z, gy, x=None, dy=None, train_G=False, return_G_z=False,\n              split_D=False):              \n    # If training G, enable grad tape\n    with torch.set_grad_enabled(train_G):\n      # Get Generator output given noise\n      G_z = self.G(z, self.G.shared(gy))\n      # Cast as necessary\n      if self.G.fp16 and not self.D.fp16:\n        G_z = G_z.float()\n      if self.D.fp16 and not self.G.fp16:\n        G_z = G_z.half()\n    # Split_D means to run D once with real data and once with fake,\n    # rather than concatenating along the batch dimension.\n    if split_D:\n      D_fake = self.D(G_z, gy)\n      if x is not None:\n        D_real = self.D(x, dy)\n        return D_fake, D_real\n      else:\n        if return_G_z:\n          return D_fake, G_z\n        else:\n          return D_fake\n    # If real data is provided, concatenate it with the Generator's output\n    # along the batch dimension for improved efficiency.\n    else:\n      D_input = torch.cat([G_z, x], 0) if x is not None else G_z\n      D_class = torch.cat([gy, dy], 0) if dy is not None else gy\n      # Get Discriminator output\n      D_out = self.D(D_input, D_class)\n      if x is not None:\n        return torch.split(D_out, 
[G_z.shape[0], x.shape[0]]) # D_fake, D_real\n      else:\n        if return_G_z:\n          return D_out, G_z\n        else:\n          return D_out\n"
  },
  {
    "path": "BigGAN_utils/LICENSE",
    "content": "MIT License\n\nCopyright (c) 2019 Andy Brock\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "BigGAN_utils/README.md",
    "content": "Download the pre-trained weights from [this Google Drive folder](https://drive.google.com/drive/folders/1nJ3HmgYgeA9NZr-oU-enqbYeO7zBaANs?usp=sharing) and put them in `./weights/`.\n"
  },
  {
    "path": "BigGAN_utils/TFHub/biggan_v1.py",
    "content": "# BigGAN V1:\n# This is now deprecated code used for porting the TFHub modules to pytorch,\n# included here for reference only.\nimport numpy as np\nimport torch\nfrom scipy.stats import truncnorm\nfrom torch import nn\nfrom torch.nn import Parameter\nfrom torch.nn import functional as F\n\n\ndef l2normalize(v, eps=1e-4):\n  return v / (v.norm() + eps)\n\n\ndef truncated_z_sample(batch_size, z_dim, truncation=0.5, seed=None):\n  state = None if seed is None else np.random.RandomState(seed)\n  values = truncnorm.rvs(-2, 2, size=(batch_size, z_dim), random_state=state)\n  return truncation * values\n\n\ndef denorm(x):\n  out = (x + 1) / 2\n  return out.clamp_(0, 1)\n\n\nclass SpectralNorm(nn.Module):\n  def __init__(self, module, name='weight', power_iterations=1):\n    super(SpectralNorm, self).__init__()\n    self.module = module\n    self.name = name\n    self.power_iterations = power_iterations\n    if not self._made_params():\n      self._make_params()\n\n  def _update_u_v(self):\n    u = getattr(self.module, self.name + \"_u\")\n    v = getattr(self.module, self.name + \"_v\")\n    w = getattr(self.module, self.name + \"_bar\")\n\n    height = w.data.shape[0]\n    _w = w.view(height, -1)\n    for _ in range(self.power_iterations):\n      v = l2normalize(torch.matmul(_w.t(), u))\n      u = l2normalize(torch.matmul(_w, v))\n\n    sigma = u.dot((_w).mv(v))\n    setattr(self.module, self.name, w / sigma.expand_as(w))\n\n  def _made_params(self):\n    try:\n      getattr(self.module, self.name + \"_u\")\n      getattr(self.module, self.name + \"_v\")\n      getattr(self.module, self.name + \"_bar\")\n      return True\n    except AttributeError:\n      return False\n\n  def _make_params(self):\n    w = getattr(self.module, self.name)\n\n    height = w.data.shape[0]\n    width = w.view(height, -1).data.shape[1]\n\n    u = Parameter(w.data.new(height).normal_(0, 1), requires_grad=False)\n    v = Parameter(w.data.new(height).normal_(0, 1), 
requires_grad=False)\n    u.data = l2normalize(u.data)\n    v.data = l2normalize(v.data)\n    w_bar = Parameter(w.data)\n\n    del self.module._parameters[self.name]\n    self.module.register_parameter(self.name + \"_u\", u)\n    self.module.register_parameter(self.name + \"_v\", v)\n    self.module.register_parameter(self.name + \"_bar\", w_bar)\n\n  def forward(self, *args):\n    self._update_u_v()\n    return self.module.forward(*args)\n\n\nclass SelfAttention(nn.Module):\n  \"\"\" Self Attention Layer\"\"\"\n\n  def __init__(self, in_dim, activation=F.relu):\n    super().__init__()\n    self.chanel_in = in_dim\n    self.activation = activation\n\n    self.theta = SpectralNorm(nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1, bias=False))\n    self.phi = SpectralNorm(nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1, bias=False))\n    self.pool = nn.MaxPool2d(2, 2)\n    self.g = SpectralNorm(nn.Conv2d(in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1, bias=False))\n    self.o_conv = SpectralNorm(nn.Conv2d(in_channels=in_dim // 2, out_channels=in_dim, kernel_size=1, bias=False))\n    self.gamma = nn.Parameter(torch.zeros(1))\n\n    self.softmax = nn.Softmax(dim=-1)\n\n  def forward(self, x):\n    m_batchsize, C, width, height = x.size()\n    N = height * width\n\n    theta = self.theta(x)\n    phi = self.phi(x)\n    phi = self.pool(phi)\n    phi = phi.view(m_batchsize, -1, N // 4)\n    theta = theta.view(m_batchsize, -1, N)\n    theta = theta.permute(0, 2, 1)\n    attention = self.softmax(torch.bmm(theta, phi))\n    g = self.pool(self.g(x)).view(m_batchsize, -1, N // 4)\n    attn_g = torch.bmm(g, attention.permute(0, 2, 1)).view(m_batchsize, -1, width, height)\n    out = self.o_conv(attn_g)\n    return self.gamma * out + x\n\n\nclass ConditionalBatchNorm2d(nn.Module):\n  def __init__(self, num_features, num_classes, eps=1e-4, momentum=0.1):\n    super().__init__()\n    self.num_features = num_features\n    
self.bn = nn.BatchNorm2d(num_features, affine=False, eps=eps, momentum=momentum)\n    self.gamma_embed = SpectralNorm(nn.Linear(num_classes, num_features, bias=False))\n    self.beta_embed = SpectralNorm(nn.Linear(num_classes, num_features, bias=False))\n\n  def forward(self, x, y):\n    out = self.bn(x)\n    gamma = self.gamma_embed(y) + 1\n    beta = self.beta_embed(y)\n    out = gamma.view(-1, self.num_features, 1, 1) * out + beta.view(-1, self.num_features, 1, 1)\n    return out\n\n\nclass GBlock(nn.Module):\n  def __init__(\n    self,\n    in_channel,\n    out_channel,\n    kernel_size=[3, 3],\n    padding=1,\n    stride=1,\n    n_class=None,\n    bn=True,\n    activation=F.relu,\n    upsample=True,\n    downsample=False,\n    z_dim=148,\n  ):\n    super().__init__()\n\n    self.conv0 = SpectralNorm(\n      nn.Conv2d(in_channel, out_channel, kernel_size, stride, padding, bias=True if bn else True)\n    )\n    self.conv1 = SpectralNorm(\n      nn.Conv2d(out_channel, out_channel, kernel_size, stride, padding, bias=True if bn else True)\n    )\n\n    self.skip_proj = False\n    if in_channel != out_channel or upsample or downsample:\n      self.conv_sc = SpectralNorm(nn.Conv2d(in_channel, out_channel, 1, 1, 0))\n      self.skip_proj = True\n\n    self.upsample = upsample\n    self.downsample = downsample\n    self.activation = activation\n    self.bn = bn\n    if bn:\n      self.HyperBN = ConditionalBatchNorm2d(in_channel, z_dim)\n      self.HyperBN_1 = ConditionalBatchNorm2d(out_channel, z_dim)\n\n  def forward(self, input, condition=None):\n    out = input\n\n    if self.bn:\n      out = self.HyperBN(out, condition)\n    out = self.activation(out)\n    if self.upsample:\n      out = F.interpolate(out, scale_factor=2)\n    out = self.conv0(out)\n    if self.bn:\n      out = self.HyperBN_1(out, condition)\n    out = self.activation(out)\n    out = self.conv1(out)\n\n    if self.downsample:\n      out = F.avg_pool2d(out, 2)\n\n    if self.skip_proj:\n      skip = 
input\n      if self.upsample:\n        skip = F.interpolate(skip, scale_factor=2)\n      skip = self.conv_sc(skip)\n      if self.downsample:\n        skip = F.avg_pool2d(skip, 2)\n    else:\n      skip = input\n    return out + skip\n\n\nclass Generator128(nn.Module):\n  def __init__(self, code_dim=120, n_class=1000, chn=96, debug=False):\n    super().__init__()\n\n    self.linear = nn.Linear(n_class, 128, bias=False)\n\n    if debug:\n      chn = 8\n\n    self.first_view = 16 * chn\n\n    self.G_linear = SpectralNorm(nn.Linear(20, 4 * 4 * 16 * chn))\n\n    z_dim = code_dim + 28\n\n    self.GBlock = nn.ModuleList([\n      GBlock(16 * chn, 16 * chn, n_class=n_class, z_dim=z_dim),\n      GBlock(16 * chn, 8 * chn, n_class=n_class, z_dim=z_dim),\n      GBlock(8 * chn, 4 * chn, n_class=n_class, z_dim=z_dim),\n      GBlock(4 * chn, 2 * chn, n_class=n_class, z_dim=z_dim),\n      GBlock(2 * chn, 1 * chn, n_class=n_class, z_dim=z_dim),\n    ])\n\n    self.sa_id = 4\n    self.num_split = len(self.GBlock) + 1\n    self.attention = SelfAttention(2 * chn)\n    self.ScaledCrossReplicaBN = nn.BatchNorm2d(1 * chn, eps=1e-4)\n    self.colorize = SpectralNorm(nn.Conv2d(1 * chn, 3, [3, 3], padding=1))\n\n  def forward(self, input, class_id):\n    codes = torch.chunk(input, self.num_split, 1)\n    class_emb = self.linear(class_id)  # 128\n\n    out = self.G_linear(codes[0])\n    out = out.view(-1, 4, 4, self.first_view).permute(0, 3, 1, 2)\n    for i, (code, GBlock) in enumerate(zip(codes[1:], self.GBlock)):\n      if i == self.sa_id:\n        out = self.attention(out)\n      condition = torch.cat([code, class_emb], 1)\n      out = GBlock(out, condition)\n\n    out = self.ScaledCrossReplicaBN(out)\n    out = F.relu(out)\n    out = self.colorize(out)\n    return torch.tanh(out)\n\n\nclass Generator256(nn.Module):\n  def __init__(self, code_dim=140, n_class=1000, chn=96, debug=False):\n    super().__init__()\n\n    self.linear = nn.Linear(n_class, 128, bias=False)\n\n    if debug:\n   
   chn = 8\n\n    self.first_view = 16 * chn\n\n    self.G_linear = SpectralNorm(nn.Linear(20, 4 * 4 * 16 * chn))\n\n    self.GBlock = nn.ModuleList([\n      GBlock(16 * chn, 16 * chn, n_class=n_class),\n      GBlock(16 * chn, 8 * chn, n_class=n_class),\n      GBlock(8 * chn, 8 * chn, n_class=n_class),\n      GBlock(8 * chn, 4 * chn, n_class=n_class),\n      GBlock(4 * chn, 2 * chn, n_class=n_class),\n      GBlock(2 * chn, 1 * chn, n_class=n_class),\n    ])\n\n    self.sa_id = 5\n    self.num_split = len(self.GBlock) + 1\n    self.attention = SelfAttention(2 * chn)\n    self.ScaledCrossReplicaBN = nn.BatchNorm2d(1 * chn, eps=1e-4)\n    self.colorize = SpectralNorm(nn.Conv2d(1 * chn, 3, [3, 3], padding=1))\n\n  def forward(self, input, class_id):\n    codes = torch.chunk(input, self.num_split, 1)\n    class_emb = self.linear(class_id)  # 128\n\n    out = self.G_linear(codes[0])\n    out = out.view(-1, 4, 4, self.first_view).permute(0, 3, 1, 2)\n    for i, (code, GBlock) in enumerate(zip(codes[1:], self.GBlock)):\n      if i == self.sa_id:\n        out = self.attention(out)\n      condition = torch.cat([code, class_emb], 1)\n      out = GBlock(out, condition)\n\n    out = self.ScaledCrossReplicaBN(out)\n    out = F.relu(out)\n    out = self.colorize(out)\n    return torch.tanh(out)\n\n\nclass Generator512(nn.Module):\n  def __init__(self, code_dim=128, n_class=1000, chn=96, debug=False):\n    super().__init__()\n\n    self.linear = nn.Linear(n_class, 128, bias=False)\n\n    if debug:\n      chn = 8\n\n    self.first_view = 16 * chn\n\n    self.G_linear = SpectralNorm(nn.Linear(16, 4 * 4 * 16 * chn))\n\n    z_dim = code_dim + 16\n\n    self.GBlock = nn.ModuleList([\n      GBlock(16 * chn, 16 * chn, n_class=n_class, z_dim=z_dim),\n      GBlock(16 * chn, 8 * chn, n_class=n_class, z_dim=z_dim),\n      GBlock(8 * chn, 8 * chn, n_class=n_class, z_dim=z_dim),\n      GBlock(8 * chn, 4 * chn, n_class=n_class, z_dim=z_dim),\n      GBlock(4 * chn, 2 * chn, n_class=n_class, 
z_dim=z_dim),\n      GBlock(2 * chn, 1 * chn, n_class=n_class, z_dim=z_dim),\n      GBlock(1 * chn, 1 * chn, n_class=n_class, z_dim=z_dim),\n    ])\n\n    self.sa_id = 4\n    self.num_split = len(self.GBlock) + 1\n    self.attention = SelfAttention(4 * chn)\n    self.ScaledCrossReplicaBN = nn.BatchNorm2d(1 * chn)\n    self.colorize = SpectralNorm(nn.Conv2d(1 * chn, 3, [3, 3], padding=1))\n\n  def forward(self, input, class_id):\n    codes = torch.chunk(input, self.num_split, 1)\n    class_emb = self.linear(class_id)  # 128\n\n    out = self.G_linear(codes[0])\n    out = out.view(-1, 4, 4, self.first_view).permute(0, 3, 1, 2)\n    for i, (code, GBlock) in enumerate(zip(codes[1:], self.GBlock)):\n      if i == self.sa_id:\n        out = self.attention(out)\n      condition = torch.cat([code, class_emb], 1)\n      out = GBlock(out, condition)\n\n    out = self.ScaledCrossReplicaBN(out)\n    out = F.relu(out)\n    out = self.colorize(out)\n    return torch.tanh(out)\n\n\nclass Discriminator(nn.Module):\n  def __init__(self, n_class=1000, chn=96, debug=False):\n    super().__init__()\n\n    def conv(in_channel, out_channel, downsample=True):\n      return GBlock(in_channel, out_channel, bn=False, upsample=False, downsample=downsample)\n\n    if debug:\n      chn = 8\n    self.debug = debug\n\n    self.pre_conv = nn.Sequential(\n      SpectralNorm(nn.Conv2d(3, 1 * chn, 3, padding=1)),\n      nn.ReLU(),\n      SpectralNorm(nn.Conv2d(1 * chn, 1 * chn, 3, padding=1)),\n      nn.AvgPool2d(2),\n    )\n    self.pre_skip = SpectralNorm(nn.Conv2d(3, 1 * chn, 1))\n\n    self.conv = nn.Sequential(\n      conv(1 * chn, 1 * chn, downsample=True),\n      conv(1 * chn, 2 * chn, downsample=True),\n      SelfAttention(2 * chn),\n      conv(2 * chn, 2 * chn, downsample=True),\n      conv(2 * chn, 4 * chn, downsample=True),\n      conv(4 * chn, 8 * chn, downsample=True),\n      conv(8 * chn, 8 * chn, downsample=True),\n      conv(8 * chn, 16 * chn, downsample=True),\n      conv(16 * chn, 
16 * chn, downsample=False),\n    )\n\n    self.linear = SpectralNorm(nn.Linear(16 * chn, 1))\n\n    self.embed = nn.Embedding(n_class, 16 * chn)\n    self.embed.weight.data.uniform_(-0.1, 0.1)\n    self.embed = SpectralNorm(self.embed)\n\n  def forward(self, input, class_id):\n\n    out = self.pre_conv(input)\n    out += self.pre_skip(F.avg_pool2d(input, 2))\n    out = self.conv(out)\n    out = F.relu(out)\n    out = out.view(out.size(0), out.size(1), -1)\n    out = out.sum(2)\n    out_linear = self.linear(out).squeeze(1)\n    embed = self.embed(class_id)\n\n    prod = (out * embed).sum(1)\n\n    return out_linear + prod\n"
  },
  {
    "path": "BigGAN_utils/TFHub/converter.py",
    "content": "\"\"\"Utilities for converting TFHub BigGAN generator weights to PyTorch.\nRecommended usage:\nTo convert all BigGAN variants and generate test samples, use:\n```bash\nCUDA_VISIBLE_DEVICES=0 python converter.py --generate_samples\n```\nSee `parse_args` for additional options.\n\"\"\"\n\nimport argparse\nimport os\nimport sys\n\nimport h5py\nimport torch\nimport torch.nn as nn\nfrom torchvision.utils import save_image\nimport tensorflow as tf\nimport tensorflow_hub as hub\nimport parse\n\n# import reference biggan from this folder\nimport biggan_v1 as biggan_for_conversion\n\n# Import model from main folder\nsys.path.append('..')\nimport BigGAN\n\n\n\n\nDEVICE = 'cuda'\nHDF5_TMPL = 'biggan-{}.h5'\nPTH_TMPL = 'biggan-{}.pth'\nMODULE_PATH_TMPL = 'https://tfhub.dev/deepmind/biggan-{}/2'\nZ_DIMS = {\n  128: 120,\n  256: 140,\n  512: 128}\nRESOLUTIONS = list(Z_DIMS)\n\n\ndef dump_tfhub_to_hdf5(module_path, hdf5_path, redownload=False):\n  \"\"\"Loads TFHub weights and saves them to intermediate HDF5 file.\n  Args:\n    module_path ([Path-like]): Path to TFHub module.\n    hdf5_path ([Path-like]): Path to output HDF5 file.\n  Returns:\n    [h5py.File]: Loaded hdf5 file containing module weights.\n  \"\"\"\n  if os.path.exists(hdf5_path) and (not redownload):\n    print('Loading BigGAN hdf5 file from:', hdf5_path)\n    return h5py.File(hdf5_path, 'r')\n\n  print('Loading BigGAN module from:', module_path)\n  tf.reset_default_graph()\n  hub.Module(module_path)\n  print('Loaded BigGAN module from:', module_path)\n\n  initializer = tf.global_variables_initializer()\n  sess = tf.Session()\n  sess.run(initializer)\n\n  print('Saving BigGAN weights to :', hdf5_path)\n  h5f = h5py.File(hdf5_path, 'w')\n  for var in tf.global_variables():\n    val = sess.run(var)\n    h5f.create_dataset(var.name, data=val)\n    print(f'Saving {var.name} with shape {val.shape}')\n  h5f.close()\n  return h5py.File(hdf5_path, 'r')\n\n\nclass TFHub2Pytorch(object):\n\n  TF_ROOT = 
'module'\n\n  NUM_GBLOCK = {\n    128: 5,\n    256: 6,\n    512: 7\n  }\n\n  w = 'w'\n  b = 'b'\n  u = 'u0'\n  v = 'u1'\n  gamma = 'gamma'\n  beta = 'beta'\n\n  def __init__(self, state_dict, tf_weights, resolution=256, load_ema=True, verbose=False):\n    self.state_dict = state_dict\n    self.tf_weights = tf_weights\n    self.resolution = resolution\n    self.verbose = verbose\n    if load_ema:\n      for name in ['w', 'b', 'gamma', 'beta']:\n        setattr(self, name, getattr(self, name) + '/ema_b999900')\n\n  def load(self):\n    self.load_generator()\n    return self.state_dict\n\n  def load_generator(self):\n    GENERATOR_ROOT = os.path.join(self.TF_ROOT, 'Generator')\n\n    for i in range(self.NUM_GBLOCK[self.resolution]):\n      name_tf = os.path.join(GENERATOR_ROOT, 'GBlock')\n      name_tf += f'_{i}' if i != 0 else ''\n      self.load_GBlock(f'GBlock.{i}.', name_tf)\n\n    self.load_attention('attention.', os.path.join(GENERATOR_ROOT, 'attention'))\n    self.load_linear('linear', os.path.join(self.TF_ROOT, 'linear'), bias=False)\n    self.load_snlinear('G_linear', os.path.join(GENERATOR_ROOT, 'G_Z', 'G_linear'))\n    self.load_colorize('colorize', os.path.join(GENERATOR_ROOT, 'conv_2d'))\n    self.load_ScaledCrossReplicaBNs('ScaledCrossReplicaBN',\n                    os.path.join(GENERATOR_ROOT, 'ScaledCrossReplicaBN'))\n\n  def load_linear(self, name_pth, name_tf, bias=True):\n    self.state_dict[name_pth + '.weight'] = self.load_tf_tensor(name_tf, self.w).permute(1, 0)\n    if bias:\n      self.state_dict[name_pth + '.bias'] = self.load_tf_tensor(name_tf, self.b)\n\n  def load_snlinear(self, name_pth, name_tf, bias=True):\n    self.state_dict[name_pth + '.module.weight_u'] = self.load_tf_tensor(name_tf, self.u).squeeze()\n    self.state_dict[name_pth + '.module.weight_v'] = self.load_tf_tensor(name_tf, self.v).squeeze()\n    self.state_dict[name_pth + '.module.weight_bar'] = self.load_tf_tensor(name_tf, self.w).permute(1, 0)\n    if bias:\n      
self.state_dict[name_pth + '.module.bias'] = self.load_tf_tensor(name_tf, self.b)\n\n  def load_colorize(self, name_pth, name_tf):\n    self.load_snconv(name_pth, name_tf)\n\n  def load_GBlock(self, name_pth, name_tf):\n    self.load_convs(name_pth, name_tf)\n    self.load_HyperBNs(name_pth, name_tf)\n\n  def load_convs(self, name_pth, name_tf):\n    self.load_snconv(name_pth + 'conv0', os.path.join(name_tf, 'conv0'))\n    self.load_snconv(name_pth + 'conv1', os.path.join(name_tf, 'conv1'))\n    self.load_snconv(name_pth + 'conv_sc', os.path.join(name_tf, 'conv_sc'))\n\n  def load_snconv(self, name_pth, name_tf, bias=True):\n    if self.verbose:\n      print(f'loading: {name_pth} from {name_tf}')\n    self.state_dict[name_pth + '.module.weight_u'] = self.load_tf_tensor(name_tf, self.u).squeeze()\n    self.state_dict[name_pth + '.module.weight_v'] = self.load_tf_tensor(name_tf, self.v).squeeze()\n    self.state_dict[name_pth + '.module.weight_bar'] = self.load_tf_tensor(name_tf, self.w).permute(3, 2, 0, 1)\n    if bias:\n      self.state_dict[name_pth + '.module.bias'] = self.load_tf_tensor(name_tf, self.b).squeeze()\n\n  def load_conv(self, name_pth, name_tf, bias=True):\n\n    self.state_dict[name_pth + '.weight_u'] = self.load_tf_tensor(name_tf, self.u).squeeze()\n    self.state_dict[name_pth + '.weight_v'] = self.load_tf_tensor(name_tf, self.v).squeeze()\n    self.state_dict[name_pth + '.weight_bar'] = self.load_tf_tensor(name_tf, self.w).permute(3, 2, 0, 1)\n    if bias:\n      self.state_dict[name_pth + '.bias'] = self.load_tf_tensor(name_tf, self.b)\n\n  def load_HyperBNs(self, name_pth, name_tf):\n    self.load_HyperBN(name_pth + 'HyperBN', os.path.join(name_tf, 'HyperBN'))\n    self.load_HyperBN(name_pth + 'HyperBN_1', os.path.join(name_tf, 'HyperBN_1'))\n\n  def load_ScaledCrossReplicaBNs(self, name_pth, name_tf):\n    self.state_dict[name_pth + '.bias'] = self.load_tf_tensor(name_tf, self.beta).squeeze()\n    self.state_dict[name_pth + '.weight'] = 
self.load_tf_tensor(name_tf, self.gamma).squeeze()\n    self.state_dict[name_pth + '.running_mean'] = self.load_tf_tensor(name_tf + 'bn', 'accumulated_mean')\n    self.state_dict[name_pth + '.running_var'] = self.load_tf_tensor(name_tf + 'bn', 'accumulated_var')\n    self.state_dict[name_pth + '.num_batches_tracked'] = torch.tensor(\n      self.tf_weights[os.path.join(name_tf + 'bn', 'accumulation_counter:0')][()], dtype=torch.float32)\n\n  def load_HyperBN(self, name_pth, name_tf):\n    if self.verbose:\n      print(f'loading: {name_pth} from {name_tf}')\n    beta = name_pth + '.beta_embed.module'\n    gamma = name_pth + '.gamma_embed.module'\n    self.state_dict[beta + '.weight_u'] = self.load_tf_tensor(os.path.join(name_tf, 'beta'), self.u).squeeze()\n    self.state_dict[gamma + '.weight_u'] = self.load_tf_tensor(os.path.join(name_tf, 'gamma'), self.u).squeeze()\n    self.state_dict[beta + '.weight_v'] = self.load_tf_tensor(os.path.join(name_tf, 'beta'), self.v).squeeze()\n    self.state_dict[gamma + '.weight_v'] = self.load_tf_tensor(os.path.join(name_tf, 'gamma'), self.v).squeeze()\n    self.state_dict[beta + '.weight_bar'] = self.load_tf_tensor(os.path.join(name_tf, 'beta'), self.w).permute(1, 0)\n    self.state_dict[gamma +\n            '.weight_bar'] = self.load_tf_tensor(os.path.join(name_tf, 'gamma'), self.w).permute(1, 0)\n\n    cr_bn_name = name_tf.replace('HyperBN', 'CrossReplicaBN')\n    self.state_dict[name_pth + '.bn.running_mean'] = self.load_tf_tensor(cr_bn_name, 'accumulated_mean')\n    self.state_dict[name_pth + '.bn.running_var'] = self.load_tf_tensor(cr_bn_name, 'accumulated_var')\n    self.state_dict[name_pth + '.bn.num_batches_tracked'] = torch.tensor(\n      self.tf_weights[os.path.join(cr_bn_name, 'accumulation_counter:0')][()], dtype=torch.float32)\n\n  def load_attention(self, name_pth, name_tf):\n\n    self.load_snconv(name_pth + 'theta', os.path.join(name_tf, 'theta'), bias=False)\n    self.load_snconv(name_pth + 'phi', 
os.path.join(name_tf, 'phi'), bias=False)\n    self.load_snconv(name_pth + 'g', os.path.join(name_tf, 'g'), bias=False)\n    self.load_snconv(name_pth + 'o_conv', os.path.join(name_tf, 'o_conv'), bias=False)\n    self.state_dict[name_pth + 'gamma'] = self.load_tf_tensor(name_tf, self.gamma)\n\n  def load_tf_tensor(self, prefix, var, device='0'):\n    name = os.path.join(prefix, var) + f':{device}'\n    return torch.from_numpy(self.tf_weights[name][:])\n\n# Convert from v1: This function maps the v1 (TFHub converter) state dict onto the key names and tensor layouts expected by BigGAN.Generator from the main folder\ndef convert_from_v1(hub_dict, resolution=128):\n  weightname_dict = {'weight_u': 'u0', 'weight_bar': 'weight', 'bias': 'bias'}\n  convnum_dict = {'conv0': 'conv1', 'conv1': 'conv2', 'conv_sc': 'conv_sc'}\n  attention_blocknum = {128: 3, 256: 4, 512: 3}[resolution]\n  hub2me = {'linear.weight': 'shared.weight', # This is actually the shared weight \n          # Linear stuff\n          'G_linear.module.weight_bar': 'linear.weight', \n          'G_linear.module.bias': 'linear.bias',\n          'G_linear.module.weight_u': 'linear.u0',\n          # output layer stuff\n          'ScaledCrossReplicaBN.weight': 'output_layer.0.gain', \n          'ScaledCrossReplicaBN.bias': 'output_layer.0.bias',\n          'ScaledCrossReplicaBN.running_mean': 'output_layer.0.stored_mean',\n          'ScaledCrossReplicaBN.running_var': 'output_layer.0.stored_var',\n          'colorize.module.weight_bar': 'output_layer.2.weight', \n          'colorize.module.bias': 'output_layer.2.bias',\n          'colorize.module.weight_u':  'output_layer.2.u0',\n          # Attention stuff\n          'attention.gamma': 'blocks.%d.1.gamma' % attention_blocknum, \n          'attention.theta.module.weight_u': 'blocks.%d.1.theta.u0' % attention_blocknum,\n          'attention.theta.module.weight_bar': 'blocks.%d.1.theta.weight' % attention_blocknum, \n          'attention.phi.module.weight_u':  'blocks.%d.1.phi.u0' % attention_blocknum,\n          'attention.phi.module.weight_bar': 'blocks.%d.1.phi.weight' % 
attention_blocknum,\n          'attention.g.module.weight_u': 'blocks.%d.1.g.u0' % attention_blocknum,\n          'attention.g.module.weight_bar': 'blocks.%d.1.g.weight' % attention_blocknum, \n          'attention.o_conv.module.weight_u': 'blocks.%d.1.o.u0' % attention_blocknum,\n          'attention.o_conv.module.weight_bar':'blocks.%d.1.o.weight' % attention_blocknum, \n          }\n\n  # Loop over the hub dict and build the hub2me map\n  for name in hub_dict.keys():\n    if 'GBlock' in name:\n      if 'HyperBN' not in name: # it's a conv\n        out = parse.parse('GBlock.{:d}.{}.module.{}',name)\n        blocknum, convnum, weightname = out\n        if weightname not in weightname_dict:\n          continue # Ignore weight_v\n        out_name = 'blocks.%d.0.%s.%s' % (blocknum, convnum_dict[convnum], weightname_dict[weightname]) # Increment conv number by 1\n      else: # hyperbn not conv\n        BNnum = 2 if 'HyperBN_1' in name else 1\n        if 'embed' in name:        \n          out = parse.parse('GBlock.{:d}.{}.module.{}',name)\n          blocknum, gamma_or_beta, weightname = out\n          if weightname not in weightname_dict: # Ignore weight_v\n            continue\n          out_name = 'blocks.%d.0.bn%d.%s.%s' % (blocknum, BNnum, 'gain' if 'gamma' in gamma_or_beta else 'bias', weightname_dict[weightname])\n        else:\n           out = parse.parse('GBlock.{:d}.{}.bn.{}',name)\n           blocknum, dummy, mean_or_var = out\n           if 'num_batches_tracked' in mean_or_var:\n            continue\n           out_name = 'blocks.%d.0.bn%d.%s' % (blocknum, BNnum, 'stored_mean' if 'mean' in mean_or_var else 'stored_var')\n      hub2me[name] = out_name\n\n\n  # Invert the hub2me map\n  me2hub = {hub2me[item]: item for item in hub2me}\n  new_dict = {}\n  dimz_dict = {128: 20, 256: 20, 512:16} \n  for item in me2hub:\n    # Swap input dim ordering on the batchnorm gain/bias embedding weights to account for my arbitrary change of ordering when concatenating Ys and Zs  \n    if ('bn' in 
item and 'weight' in item) and ('gain' in item or 'bias' in item) and ('output_layer' not in item):\n      new_dict[item] = torch.cat([hub_dict[me2hub[item]][:, -128:], hub_dict[me2hub[item]][:, :dimz_dict[resolution]]], 1)\n    # Reshape the first linear weight, bias, and u0\n    elif item == 'linear.weight':\n      new_dict[item] = hub_dict[me2hub[item]].contiguous().view(4, 4, 96 * 16, -1).permute(2,0,1,3).contiguous().view(-1,dimz_dict[resolution])\n    elif item == 'linear.bias':\n      new_dict[item] = hub_dict[me2hub[item]].view(4, 4, 96  * 16).permute(2,0,1).contiguous().view(-1)\n    elif item == 'linear.u0':\n       new_dict[item] = hub_dict[me2hub[item]].view(4, 4, 96  * 16).permute(2,0,1).contiguous().view(1, -1)\n    elif me2hub[item] == 'linear.weight': # THIS IS THE SHARED WEIGHT NOT THE FIRST LINEAR LAYER\n      # Transpose shared weight so that it's an embedding\n      new_dict[item] = hub_dict[me2hub[item]].t()\n    elif 'weight_u' in me2hub[item]: # Unsqueeze u0s    \n      new_dict[item] = hub_dict[me2hub[item]].unsqueeze(0)\n    else:\n      new_dict[item] = hub_dict[me2hub[item]]      \n  return new_dict\n\ndef get_config(resolution):\n  attn_dict = {128: '64', 256: '128', 512: '64'}\n  dim_z_dict = {128: 120, 256: 140, 512: 128}\n  config = {'G_param': 'SN', 'D_param': 'SN', \n           'G_ch': 96, 'D_ch': 96, \n           'D_wide': True, 'G_shared': True, \n           'shared_dim': 128, 'dim_z': dim_z_dict[resolution], \n           'hier': True, 'cross_replica': False, \n           'mybn': False, 'G_activation': nn.ReLU(inplace=True),\n           'G_attn': attn_dict[resolution],\n           'norm_style': 'bn',\n           'G_init': 'ortho', 'skip_init': True, 'no_optim': True,\n           'G_fp16': False, 'G_mixed_precision': False,\n           'accumulate_stats': False, 'num_standing_accumulations': 16, \n           'G_eval_mode': True,\n           'BN_eps': 1e-04, 'SN_eps': 1e-04, \n           'num_G_SVs': 1, 'num_G_SV_itrs': 1, 
'resolution': resolution, \n           'n_classes': 1000}\n  return config\n\n\ndef convert_biggan(resolution, weight_dir, redownload=False, no_ema=False, verbose=False):\n  module_path = MODULE_PATH_TMPL.format(resolution)\n  hdf5_path = os.path.join(weight_dir, HDF5_TMPL.format(resolution))\n  pth_path = os.path.join(weight_dir, PTH_TMPL.format(resolution))\n\n  tf_weights = dump_tfhub_to_hdf5(module_path, hdf5_path, redownload=redownload)\n  G_temp = getattr(biggan_for_conversion, f'Generator{resolution}')()\n  state_dict_temp = G_temp.state_dict()\n\n  converter = TFHub2Pytorch(state_dict_temp, tf_weights, resolution=resolution,\n                load_ema=(not no_ema), verbose=verbose)\n  state_dict_v1 = converter.load()\n  state_dict = convert_from_v1(state_dict_v1, resolution)\n  # Get the config, build the model\n  config = get_config(resolution)\n  G = BigGAN.Generator(**config)\n  G.load_state_dict(state_dict, strict=False) # Ignore missing sv0 entries\n  torch.save(state_dict, pth_path)\n  \n  # output_location ='pretrained_weights/TFHub-PyTorch-128.pth'\n  \n  return G\n\n\ndef generate_sample(G, z_dim, batch_size, filename, parallel=False):\n  \n  G.eval()\n  G.to(DEVICE)\n  with torch.no_grad():\n    z = torch.randn(batch_size, G.dim_z).to(DEVICE)\n    y = torch.randint(low=0, high=1000, size=(batch_size,), \n        device=DEVICE, dtype=torch.int64, requires_grad=False)\n    if parallel:\n      images = nn.parallel.data_parallel(G, (z, G.shared(y)))\n    else:\n      images = G(z, G.shared(y))\n  save_image(images, filename, scale_each=True, normalize=True)\n\ndef parse_args():\n  usage = 'Parser for conversion script.'\n  parser = argparse.ArgumentParser(description=usage)\n  parser.add_argument(\n    '--resolution', '-r', type=int, default=None, choices=[128, 256, 512],\n    help='Resolution of TFHub module to convert. 
Converts all resolutions if None.')\n  parser.add_argument(\n    '--redownload', action='store_true', default=False,\n    help='Redownload weights and overwrite current hdf5 file, if present.')\n  parser.add_argument(\n    '--weights_dir', type=str, default='pretrained_weights')\n  parser.add_argument(\n    '--samples_dir', type=str, default='pretrained_samples')\n  parser.add_argument(\n    '--no_ema', action='store_true', default=False,\n    help='Do not load ema weights.')\n  parser.add_argument(\n    '--verbose', action='store_true', default=False,\n    help='Additionally logging.')\n  parser.add_argument(\n    '--generate_samples', action='store_true', default=False,\n    help='Generate test sample with pretrained model.')\n  parser.add_argument(\n    '--batch_size', type=int, default=64,\n    help='Batch size used for test sample.')\n  parser.add_argument(\n    '--parallel', action='store_true', default=False,\n    help='Parallelize G?')     \n  args = parser.parse_args()\n  return args\n\n\nif __name__ == '__main__':\n\n  args = parse_args()\n  os.makedirs(args.weights_dir, exist_ok=True)\n  os.makedirs(args.samples_dir, exist_ok=True)\n\n  if args.resolution is not None:\n    G = convert_biggan(args.resolution, args.weights_dir,\n               redownload=args.redownload,\n               no_ema=args.no_ema, verbose=args.verbose)\n    if args.generate_samples:\n      filename = os.path.join(args.samples_dir, f'biggan{args.resolution}_samples.jpg')\n      print('Generating samples...')\n      generate_sample(G, Z_DIMS[args.resolution], args.batch_size, filename, args.parallel)\n  else:\n    for res in RESOLUTIONS:\n      G = convert_biggan(res, args.weights_dir,\n                 redownload=args.redownload,\n                 no_ema=args.no_ema, verbose=args.verbose)\n      if args.generate_samples:\n        filename = os.path.join(args.samples_dir, f'biggan{res}_samples.jpg')\n        print('Generating samples...')\n        generate_sample(G, Z_DIMS[res], 
args.batch_size, filename, args.parallel)\n"
  },
  {
    "path": "BigGAN_utils/__init__.py",
    "content": "import sys\nsys.path.append('./BigGAN_utils/')\n"
  },
  {
    "path": "BigGAN_utils/animal_hash.py",
    "content": "c = ['Aardvark', 'Abyssinian', 'Affenpinscher', 'Akbash', 'Akita', 'Albatross',\n     'Alligator', 'Alpaca', 'Angelfish', 'Ant', 'Anteater', 'Antelope', 'Ape',\n     'Armadillo', 'Ass', 'Avocet', 'Axolotl', 'Baboon', 'Badger', 'Balinese',\n     'Bandicoot', 'Barb', 'Barnacle', 'Barracuda', 'Bat', 'Beagle', 'Bear',\n     'Beaver', 'Bee', 'Beetle', 'Binturong', 'Bird', 'Birman', 'Bison',\n     'Bloodhound', 'Boar', 'Bobcat', 'Bombay', 'Bongo', 'Bonobo', 'Booby',\n     'Budgerigar', 'Buffalo', 'Bulldog', 'Bullfrog', 'Burmese', 'Butterfly',\n     'Caiman', 'Camel', 'Capybara', 'Caracal', 'Caribou', 'Cassowary', 'Cat',\n     'Caterpillar', 'Catfish', 'Cattle', 'Centipede', 'Chameleon', 'Chamois',\n     'Cheetah', 'Chicken', 'Chihuahua', 'Chimpanzee', 'Chinchilla', 'Chinook', \n     'Chipmunk', 'Chough', 'Cichlid', 'Clam', 'Coati', 'Cobra', 'Cockroach',\n     'Cod', 'Collie', 'Coral', 'Cormorant', 'Cougar', 'Cow', 'Coyote', \n     'Crab', 'Crane', 'Crocodile', 'Crow', 'Curlew', 'Cuscus', 'Cuttlefish',\n     'Dachshund', 'Dalmatian', 'Deer', 'Dhole', 'Dingo', 'Dinosaur', 'Discus',\n     'Dodo', 'Dog', 'Dogball', 'Dogfish', 'Dolphin', 'Donkey', 'Dormouse',\n     'Dove', 'Dragonfly', 'Drever', 'Duck', 'Dugong', 'Dunker', 'Dunlin', \n     'Eagle', 'Earwig', 'Echidna', 'Eel', 'Eland', 'Elephant', 'ElephantSeal',\n     'Elk', 'Emu', 'Falcon', 'Ferret', 'Finch', 'Fish', 'Flamingo', 'Flounder',\n     'Fly', 'Fossa', 'Fox', 'Frigatebird', 'Frog', 'Galago', 'Gar', 'Gaur', \n     'Gazelle', 'Gecko', 'Gerbil', 'Gharial', 'GiantPanda', 'Gibbon', 'Giraffe',\n     'Gnat', 'Gnu', 'Goat', 'Goldfinch', 'Goldfish', 'Goose', 'Gopher',\n     'Gorilla', 'Goshawk', 'Grasshopper', 'Greyhound', 'Grouse', 'Guanaco', \n     'GuineaFowl', 'GuineaPig', 'Gull', 'Guppy', 'Hamster', 'Hare', 'Harrier', \n     'Havanese', 'Hawk', 'Hedgehog', 'Heron', 'Herring', 'Himalayan', \n     'Hippopotamus', 'Hornet', 'Horse', 'Human', 'Hummingbird', 'Hyena', \n     'Ibis', 'Iguana', 'Impala', 
'Indri', 'Insect', 'Jackal', 'Jaguar', \n     'Javanese', 'Jay', 'Jellyfish', 'Kakapo', 'Kangaroo', 'Kingfisher', \n     'Kiwi', 'Koala', 'KomodoDragon', 'Kouprey', 'Kudu', 'Labradoodle', \n     'Ladybird', 'Lapwing', 'Lark', 'Lemming', 'Lemur', 'Leopard', 'Liger',\n     'Lion', 'Lionfish', 'Lizard', 'Llama', 'Lobster', 'Locust', 'Loris', \n     'Louse', 'Lynx', 'Lyrebird', 'Macaw', 'Magpie', 'Mallard', 'Maltese',\n     'Manatee', 'Mandrill', 'Markhor', 'Marten', 'Mastiff', 'Mayfly', 'Meerkat',\n     'Millipede', 'Mink', 'Mole', 'Molly', 'Mongoose', 'Mongrel', 'Monkey',\n     'Moorhen', 'Moose', 'Mosquito', 'Moth', 'Mouse', 'Mule', 'Narwhal',\n     'Neanderthal', 'Newfoundland', 'Newt', 'Nightingale', 'Numbat', 'Ocelot',\n     'Octopus', 'Okapi', 'Olm', 'Opossum', 'Orang-utan', 'Oryx', 'Ostrich', \n     'Otter', 'Owl', 'Ox', 'Oyster', 'Pademelon', 'Panther', 'Parrot',\n     'Partridge', 'Peacock', 'Peafowl', 'Pekingese', 'Pelican', 'Penguin', \n     'Persian', 'Pheasant', 'Pig', 'Pigeon', 'Pika', 'Pike', 'Piranha', \n     'Platypus', 'Pointer', 'Pony', 'Poodle', 'Porcupine', 'Porpoise',\n     'Possum', 'PrairieDog', 'Prawn', 'Puffin', 'Pug', 'Puma', 'Quail', \n     'Quelea', 'Quetzal', 'Quokka', 'Quoll', 'Rabbit', 'Raccoon', 'Ragdoll', \n     'Rail', 'Ram', 'Rat', 'Rattlesnake', 'Raven', 'RedDeer', 'RedPanda',\n     'Reindeer', 'Rhinoceros', 'Robin', 'Rook', 'Rottweiler', 'Ruff',\n     'Salamander', 'Salmon', 'SandDollar', 'Sandpiper', 'Saola', \n     'Sardine', 'Scorpion', 'SeaLion', 'SeaUrchin', 'Seahorse',\n     'Seal', 'Serval', 'Shark', 'Sheep', 'Shrew', 'Shrimp', 'Siamese',\n     'Siberian', 'Skunk', 'Sloth', 'Snail', 'Snake', 'Snowshoe', 'Somali', \n     'Sparrow', 'Spider', 'Sponge', 'Squid', 'Squirrel', 'Starfish', 'Starling',\n     'Stingray', 'Stinkbug', 'Stoat', 'Stork', 'Swallow', 'Swan', 'Tang', \n     'Tapir', 'Tarsier', 'Termite', 'Tetra', 'Tiffany', 'Tiger', 'Toad', \n     'Tortoise', 'Toucan', 'Tropicbird', 'Trout', 'Tuatara', 'Turkey', \n     
'Turtle', 'Uakari', 'Uguisu', 'Umbrellabird', 'Viper', 'Vulture',\n     'Wallaby', 'Walrus', 'Warthog', 'Wasp', 'WaterBuffalo', 'Weasel',\n     'Whale', 'Whippet', 'Wildebeest', 'Wolf', 'Wolverine', 'Wombat', \n     'Woodcock', 'Woodlouse', 'Woodpecker', 'Worm', 'Wrasse', 'Wren', \n     'Yak', 'Zebra', 'Zebu', 'Zonkey']\na = ['able', 'above', 'absent', 'absolute', 'abstract', 'abundant', 'academic',\n     'acceptable', 'accepted', 'accessible', 'accurate', 'accused', 'active', \n     'actual', 'acute', 'added', 'additional', 'adequate', 'adjacent', \n     'administrative', 'adorable', 'advanced', 'adverse', 'advisory', \n     'aesthetic', 'afraid', 'african', 'aggregate', 'aggressive', 'agreeable', \n     'agreed', 'agricultural', 'alert', 'alive', 'alleged', 'allied', 'alone', \n     'alright', 'alternative', 'amateur', 'amazing', 'ambitious', 'american', \n     'amused', 'ancient', 'angry', 'annoyed', 'annual', 'anonymous', 'anxious', \n     'appalling', 'apparent', 'applicable', 'appropriate', 'arab', 'arbitrary',\n     'architectural', 'armed', 'arrogant', 'artificial', 'artistic', 'ashamed', \n     'asian', 'asleep', 'assistant', 'associated', 'atomic', 'attractive',\n     'australian', 'automatic', 'autonomous', 'available', 'average',\n     'awake', 'aware', 'awful', 'awkward', 'back', 'bad', 'balanced', 'bare', \n     'basic', 'beautiful', 'beneficial', 'better', 'bewildered', 'big', \n     'binding', 'biological', 'bitter', 'bizarre', 'black', 'blank', 'blind', \n     'blonde', 'bloody', 'blue', 'blushing', 'boiling', 'bold', 'bored', \n     'boring', 'bottom', 'brainy', 'brave', 'breakable', 'breezy', 'brief', \n     'bright', 'brilliant', 'british', 'broad', 'broken', 'brown', 'bumpy', \n     'burning', 'busy', 'calm', 'canadian', 'capable', 'capitalist', 'careful',\n     'casual', 'catholic', 'causal', 'cautious', 'central', 'certain', \n     'changing', 'characteristic', 'charming', 'cheap', 'cheerful', 'chemical', \n     'chief', 'chilly', 'chinese', 
'chosen', 'christian', 'chronic', 'chubby', \n     'circular', 'civic', 'civil', 'civilian', 'classic', 'classical', 'clean',\n     'clear', 'clever', 'clinical', 'close', 'closed', 'cloudy', 'clumsy', \n     'coastal', 'cognitive', 'coherent', 'cold', 'collective', 'colonial', \n     'colorful', 'colossal', 'coloured', 'colourful', 'combative', 'combined',\n     'comfortable', 'coming', 'commercial', 'common', 'communist', 'compact', \n     'comparable', 'comparative', 'compatible', 'competent', 'competitive', \n     'complete', 'complex', 'complicated', 'comprehensive', 'compulsory',\n     'conceptual', 'concerned', 'concrete', 'condemned', 'confident', \n     'confidential', 'confused', 'conscious', 'conservation', 'conservative',\n     'considerable', 'consistent', 'constant', 'constitutional', \n     'contemporary', 'content', 'continental', 'continued', 'continuing', \n     'continuous', 'controlled', 'controversial', 'convenient', 'conventional',\n     'convinced', 'convincing', 'cooing', 'cool', 'cooperative', 'corporate',\n     'correct', 'corresponding', 'costly', 'courageous', 'crazy', 'creative', \n     'creepy', 'criminal', 'critical', 'crooked', 'crowded', 'crucial', \n     'crude', 'cruel', 'cuddly', 'cultural', 'curious', 'curly', 'current', \n     'curved', 'cute', 'daily', 'damaged', 'damp', 'dangerous', 'dark', 'dead',\n     'deaf', 'deafening', 'dear', 'decent', 'decisive', 'deep', 'defeated', \n     'defensive', 'defiant', 'definite', 'deliberate', 'delicate', 'delicious',\n     'delighted', 'delightful', 'democratic', 'dependent', 'depressed', \n     'desirable', 'desperate', 'detailed', 'determined', 'developed', \n     'developing', 'devoted', 'different', 'difficult', 'digital', 'diplomatic', \n     'direct', 'dirty', 'disabled', 'disappointed', 'disastrous', \n     'disciplinary', 'disgusted', 'distant', 'distinct', 'distinctive',\n     'distinguished', 'disturbed', 'disturbing', 'diverse', 'divine', 'dizzy', \n     'domestic', 'dominant', 
'double', 'doubtful', 'drab', 'dramatic',\n     'dreadful', 'driving', 'drunk', 'dry', 'dual', 'due', 'dull', 'dusty',\n     'dutch', 'dying', 'dynamic', 'eager', 'early', 'eastern', 'easy', \n     'economic', 'educational', 'eerie', 'effective', 'efficient', \n     'elaborate', 'elated', 'elderly', 'eldest', 'electoral', 'electric',\n     'electrical', 'electronic', 'elegant', 'eligible', 'embarrassed',\n     'embarrassing', 'emotional', 'empirical', 'empty', 'enchanting',\n     'encouraging', 'endless', 'energetic', 'english', 'enormous', \n     'enthusiastic', 'entire', 'entitled', 'envious', 'environmental', 'equal', \n     'equivalent', 'essential', 'established', 'estimated', 'ethical', \n     'ethnic', 'european', 'eventual', 'everyday', 'evident', 'evil', \n     'evolutionary', 'exact', 'excellent', 'exceptional', 'excess', \n     'excessive', 'excited', 'exciting', 'exclusive', 'existing', 'exotic', \n     'expected', 'expensive', 'experienced', 'experimental', 'explicit',\n     'extended', 'extensive', 'external', 'extra', 'extraordinary', 'extreme', \n     'exuberant', 'faint', 'fair', 'faithful', 'familiar', 'famous', 'fancy',\n     'fantastic', 'far', 'fascinating', 'fashionable', 'fast', 'fat', 'fatal', \n     'favourable', 'favourite', 'federal', 'fellow', 'female', 'feminist', \n     'few', 'fierce', 'filthy', 'final', 'financial', 'fine', 'firm', 'fiscal', \n     'fit', 'fixed', 'flaky', 'flat', 'flexible', 'fluffy', 'fluttering', \n     'flying', 'following', 'fond', 'foolish', 'foreign', 'formal', \n     'formidable', 'forthcoming', 'fortunate', 'forward', 'fragile', \n     'frail', 'frantic', 'free', 'french', 'frequent', 'fresh', 'friendly', \n     'frightened', 'front', 'frozen', 'fucking', 'full', 'full-time', 'fun',\n     'functional', 'fundamental', 'funny', 'furious', 'future', 'fuzzy',\n     'gastric', 'gay', 'general', 'generous', 'genetic', 'gentle', 'genuine',\n     'geographical', 'german', 'giant', 'gigantic', 'given', 'glad',\n     
'glamorous', 'gleaming', 'global', 'glorious', 'golden', 'good', \n     'gorgeous', 'gothic', 'governing', 'graceful', 'gradual', 'grand', \n     'grateful', 'greasy', 'great', 'greek', 'green', 'grey', 'grieving',\n     'grim', 'gross', 'grotesque', 'growing', 'grubby', 'grumpy', 'guilty',\n     'handicapped', 'handsome', 'happy', 'hard', 'harsh', 'head', 'healthy', \n     'heavy', 'helpful', 'helpless', 'hidden', 'high', 'high-pitched',\n     'hilarious', 'hissing', 'historic', 'historical', 'hollow', 'holy',\n     'homeless', 'homely', 'hon', 'honest', 'horizontal', 'horrible', \n     'hostile', 'hot', 'huge', 'human', 'hungry', 'hurt', 'hushed', 'husky',\n     'icy', 'ideal', 'identical', 'ideological', 'ill', 'illegal', \n     'imaginative', 'immediate', 'immense', 'imperial', 'implicit', \n     'important', 'impossible', 'impressed', 'impressive', 'improved', \n     'inadequate', 'inappropriate', 'inc', 'inclined', 'increased', \n     'increasing', 'incredible', 'independent', 'indian', 'indirect', \n     'individual', 'industrial', 'inevitable', 'influential', 'informal',\n     'inherent', 'initial', 'injured', 'inland', 'inner', 'innocent', \n     'innovative', 'inquisitive', 'instant', 'institutional', 'insufficient',\n     'intact', 'integral', 'integrated', 'intellectual', 'intelligent', \n     'intense', 'intensive', 'interested', 'interesting', 'interim', \n     'interior', 'intermediate', 'internal', 'international', 'intimate',\n     'invisible', 'involved', 'iraqi', 'irish', 'irrelevant', 'islamic',\n     'isolated', 'israeli', 'italian', 'itchy', 'japanese', 'jealous', \n     'jewish', 'jittery', 'joint', 'jolly', 'joyous', 'judicial', 'juicy', \n     'junior', 'just', 'keen', 'key', 'kind', 'known', 'korean', 'labour', \n     'large', 'large-scale', 'late', 'latin', 'lazy', 'leading', 'left', \n     'legal', 'legislative', 'legitimate', 'lengthy', 'lesser', 'level', \n     'lexical', 'liable', 'liberal', 'light', 'like', 'likely', 'limited', \n    
 'linear', 'linguistic', 'liquid', 'literary', 'little', 'live', 'lively', \n     'living', 'local', 'logical', 'lonely', 'long', 'long-term', 'loose', \n     'lost', 'loud', 'lovely', 'low', 'loyal', 'ltd', 'lucky', 'mad',\n     'magenta', 'magic', 'magnetic', 'magnificent', 'main', 'major', 'male',\n     'mammoth', 'managerial', 'managing', 'manual', 'many', 'marginal', \n     'marine', 'marked', 'married', 'marvellous', 'marxist', 'mass', 'massive', \n     'mathematical', 'mature', 'maximum', 'mean', 'meaningful', 'mechanical',\n     'medical', 'medieval', 'melodic', 'melted', 'mental', 'mere', \n     'metropolitan', 'mid', 'middle', 'middle-class', 'mighty', 'mild',\n     'military', 'miniature', 'minimal', 'minimum', 'ministerial', 'minor', \n     'miserable', 'misleading', 'missing', 'misty', 'mixed', 'moaning', \n     'mobile', 'moderate', 'modern', 'modest', 'molecular', 'monetary', \n     'monthly', 'moral', 'motionless', 'muddy', 'multiple', 'mushy', \n     'musical', 'mute', 'mutual', 'mysterious', 'naked', 'narrow', 'nasty',\n     'national', 'native', 'natural', 'naughty', 'naval', 'near', 'nearby', \n     'neat', 'necessary', 'negative', 'neighbouring', 'nervous', 'net', \n     'neutral', 'new', 'nice', 'nineteenth-century', 'noble', 'noisy', \n     'normal', 'northern', 'nosy', 'notable', 'novel', 'nuclear', 'numerous',\n     'nursing', 'nutritious', 'nutty', 'obedient', 'objective', 'obliged', \n     'obnoxious', 'obvious', 'occasional', 'occupational', 'odd', 'official',\n     'ok', 'okay', 'old', 'old-fashioned', 'olympic', 'only', 'open', \n     'operational', 'opposite', 'optimistic', 'oral', 'orange', 'ordinary', \n     'organic', 'organisational', 'original', 'orthodox', 'other', 'outdoor', \n     'outer', 'outrageous', 'outside', 'outstanding', 'overall', 'overseas',\n     'overwhelming', 'painful', 'pale', 'palestinian', 'panicky', 'parallel', \n     'parental', 'parliamentary', 'part-time', 'partial', 'particular', \n     'passing', 
'passive', 'past', 'patient', 'payable', 'peaceful', \n     'peculiar', 'perfect', 'permanent', 'persistent', 'personal', 'petite',\n     'philosophical', 'physical', 'pink', 'plain', 'planned', 'plastic',\n     'pleasant', 'pleased', 'poised', 'polish', 'polite', 'political', 'poor', \n     'popular', 'positive', 'possible', 'post-war', 'potential', 'powerful',\n     'practical', 'precious', 'precise', 'preferred', 'pregnant', \n     'preliminary', 'premier', 'prepared', 'present', 'presidential', \n     'pretty', 'previous', 'prickly', 'primary', 'prime', 'primitive', \n     'principal', 'printed', 'prior', 'private', 'probable', 'productive',\n     'professional', 'profitable', 'profound', 'progressive', 'prominent', \n     'promising', 'proper', 'proposed', 'prospective', 'protective', \n     'protestant', 'proud', 'provincial', 'psychiatric', 'psychological',\n     'public', 'puny', 'pure', 'purple', 'purring', 'puzzled', 'quaint', \n     'qualified', 'quick', 'quickest', 'quiet', 'racial', 'radical', 'rainy',\n     'random', 'rapid', 'rare', 'raspy', 'rational', 'ratty', 'raw', 'ready', \n     'real', 'realistic', 'rear', 'reasonable', 'recent', 'red', 'reduced',\n     'redundant', 'regional', 'registered', 'regular', 'regulatory', 'related', \n     'relative', 'relaxed', 'relevant', 'reliable', 'relieved', 'religious',\n     'reluctant', 'remaining', 'remarkable', 'remote', 'renewed',\n     'representative', 'repulsive', 'required', 'resident', 'residential',\n     'resonant', 'respectable', 'respective', 'responsible', 'resulting',\n     'retail', 'retired', 'revolutionary', 'rich', 'ridiculous', 'right',\n     'rigid', 'ripe', 'rising', 'rival', 'roasted', 'robust', 'rolling', \n     'roman', 'romantic', 'rotten', 'rough', 'round', 'royal', 'rubber',\n     'rude', 'ruling', 'running', 'rural', 'russian', 'sacred', 'sad', 'safe',\n     'salty', 'satisfactory', 'satisfied', 'scared', 'scary', 'scattered',\n     'scientific', 'scornful', 'scottish', 
'scrawny', 'screeching', \n     'secondary', 'secret', 'secure', 'select', 'selected', 'selective', \n     'selfish', 'semantic', 'senior', 'sensible', 'sensitive', 'separate',\n     'serious', 'severe', 'sexual', 'shaggy', 'shaky', 'shallow', 'shared', \n     'sharp', 'sheer', 'shiny', 'shivering', 'shocked', 'short', 'short-term', \n     'shrill', 'shy', 'sick', 'significant', 'silent', 'silky', 'silly', \n     'similar', 'simple', 'single', 'skilled', 'skinny', 'sleepy', 'slight',\n     'slim', 'slimy', 'slippery', 'slow', 'small', 'smart', 'smiling', \n     'smoggy', 'smooth', 'so-called', 'social', 'socialist', 'soft', 'solar',\n     'sole', 'solid', 'sophisticated', 'sore', 'sorry', 'sound', 'sour', \n     'southern', 'soviet', 'spanish', 'spare', 'sparkling', 'spatial', \n     'special', 'specific', 'specified', 'spectacular', 'spicy', 'spiritual',\n     'splendid', 'spontaneous', 'sporting', 'spotless', 'spotty', 'square', \n     'squealing', 'stable', 'stale', 'standard', 'static', 'statistical', \n     'statutory', 'steady', 'steep', 'sticky', 'stiff', 'still', 'stingy',\n     'stormy', 'straight', 'straightforward', 'strange', 'strategic',\n     'strict', 'striking', 'striped', 'strong', 'structural', 'stuck', \n     'stupid', 'subjective', 'subsequent', 'substantial', 'subtle', \n     'successful', 'successive', 'sudden', 'sufficient', 'suitable',\n     'sunny', 'super', 'superb', 'superior', 'supporting', 'supposed',\n     'supreme', 'sure', 'surprised', 'surprising', 'surrounding', \n     'surviving', 'suspicious', 'sweet', 'swift', 'swiss', 'symbolic',\n     'sympathetic', 'systematic', 'tall', 'tame', 'tan', 'tart',\n     'tasteless', 'tasty', 'technical', 'technological', 'teenage', \n     'temporary', 'tender', 'tense', 'terrible', 'territorial', 'testy',\n     'then', 'theoretical', 'thick', 'thin', 'thirsty', 'thorough', \n     'thoughtful', 'thoughtless', 'thundering', 'tight', 'tiny', 'tired',\n     'top', 'tory', 'total', 'tough', 'toxic', 
'traditional', 'tragic', \n     'tremendous', 'tricky', 'tropical', 'troubled', 'turkish', 'typical', \n     'ugliest', 'ugly', 'ultimate', 'unable', 'unacceptable', 'unaware', \n     'uncertain', 'unchanged', 'uncomfortable', 'unconscious', 'underground',\n     'underlying', 'unemployed', 'uneven', 'unexpected', 'unfair', \n     'unfortunate', 'unhappy', 'uniform', 'uninterested', 'unique', 'united',\n     'universal', 'unknown', 'unlikely', 'unnecessary', 'unpleasant', \n     'unsightly', 'unusual', 'unwilling', 'upper', 'upset', 'uptight', \n     'urban', 'urgent', 'used', 'useful', 'useless', 'usual', 'vague', \n     'valid', 'valuable', 'variable', 'varied', 'various', 'varying', 'vast',\n     'verbal', 'vertical', 'very', 'victorian', 'victorious', 'video-taped', \n     'violent', 'visible', 'visiting', 'visual', 'vital', 'vivacious', \n     'vivid', 'vocational', 'voiceless', 'voluntary', 'vulnerable', \n     'wandering', 'warm', 'wasteful', 'watery', 'weak', 'wealthy', 'weary', \n     'wee', 'weekly', 'weird', 'welcome', 'well', 'well-known', 'welsh', \n     'western', 'wet', 'whispering', 'white', 'whole', 'wicked', 'wide',\n     'wide-eyed', 'widespread', 'wild', 'willing', 'wise', 'witty', \n     'wonderful', 'wooden', 'working', 'working-class', 'worldwide',\n     'worried', 'worrying', 'worthwhile', 'worthy', 'written', 'wrong',\n     'yellow', 'young', 'yummy', 'zany', 'zealous']\nb = ['abiding', 'accelerating', 'accepting', 'accomplishing', 'achieving', \n'acquiring', 'acteding', 'activating', 'adapting', 'adding', 'addressing', \n'administering', 'admiring', 'admiting', 'adopting', 'advising', 'affording', \n'agreeing', 'alerting', 'alighting', 'allowing', 'altereding', 'amusing', \n'analyzing', 'announcing', 'annoying', 'answering', 'anticipating', \n'apologizing', 'appearing', 'applauding', 'applieding', 'appointing',\n 'appraising', 'appreciating', 'approving', 'arbitrating', 'arguing', \n 'arising', 'arranging', 'arresting', 'arriving', 
'ascertaining', 'asking', \n 'assembling', 'assessing', 'assisting', 'assuring', 'attaching', 'attacking', \n 'attaining', 'attempting', 'attending', 'attracting', 'auditeding', 'avoiding',\n 'awaking', 'backing', 'baking', 'balancing', 'baning', 'banging', 'baring', \n 'bating', 'bathing', 'battling', 'bing', 'beaming', 'bearing', 'beating', \n 'becoming', 'beging', 'begining', 'behaving', 'beholding', 'belonging', \n 'bending', 'beseting', 'beting', 'biding', 'binding', 'biting', 'bleaching',\n 'bleeding', 'blessing', 'blinding', 'blinking', 'bloting', 'blowing', \n 'blushing', 'boasting', 'boiling', 'bolting', 'bombing', 'booking', \n 'boring', 'borrowing', 'bouncing', 'bowing', 'boxing', 'braking', \n 'branching', 'breaking', 'breathing', 'breeding', 'briefing', 'bringing',\n 'broadcasting', 'bruising', 'brushing', 'bubbling', 'budgeting', 'building', \n 'bumping', 'burning', 'bursting', 'burying', 'busting', 'buying', 'buzing', \n 'calculating', 'calling', 'camping', 'caring', 'carrying', 'carving', \n 'casting', 'cataloging', 'catching', 'causing', 'challenging', 'changing',\n 'charging', 'charting', 'chasing', 'cheating', 'checking', 'cheering', \n 'chewing', 'choking', 'choosing', 'choping', 'claiming', 'claping', \n 'clarifying', 'classifying', 'cleaning', 'clearing', 'clinging', 'cliping',\n 'closing', 'clothing', 'coaching', 'coiling', 'collecting', 'coloring', \n 'combing', 'coming', 'commanding', 'communicating', 'comparing', 'competing',\n 'compiling', 'complaining', 'completing', 'composing', 'computing',\n 'conceiving', 'concentrating', 'conceptualizing', 'concerning', 'concluding',\n 'conducting', 'confessing', 'confronting', 'confusing', 'connecting', \n 'conserving', 'considering', 'consisting', 'consolidating', 'constructing',\n 'consulting', 'containing', 'continuing', 'contracting', 'controling', \n 'converting', 'coordinating', 'copying', 'correcting', 'correlating',\n 'costing', 'coughing', 'counseling', 'counting', 'covering', 'cracking',\n 
'crashing', 'crawling', 'creating', 'creeping', 'critiquing', 'crossing', \n 'crushing', 'crying', 'curing', 'curling', 'curving', 'cuting', 'cycling',\n 'daming', 'damaging', 'dancing', 'daring', 'dealing', 'decaying', 'deceiving',\n 'deciding', 'decorating', 'defining', 'delaying', 'delegating', 'delighting',\n 'delivering', 'demonstrating', 'depending', 'describing', 'deserting', \n 'deserving', 'designing', 'destroying', 'detailing', 'detecting', \n 'determining', 'developing', 'devising', 'diagnosing', 'diging', \n 'directing', 'disagreing', 'disappearing', 'disapproving', 'disarming', \n 'discovering', 'disliking', 'dispensing', 'displaying', 'disproving',\n 'dissecting', 'distributing', 'diving', 'diverting', 'dividing', 'doing',\n 'doubling', 'doubting', 'drafting', 'draging', 'draining', 'dramatizing', \n 'drawing', 'dreaming', 'dressing', 'drinking', 'driping', 'driving', \n 'dropping', 'drowning', 'druming', 'drying', 'dusting', 'dwelling',\n 'earning', 'eating', 'editeding', 'educating', 'eliminating',\n 'embarrassing', 'employing', 'emptying', 'enacteding', 'encouraging',\n 'ending', 'enduring', 'enforcing', 'engineering', 'enhancing',\n 'enjoying', 'enlisting', 'ensuring', 'entering', 'entertaining',\n 'escaping', 'establishing', 'estimating', 'evaluating', 'examining',\n 'exceeding', 'exciting', 'excusing', 'executing', 'exercising', 'exhibiting',\n 'existing', 'expanding', 'expecting', 'expediting', 'experimenting', \n 'explaining', 'exploding', 'expressing', 'extending', 'extracting', \n 'facing', 'facilitating', 'fading', 'failing', 'fancying', 'fastening', \n 'faxing', 'fearing', 'feeding', 'feeling', 'fencing', 'fetching', 'fighting', \n 'filing', 'filling', 'filming', 'finalizing', 'financing', 'finding',\n 'firing', 'fiting', 'fixing', 'flaping', 'flashing', 'fleing', 'flinging',\n 'floating', 'flooding', 'flowing', 'flowering', 'flying', 'folding', \n 'following', 'fooling', 'forbiding', 'forcing', 'forecasting', 'foregoing', \n 'foreseing', 
'foretelling', 'forgeting', 'forgiving', 'forming', \n 'formulating', 'forsaking', 'framing', 'freezing', 'frightening', 'frying',\n 'gathering', 'gazing', 'generating', 'geting', 'giving', 'glowing', 'gluing', \n 'going', 'governing', 'grabing', 'graduating', 'grating', 'greasing', 'greeting',\n 'grinning', 'grinding', 'griping', 'groaning', 'growing', 'guaranteeing',\n 'guarding', 'guessing', 'guiding', 'hammering', 'handing', 'handling', \n 'handwriting', 'hanging', 'happening', 'harassing', 'harming', 'hating',\n 'haunting', 'heading', 'healing', 'heaping', 'hearing', 'heating', 'helping', \n 'hiding', 'hitting', 'holding', 'hooking', 'hoping', 'hopping', 'hovering',\n 'hugging', 'hmuming', 'hunting', 'hurrying', 'hurting', 'hypothesizing', \n 'identifying', 'ignoring', 'illustrating', 'imagining', 'implementing', \n 'impressing', 'improving', 'improvising', 'including', 'increasing', \n 'inducing', 'influencing', 'informing', 'initiating', 'injecting', \n 'injuring', 'inlaying', 'innovating', 'inputing', 'inspecting', \n 'inspiring', 'installing', 'instituting', 'instructing', 'insuring', \n 'integrating', 'intending', 'intensifying', 'interesting', \n 'interfering', 'interlaying', 'interpreting', 'interrupting', \n 'interviewing', 'introducing', 'inventing', 'inventorying', \n 'investigating', 'inviting', 'irritating', 'itching', 'jailing', \n 'jamming', 'jogging', 'joining', 'joking', 'judging', 'juggling', 'jumping',\n 'justifying', 'keeping', 'kepting', 'kicking', 'killing', 'kissing', 'kneeling',\n 'kniting', 'knocking', 'knotting', 'knowing', 'labeling', 'landing', 'lasting',\n 'laughing', 'launching', 'laying', 'leading', 'leaning', 'leaping', 'learning', \n 'leaving', 'lecturing', 'leding', 'lending', 'leting', 'leveling', \n 'licensing', 'licking', 'lying', 'lifteding', 'lighting', 'lightening',\n 'liking', 'listing', 'listening', 'living', 'loading', 'locating', \n 'locking', 'loging', 'longing', 'looking', 'losing', 'loving', \n 'maintaining', 
'making', 'maning', 'managing', 'manipulating', \n 'manufacturing', 'mapping', 'marching', 'marking', 'marketing',\n 'marrying', 'matching', 'mating', 'mattering', 'meaning', 'measuring',\n 'meddling', 'mediating', 'meeting', 'melting', 'melting', 'memorizing',\n 'mending', 'mentoring', 'milking', 'mining', 'misleading', 'missing',\n 'misspelling', 'mistaking', 'misunderstanding', 'mixing', 'moaning', \n 'modeling', 'modifying', 'monitoring', 'mooring', 'motivating',\n 'mourning', 'moving', 'mowing', 'muddling', 'muging', 'multiplying', \n 'murdering', 'nailing', 'naming', 'navigating', 'needing', 'negotiating', \n 'nesting', 'noding', 'nominating', 'normalizing', 'noting', 'noticing', \n 'numbering', 'obeying', 'objecting', 'observing', 'obtaining', 'occuring', \n 'offending', 'offering', 'officiating', 'opening', 'operating', 'ordering', \n 'organizing', 'orienteding', 'originating', 'overcoming', 'overdoing', \n 'overdrawing', 'overflowing', 'overhearing', 'overtaking', 'overthrowing',\n 'owing', 'owning', 'packing', 'paddling', 'painting', 'parking', 'parting', \n 'participating', 'passing', 'pasting', 'pating', 'pausing', 'paying',\n 'pecking', 'pedaling', 'peeling', 'peeping', 'perceiving', 'perfecting', \n 'performing', 'permiting', 'persuading', 'phoning', 'photographing',\n 'picking', 'piloting', 'pinching', 'pining', 'pinpointing', 'pioneering',\n 'placing', 'planing', 'planting', 'playing', 'pleading', 'pleasing',\n 'plugging', 'pointing', 'poking', 'polishing', 'poping', 'possessing',\n 'posting', 'pouring', 'practicing', 'praiseding', 'praying', 'preaching', \n 'preceding', 'predicting', 'prefering', 'preparing', 'prescribing', \n 'presenting', 'preserving', 'preseting', 'presiding', 'pressing', \n 'pretending', 'preventing', 'pricking', 'printing', 'processing', \n 'procuring', 'producing', 'professing', 'programing', 'progressing', \n 'projecting', 'promising', 'promoting', 'proofreading', 'proposing', \n 'protecting', 'proving', 'providing', 
'publicizing', 'pulling', 'pumping',\n 'punching', 'puncturing', 'punishing', 'purchasing', 'pushing', 'puting',\n 'qualifying', 'questioning', 'queuing', 'quiting', 'racing', 'radiating',\n 'raining', 'raising', 'ranking', 'rating', 'reaching', 'reading', \n 'realigning', 'realizing', 'reasoning', 'receiving', 'recognizing', \n 'recommending', 'reconciling', 'recording', 'recruiting', 'reducing', \n 'referring', 'reflecting', 'refusing', 'regreting', 'regulating', \n 'rehabilitating', 'reigning', 'reinforcing', 'rejecting', 'rejoicing',\n 'relating', 'relaxing', 'releasing', 'relying', 'remaining', 'remembering',\n 'reminding', 'removing', 'rendering', 'reorganizing', 'repairing',\n 'repeating', 'replacing', 'replying', 'reporting', 'representing',\n 'reproducing', 'requesting', 'rescuing', 'researching', 'resolving', \n 'responding', 'restoreding', 'restructuring', 'retiring', 'retrieving',\n 'returning', 'reviewing', 'revising', 'rhyming', 'riding', 'riding', \n 'ringing', 'rinsing', 'rising', 'risking', 'robing', 'rocking', 'rolling',\n 'roting', 'rubing', 'ruining', 'ruling', 'runing', 'rushing', 'sacking',\n 'sailing', 'satisfying', 'saving', 'sawing', 'saying', 'scaring', \n 'scattering', 'scheduling', 'scolding', 'scorching', 'scraping', \n 'scratching', 'screaming', 'screwing', 'scribbling', 'scrubing', \n 'sealing', 'searching', 'securing', 'seing', 'seeking', 'selecting', \n 'selling', 'sending', 'sensing', 'separating', 'serving', 'servicing', \n 'seting', 'settling', 'sewing', 'shading', 'shaking', 'shaping', \n 'sharing', 'shaving', 'shearing', 'sheding', 'sheltering', 'shining', \n 'shivering', 'shocking', 'shoing', 'shooting', 'shoping', 'showing', \n 'shrinking', 'shruging', 'shuting', 'sighing', 'signing', 'signaling',\n 'simplifying', 'sining', 'singing', 'sinking', 'siping', 'siting',\n 'sketching', 'skiing', 'skiping', 'slaping', 'slaying', 'sleeping',\n 'sliding', 'slinging', 'slinking', 'sliping', 'sliting', 'slowing',\n 'smashing', 
'smelling', 'smiling', 'smiting', 'smoking', 'snatching',\n 'sneaking', 'sneezing', 'sniffing', 'snoring', 'snowing', 'soaking', \n 'solving', 'soothing', 'soothsaying', 'sorting', 'sounding', 'sowing', \n 'sparing', 'sparking', 'sparkling', 'speaking', 'specifying', 'speeding',\n 'spelling', 'spending', 'spilling', 'spining', 'spiting', 'spliting',\n 'spoiling', 'spoting', 'spraying', 'spreading', 'springing', 'sprouting', \n 'squashing', 'squeaking', 'squealing', 'squeezing', 'staining', 'stamping',\n 'standing', 'staring', 'starting', 'staying', 'stealing', 'steering', \n 'stepping', 'sticking', 'stimulating', 'stinging', 'stinking', 'stirring', \n 'stitching', 'stoping', 'storing', 'straping', 'streamlining', \n 'strengthening', 'stretching', 'striding', 'striking', 'stringing', \n 'stripping', 'striving', 'stroking', 'structuring', 'studying', \n 'stuffing', 'subleting', 'subtracting', 'succeeding', 'sucking', \n 'suffering', 'suggesting', 'suiting', 'summarizing', 'supervising',\n 'supplying', 'supporting', 'supposing', 'surprising', 'surrounding', \n 'suspecting', 'suspending', 'swearing', 'sweating', 'sweeping', 'swelling', \n 'swimming', 'swinging', 'switching', 'symbolizing', 'synthesizing',\n 'systemizing', 'tabulating', 'taking', 'talking', 'taming', 'taping', \n 'targeting', 'tasting', 'teaching', 'tearing', 'teasing', 'telephoning', \n 'telling', 'tempting', 'terrifying', 'testing', 'thanking', 'thawing', \n 'thinking', 'thriving', 'throwing', 'thrusting', 'ticking', 'tickling', \n 'tying', 'timing', 'tiping', 'tiring', 'touching', 'touring', 'towing',\n 'tracing', 'trading', 'training', 'transcribing', 'transfering',\n 'transforming', 'translating', 'transporting', 'traping', 'traveling',\n 'treading', 'treating', 'trembling', 'tricking', 'triping', 'troting', \n 'troubling', 'troubleshooting', 'trusting', 'trying', 'tuging', 'tumbling',\n 'turning', 'tutoring', 'twisting', 'typing', 'undergoing', 'understanding',\n 'undertaking', 'undressing', 
'unfastening', 'unifying', 'uniting', \n 'unlocking', 'unpacking', 'untidying', 'updating', 'upgrading', \n 'upholding', 'upseting', 'using', 'utilizing', 'vanishing', 'verbalizing',\n 'verifying', 'vexing', 'visiting', 'wailing', 'waiting', 'waking', \n 'walking', 'wandering', 'wanting', 'warming', 'warning', 'washing', \n 'wasting', 'watching', 'watering', 'waving', 'wearing', 'weaving', \n 'wedding', 'weeping', 'weighing', 'welcoming', 'wending', 'weting', \n 'whining', 'whiping', 'whirling', 'whispering', 'whistling', 'wining', \n 'winding', 'winking', 'wiping', 'wishing', 'withdrawing', 'withholding',\n 'withstanding', 'wobbling', 'wondering', 'working', 'worrying', 'wrapping', \n 'wrecking', 'wrestling', 'wriggling', 'wringing', 'writing', 'x-raying',\n 'yawning', 'yelling', 'zipping', 'zooming']"
  },
  {
    "path": "BigGAN_utils/binary_utils.py",
    "content": "from torch.autograd import Function\nfrom torch.optim import SGD\n\n\nclass BinaryActivation(Function):\n\n    @staticmethod\n    def forward(ctx, x):\n        ctx.save_for_backward(x)\n        return (x.sign() + 1.) / 2.\n\n    @staticmethod\n    def backward(ctx, grad_output):\n        return grad_output.clone()\n"
  },
  {
    "path": "BigGAN_utils/calculate_inception_moments.py",
    "content": "''' Calculate Inception Moments\n This script iterates over the dataset and calculates the moments of the \n activations of the Inception net (needed for FID), and also returns\n the Inception Score of the training data.\n \n Note that if you don't shuffle the data, the IS of true data will be under-\n estimated as it is label-ordered. By default, the data is not shuffled\n so as to reduce non-determinism. '''\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nimport utils\nimport inception_utils\nfrom tqdm import tqdm, trange\nfrom argparse import ArgumentParser\n\ndef prepare_parser():\n  usage = 'Calculate and store inception metrics.'\n  parser = ArgumentParser(description=usage)\n  parser.add_argument(\n    '--dataset', type=str, default='I128_hdf5',\n    help='Which Dataset to train on, out of I128, I256, C10, C100...'\n         'Append _hdf5 to use the hdf5 version of the dataset. (default: %(default)s)')\n  parser.add_argument(\n    '--data_root', type=str, default='data',\n    help='Default location where data is stored (default: %(default)s)') \n  parser.add_argument(\n    '--batch_size', type=int, default=64,\n    help='Default overall batchsize (default: %(default)s)')\n  parser.add_argument(\n    '--parallel', action='store_true', default=False,\n    help='Train with multiple GPUs (default: %(default)s)')\n  parser.add_argument(\n    '--augment', action='store_true', default=False,\n    help='Augment with random crops and flips (default: %(default)s)')\n  parser.add_argument(\n    '--num_workers', type=int, default=8,\n    help='Number of dataloader workers (default: %(default)s)')\n  parser.add_argument(\n    '--shuffle', action='store_true', default=False,\n    help='Shuffle the data? 
(default: %(default)s)') \n  parser.add_argument(\n    '--seed', type=int, default=0,\n    help='Random seed to use.')\n  return parser\n\ndef run(config):\n  # Get loader\n  config['drop_last'] = False\n  loaders = utils.get_data_loaders(**config)\n\n  # Load inception net\n  net = inception_utils.load_inception_net(parallel=config['parallel'])\n  pool, logits, labels = [], [], []\n  device = 'cuda'\n  for i, (x, y) in enumerate(tqdm(loaders[0])):\n    x = x.to(device)\n    with torch.no_grad():\n      pool_val, logits_val = net(x)\n      pool += [np.asarray(pool_val.cpu())]\n      logits += [np.asarray(F.softmax(logits_val, 1).cpu())]\n      labels += [np.asarray(y.cpu())]\n\n  pool, logits, labels = [np.concatenate(item, 0) for item in [pool, logits, labels]]\n  # uncomment to save pool, logits, and labels to disk\n  # print('Saving pool, logits, and labels to disk...')\n  # np.savez(config['dataset']+'_inception_activations.npz',\n  #           {'pool': pool, 'logits': logits, 'labels': labels})\n  # Calculate inception metrics and report them\n  print('Calculating inception metrics...')\n  IS_mean, IS_std = inception_utils.calculate_inception_score(logits)\n  print('Training data from dataset %s has IS of %5.5f +/- %5.5f' % (config['dataset'], IS_mean, IS_std))\n  # Prepare mu and sigma, save to disk. Remove \"hdf5\" by default \n  # (the FID code also knows to strip \"hdf5\")\n  print('Calculating means and covariances...')\n  mu, sigma = np.mean(pool, axis=0), np.cov(pool, rowvar=False)\n  print('Saving calculated means and covariances to disk...')\n  np.savez(config['dataset'].strip('_hdf5')+'_inception_moments.npz', **{'mu' : mu, 'sigma' : sigma})\n\ndef main():\n  # parse command line    \n  parser = prepare_parser()\n  config = vars(parser.parse_args())\n  print(config)\n  run(config)\n\n\nif __name__ == '__main__':    \n    main()"
  },
  {
    "path": "BigGAN_utils/datasets.py",
    "content": "''' Datasets\r\n    This file contains definitions for our CIFAR, ImageFolder, and HDF5 datasets\r\n'''\r\nimport os\r\nimport os.path\r\nimport sys\r\nfrom PIL import Image\r\nimport numpy as np\r\nfrom tqdm import tqdm, trange\r\n\r\nimport torchvision.datasets as dset\r\nimport torchvision.transforms as transforms\r\nfrom torchvision.datasets.utils import download_url, check_integrity\r\nimport torch.utils.data as data\r\nfrom torch.utils.data import DataLoader\r\n         \r\nIMG_EXTENSIONS = ['.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm']\r\n\r\n\r\ndef is_image_file(filename):\r\n    \"\"\"Checks if a file is an image.\r\n\r\n    Args:\r\n        filename (string): path to a file\r\n\r\n    Returns:\r\n        bool: True if the filename ends with a known image extension\r\n    \"\"\"\r\n    filename_lower = filename.lower()\r\n    return any(filename_lower.endswith(ext) for ext in IMG_EXTENSIONS)\r\n\r\n\r\ndef find_classes(dir):\r\n    classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))]\r\n    classes.sort()\r\n    class_to_idx = {classes[i]: i for i in range(len(classes))}\r\n    return classes, class_to_idx\r\n\r\n\r\ndef make_dataset(dir, class_to_idx):\r\n  images = []\r\n  dir = os.path.expanduser(dir)\r\n  for target in tqdm(sorted(os.listdir(dir))):\r\n    d = os.path.join(dir, target)\r\n    if not os.path.isdir(d):\r\n      continue\r\n\r\n    for root, _, fnames in sorted(os.walk(d)):\r\n      for fname in sorted(fnames):\r\n        if is_image_file(fname):\r\n          path = os.path.join(root, fname)\r\n          item = (path, class_to_idx[target])\r\n          images.append(item)\r\n\r\n  return images\r\n\r\n\r\ndef pil_loader(path):\r\n    # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835)\r\n  with open(path, 'rb') as f:\r\n    img = Image.open(f)\r\n    return img.convert('RGB')\r\n\r\n\r\ndef accimage_loader(path):\r\n  import accimage\r\n  try:\r\n   
 return accimage.Image(path)\r\n  except IOError:\r\n    # Potentially a decoding problem, fall back to PIL.Image\r\n    return pil_loader(path)\r\n\r\n\r\ndef default_loader(path):\r\n  from torchvision import get_image_backend\r\n  if get_image_backend() == 'accimage':\r\n    return accimage_loader(path)\r\n  else:\r\n    return pil_loader(path)\r\n\r\n\r\nclass ImageFolder(data.Dataset):\r\n  \"\"\"A generic data loader where the images are arranged in this way: ::\r\n\r\n      root/dogball/xxx.png\r\n      root/dogball/xxy.png\r\n      root/dogball/xxz.png\r\n\r\n      root/cat/123.png\r\n      root/cat/nsdf3.png\r\n      root/cat/asd932_.png\r\n\r\n  Args:\r\n      root (string): Root directory path.\r\n      transform (callable, optional): A function/transform that  takes in an PIL image\r\n          and returns a transformed version. E.g, ``transforms.RandomCrop``\r\n      target_transform (callable, optional): A function/transform that takes in the\r\n          target and transforms it.\r\n      loader (callable, optional): A function to load an image given its path.\r\n\r\n   Attributes:\r\n      classes (list): List of the class names.\r\n      class_to_idx (dict): Dict with items (class_name, class_index).\r\n      imgs (list): List of (image path, class_index) tuples\r\n  \"\"\"\r\n\r\n  def __init__(self, root, transform=None, target_transform=None,\r\n               loader=default_loader, load_in_mem=False, \r\n               index_filename='imagenet_imgs.npz', **kwargs):\r\n    classes, class_to_idx = find_classes(root)\r\n    # Load pre-computed image directory walk\r\n    if os.path.exists(index_filename):\r\n      print('Loading pre-saved Index file %s...' % index_filename)\r\n      imgs = np.load(index_filename)['imgs']\r\n    # If first time, walk the folder directory and save the \r\n    # results to a pre-computed file.\r\n    else:\r\n      print('Generating  Index file %s...' 
% index_filename)\r\n      imgs = make_dataset(root, class_to_idx)\r\n      np.savez_compressed(index_filename, **{'imgs' : imgs})\r\n    if len(imgs) == 0:\r\n      raise(RuntimeError(\"Found 0 images in subfolders of: \" + root + \"\\n\"\r\n                           \"Supported image extensions are: \" + \",\".join(IMG_EXTENSIONS)))\r\n\r\n    self.root = root\r\n    self.imgs = imgs\r\n    self.classes = classes\r\n    self.class_to_idx = class_to_idx\r\n    self.transform = transform\r\n    self.target_transform = target_transform\r\n    self.loader = loader\r\n    self.load_in_mem = load_in_mem\r\n    \r\n    if self.load_in_mem:\r\n      print('Loading all images into memory...')\r\n      self.data, self.labels = [], []\r\n      for index in tqdm(range(len(self.imgs))):\r\n        path, target = imgs[index][0], imgs[index][1]\r\n        self.data.append(self.transform(self.loader(path)))\r\n        self.labels.append(target)\r\n          \r\n\r\n  def __getitem__(self, index):\r\n    \"\"\"\r\n    Args:\r\n        index (int): Index\r\n\r\n    Returns:\r\n        tuple: (image, target) where target is class_index of the target class.\r\n    \"\"\"\r\n    if self.load_in_mem:\r\n        img = self.data[index]\r\n        target = self.labels[index]\r\n    else:\r\n      path, target = self.imgs[index]\r\n      img = self.loader(str(path))\r\n      if self.transform is not None:\r\n        img = self.transform(img)\r\n    \r\n    if self.target_transform is not None:\r\n      target = self.target_transform(target)\r\n    \r\n    # print(img.size(), target)\r\n    return img, int(target)\r\n\r\n  def __len__(self):\r\n    return len(self.imgs)\r\n\r\n  def __repr__(self):\r\n    fmt_str = 'Dataset ' + self.__class__.__name__ + '\\n'\r\n    fmt_str += '    Number of datapoints: {}\\n'.format(self.__len__())\r\n    fmt_str += '    Root Location: {}\\n'.format(self.root)\r\n    tmp = '    Transforms (if any): '\r\n    fmt_str += '{0}{1}\\n'.format(tmp, 
self.transform.__repr__().replace('\\n', '\\n' + ' ' * len(tmp)))\r\n    tmp = '    Target Transforms (if any): '\r\n    fmt_str += '{0}{1}'.format(tmp, self.target_transform.__repr__().replace('\\n', '\\n' + ' ' * len(tmp)))\r\n    return fmt_str\r\n        \r\n\r\n''' ILSVRC_HDF5: A dataset to support I/O from an HDF5 to avoid\r\n    having to load individual images all the time. '''\r\nimport h5py as h5\r\nimport torch\r\nclass ILSVRC_HDF5(data.Dataset):\r\n  def __init__(self, root, transform=None, target_transform=None,\r\n               load_in_mem=False, train=True,download=False, validate_seed=0,\r\n               val_split=0, **kwargs): # last four are dummies\r\n      \r\n    self.root = root\r\n    self.num_imgs = len(h5.File(root, 'r')['labels'])\r\n    \r\n    # self.transform = transform\r\n    self.target_transform = target_transform   \r\n    \r\n    # Set the transform here\r\n    self.transform = transform\r\n    \r\n    # load the entire dataset into memory? \r\n    self.load_in_mem = load_in_mem\r\n    \r\n    # If loading into memory, do so now\r\n    if self.load_in_mem:\r\n      print('Loading %s into memory...' 
% root)\r\n      with h5.File(root,'r') as f:\r\n        self.data = f['imgs'][:]\r\n        self.labels = f['labels'][:]\r\n\r\n  def __getitem__(self, index):\r\n    \"\"\"\r\n    Args:\r\n        index (int): Index\r\n\r\n    Returns:\r\n        tuple: (image, target) where target is class_index of the target class.\r\n    \"\"\"\r\n    # If loaded the entire dataset in RAM, get image from memory\r\n    if self.load_in_mem:\r\n      img = self.data[index]\r\n      target = self.labels[index]\r\n    \r\n    # Else load it from disk\r\n    else:\r\n      with h5.File(self.root,'r') as f:\r\n        img = f['imgs'][index]\r\n        target = f['labels'][index]\r\n    \r\n   \r\n    # if self.transform is not None:\r\n        # img = self.transform(img)\r\n    # Apply my own transform\r\n    img = ((torch.from_numpy(img).float() / 255) - 0.5) * 2\r\n    \r\n    if self.target_transform is not None:\r\n      target = self.target_transform(target)\r\n    \r\n    return img, int(target)\r\n\r\n  def __len__(self):\r\n      return self.num_imgs\r\n      # return len(self.f['imgs'])\r\n\r\nimport pickle\r\nclass CIFAR10(dset.CIFAR10):\r\n\r\n  def __init__(self, root, train=True,\r\n           transform=None, target_transform=None,\r\n           download=True, validate_seed=0,\r\n           val_split=0, load_in_mem=True, **kwargs):\r\n    self.root = os.path.expanduser(root)\r\n    self.transform = transform\r\n    self.target_transform = target_transform\r\n    self.train = train  # training set or test set\r\n    self.val_split = val_split\r\n\r\n    if download:\r\n      self.download()\r\n\r\n    if not self._check_integrity():\r\n      raise RuntimeError('Dataset not found or corrupted.' 
+\r\n                           ' You can use download=True to download it')\r\n\r\n    # now load the picked numpy arrays    \r\n    self.data = []\r\n    self.labels= []\r\n    for fentry in self.train_list:\r\n      f = fentry[0]\r\n      file = os.path.join(self.root, self.base_folder, f)\r\n      fo = open(file, 'rb')\r\n      if sys.version_info[0] == 2:\r\n        entry = pickle.load(fo)\r\n      else:\r\n        entry = pickle.load(fo, encoding='latin1')\r\n      self.data.append(entry['data'])\r\n      if 'labels' in entry:\r\n        self.labels += entry['labels']\r\n      else:\r\n        self.labels += entry['fine_labels']\r\n      fo.close()\r\n        \r\n    self.data = np.concatenate(self.data)\r\n    # Randomly select indices for validation\r\n    if self.val_split > 0:\r\n      label_indices = [[] for _ in range(max(self.labels)+1)]\r\n      for i,l in enumerate(self.labels):\r\n        label_indices[l] += [i]  \r\n      label_indices = np.asarray(label_indices)\r\n      \r\n      # randomly grab 500 elements of each class\r\n      np.random.seed(validate_seed)\r\n      self.val_indices = []           \r\n      for l_i in label_indices:\r\n        self.val_indices += list(l_i[np.random.choice(len(l_i), int(len(self.data) * val_split) // (max(self.labels) + 1) ,replace=False)])\r\n    \r\n    if self.train=='validate':    \r\n      self.data = self.data[self.val_indices]\r\n      self.labels = list(np.asarray(self.labels)[self.val_indices])\r\n      \r\n      self.data = self.data.reshape((int(50e3 * self.val_split), 3, 32, 32))\r\n      self.data = self.data.transpose((0, 2, 3, 1))  # convert to HWC\r\n    \r\n    elif self.train:\r\n      print(np.shape(self.data))\r\n      if self.val_split > 0:\r\n        self.data = np.delete(self.data,self.val_indices,axis=0)\r\n        self.labels = list(np.delete(np.asarray(self.labels),self.val_indices,axis=0))\r\n          \r\n      self.data = self.data.reshape((int(50e3 * (1.-self.val_split)), 3, 32, 
32))\r\n      self.data = self.data.transpose((0, 2, 3, 1))  # convert to HWC\r\n    else:\r\n      f = self.test_list[0][0]\r\n      file = os.path.join(self.root, self.base_folder, f)\r\n      fo = open(file, 'rb')\r\n      if sys.version_info[0] == 2:\r\n        entry = pickle.load(fo)\r\n      else:\r\n        entry = pickle.load(fo, encoding='latin1')\r\n      self.data = entry['data']\r\n      if 'labels' in entry:\r\n        self.labels = entry['labels']\r\n      else:\r\n        self.labels = entry['fine_labels']\r\n      fo.close()\r\n      self.data = self.data.reshape((10000, 3, 32, 32))\r\n      self.data = self.data.transpose((0, 2, 3, 1))  # convert to HWC\r\n      \r\n  def __getitem__(self, index):\r\n    \"\"\"\r\n    Args:\r\n        index (int): Index\r\n    Returns:\r\n        tuple: (image, target) where target is index of the target class.\r\n    \"\"\"\r\n    img, target = self.data[index], self.labels[index]\r\n\r\n    # doing this so that it is consistent with all other datasets\r\n    # to return a PIL Image\r\n    img = Image.fromarray(img)\r\n\r\n    if self.transform is not None:\r\n      img = self.transform(img)\r\n\r\n    if self.target_transform is not None:\r\n      target = self.target_transform(target)\r\n\r\n    return img, target\r\n      \r\n  def __len__(self):\r\n      return len(self.data)\r\n\r\n\r\nclass CIFAR100(CIFAR10):\r\n    base_folder = 'cifar-100-python'\r\n    url = \"http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz\"\r\n    filename = \"cifar-100-python.tar.gz\"\r\n    tgz_md5 = 'eb9058c3a382ffc7106e4002c42a8d85'\r\n    train_list = [\r\n        ['train', '16019d7e3df5f24257cddd939b257f8d'],\r\n    ]\r\n\r\n    test_list = [\r\n        ['test', 'f0ef6b0ae62326f3e7ffdfab6717acfc'],\r\n    ]\r\n"
  },
  {
    "path": "BigGAN_utils/inception_tf13.py",
    "content": "''' Tensorflow inception score code\nDerived from https://github.com/openai/improved-gan\nCode derived from tensorflow/tensorflow/models/image/imagenet/classify_image.py\nTHIS CODE REQUIRES TENSORFLOW 1.3 or EARLIER to run in PARALLEL BATCH MODE \n\nTo use this code, run sample.py on your model with --sample_npz, and then \npass the experiment name in the --experiment_name.\nThis code also saves pool3 stats to an npz file for FID calculation\n'''\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\n\nimport os.path\nimport sys\nimport tarfile\nimport math\nfrom tqdm import tqdm, trange\nfrom argparse import ArgumentParser\n\nimport numpy as np\nfrom six.moves import urllib\nimport tensorflow as tf\n\nMODEL_DIR = ''\nDATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz'\nsoftmax = None\n\ndef prepare_parser():\n  usage = 'Parser for TF1.3- Inception Score scripts.'\n  parser = ArgumentParser(description=usage)\n  parser.add_argument(\n    '--experiment_name', type=str, default='',\n    help='Which experiment''s samples.npz file to pull and evaluate')\n  parser.add_argument(\n    '--experiment_root', type=str, default='samples',\n    help='Default location where samples are stored (default: %(default)s)')\n  parser.add_argument(\n    '--batch_size', type=int, default=500,\n    help='Default overall batchsize (default: %(default)s)')\n  return parser\n\n\ndef run(config):\n  # Inception with TF1.3 or earlier.\n  # Call this function with list of images. 
Each of elements should be a \n  # numpy array with values ranging from 0 to 255.\n  def get_inception_score(images, splits=10):\n    assert(type(images) == list)\n    assert(type(images[0]) == np.ndarray)\n    assert(len(images[0].shape) == 3)\n    assert(np.max(images[0]) > 10)\n    assert(np.min(images[0]) >= 0.0)\n    inps = []\n    for img in images:\n      img = img.astype(np.float32)\n      inps.append(np.expand_dims(img, 0))\n    bs = config['batch_size']\n    with tf.Session() as sess:\n      preds, pools = [], []\n      n_batches = int(math.ceil(float(len(inps)) / float(bs)))\n      for i in trange(n_batches):\n        inp = inps[(i * bs):min((i + 1) * bs, len(inps))]\n        inp = np.concatenate(inp, 0)\n        pred, pool = sess.run([softmax, pool3], {'ExpandDims:0': inp})\n        preds.append(pred)\n        pools.append(pool)\n      preds = np.concatenate(preds, 0)\n      scores = []\n      for i in range(splits):\n        part = preds[(i * preds.shape[0] // splits):((i + 1) * preds.shape[0] // splits), :]\n        kl = part * (np.log(part) - np.log(np.expand_dims(np.mean(part, 0), 0)))\n        kl = np.mean(np.sum(kl, 1))\n        scores.append(np.exp(kl))\n      return np.mean(scores), np.std(scores), np.squeeze(np.concatenate(pools, 0))\n  # Init inception\n  def _init_inception():\n    global softmax, pool3\n    if not os.path.exists(MODEL_DIR):\n      os.makedirs(MODEL_DIR)\n    filename = DATA_URL.split('/')[-1]\n    filepath = os.path.join(MODEL_DIR, filename)\n    if not os.path.exists(filepath):\n      def _progress(count, block_size, total_size):\n        sys.stdout.write('\\r>> Downloading %s %.1f%%' % (\n            filename, float(count * block_size) / float(total_size) * 100.0))\n        sys.stdout.flush()\n      filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)\n      print()\n      statinfo = os.stat(filepath)\n      print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.')\n    tarfile.open(filepath, 
'r:gz').extractall(MODEL_DIR)\n    with tf.gfile.FastGFile(os.path.join(\n        MODEL_DIR, 'classify_image_graph_def.pb'), 'rb') as f:\n      graph_def = tf.GraphDef()\n      graph_def.ParseFromString(f.read())\n      _ = tf.import_graph_def(graph_def, name='')\n    # Works with an arbitrary minibatch size.\n    with tf.Session() as sess:\n      pool3 = sess.graph.get_tensor_by_name('pool_3:0')\n      ops = pool3.graph.get_operations()\n      for op_idx, op in enumerate(ops):\n        for o in op.outputs:\n          shape = o.get_shape()\n          shape = [s.value for s in shape]\n          new_shape = []\n          for j, s in enumerate(shape):\n            if s == 1 and j == 0:\n              new_shape.append(None)\n            else:\n              new_shape.append(s)\n          o._shape = tf.TensorShape(new_shape)\n      w = sess.graph.get_operation_by_name(\"softmax/logits/MatMul\").inputs[1]\n      logits = tf.matmul(tf.squeeze(pool3), w)\n      softmax = tf.nn.softmax(logits)\n\n  # if softmax is None: # No need to functionalize like this.\n  _init_inception()\n\n  fname = '%s/%s/samples.npz' % (config['experiment_root'], config['experiment_name'])\n  print('loading %s ...'%fname)\n  ims = np.load(fname)['x']\n  import time\n  t0 = time.time()\n  inc_mean, inc_std, pool_activations = get_inception_score(list(ims.swapaxes(1,2).swapaxes(2,3)), splits=10)\n  t1 = time.time()\n  print('Saving pool to numpy file for FID calculations...')\n  np.savez('%s/%s/TF_pool.npz' % (config['experiment_root'], config['experiment_name']), **{'pool_mean': np.mean(pool_activations,axis=0), 'pool_var': np.cov(pool_activations, rowvar=False)})\n  print('Inception took %3f seconds, score of %3f +/- %3f.'%(t1-t0, inc_mean, inc_std))\ndef main():\n  # parse command line and run\n  parser = prepare_parser()\n  config = vars(parser.parse_args())\n  print(config)\n  run(config)\n\nif __name__ == '__main__':\n  main()"
  },
  {
    "path": "BigGAN_utils/inception_utils.py",
    "content": "''' Inception utilities\n    This file contains methods for calculating IS and FID, using either\n    the original numpy code or an accelerated fully-pytorch version that \n    uses a fast newton-schulz approximation for the matrix sqrt. There are also\n    methods for acquiring a desired number of samples from the Generator,\n    and parallelizing the inbuilt PyTorch inception network.\n    \n    NOTE that Inception Scores and FIDs calculated using these methods will \n    *not* be directly comparable to values calculated using the original TF\n    IS/FID code. You *must* use the TF model if you wish to report and compare\n    numbers. This code tends to produce IS values that are 5-10% lower than\n    those obtained through TF. \n'''    \nimport numpy as np\nfrom scipy import linalg # For numpy FID\nimport time\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom torch.nn import Parameter as P\nfrom torchvision.models.inception import inception_v3\n\n\n# Module that wraps the inception network to enable use with dataparallel and\n# returning pool features and logits.\nclass WrapInception(nn.Module):\n  def __init__(self, net):\n    super(WrapInception,self).__init__()\n    self.net = net\n    self.mean = P(torch.tensor([0.485, 0.456, 0.406]).view(1, -1, 1, 1),\n                  requires_grad=False)\n    self.std = P(torch.tensor([0.229, 0.224, 0.225]).view(1, -1, 1, 1),\n                 requires_grad=False)\n  def forward(self, x):\n    # Normalize x\n    x = (x + 1.) 
/ 2.0\n    x = (x - self.mean) / self.std\n    # Upsample if necessary\n    if x.shape[2] != 299 or x.shape[3] != 299:\n      x = F.interpolate(x, size=(299, 299), mode='bilinear', align_corners=True)\n    # 299 x 299 x 3\n    x = self.net.Conv2d_1a_3x3(x)\n    # 149 x 149 x 32\n    x = self.net.Conv2d_2a_3x3(x)\n    # 147 x 147 x 32\n    x = self.net.Conv2d_2b_3x3(x)\n    # 147 x 147 x 64\n    x = F.max_pool2d(x, kernel_size=3, stride=2)\n    # 73 x 73 x 64\n    x = self.net.Conv2d_3b_1x1(x)\n    # 73 x 73 x 80\n    x = self.net.Conv2d_4a_3x3(x)\n    # 71 x 71 x 192\n    x = F.max_pool2d(x, kernel_size=3, stride=2)\n    # 35 x 35 x 192\n    x = self.net.Mixed_5b(x)\n    # 35 x 35 x 256\n    x = self.net.Mixed_5c(x)\n    # 35 x 35 x 288\n    x = self.net.Mixed_5d(x)\n    # 35 x 35 x 288\n    x = self.net.Mixed_6a(x)\n    # 17 x 17 x 768\n    x = self.net.Mixed_6b(x)\n    # 17 x 17 x 768\n    x = self.net.Mixed_6c(x)\n    # 17 x 17 x 768\n    x = self.net.Mixed_6d(x)\n    # 17 x 17 x 768\n    x = self.net.Mixed_6e(x)\n    # 17 x 17 x 768\n    # 17 x 17 x 768\n    x = self.net.Mixed_7a(x)\n    # 8 x 8 x 1280\n    x = self.net.Mixed_7b(x)\n    # 8 x 8 x 2048\n    x = self.net.Mixed_7c(x)\n    # 8 x 8 x 2048\n    pool = torch.mean(x.view(x.size(0), x.size(1), -1), 2)\n    # 1 x 1 x 2048\n    logits = self.net.fc(F.dropout(pool, training=False).view(pool.size(0), -1))\n    # 1000 (num_classes)\n    return pool, logits\n\n\n# A pytorch implementation of cov, from Modar M. Alfadly\n# https://discuss.pytorch.org/t/covariance-and-gradient-support/16217/2\ndef torch_cov(m, rowvar=False):\n    '''Estimate a covariance matrix given data.\n\n    Covariance indicates the level to which two variables vary together.\n    If we examine N-dimensional samples, `X = [x_1, x_2, ... x_N]^T`,\n    then the covariance matrix element `C_{ij}` is the covariance of\n    `x_i` and `x_j`. 
The element `C_{ii}` is the variance of `x_i`.\n\n    Args:\n        m: A 1-D or 2-D array containing multiple variables and observations.\n            Each row of `m` represents a variable, and each column a single\n            observation of all those variables.\n        rowvar: If `rowvar` is True, then each row represents a\n            variable, with observations in the columns. Otherwise, the\n            relationship is transposed: each column represents a variable,\n            while the rows contain observations.\n\n    Returns:\n        The covariance matrix of the variables.\n    '''\n    if m.dim() > 2:\n        raise ValueError('m has more than 2 dimensions')\n    if m.dim() < 2:\n        m = m.view(1, -1)\n    if not rowvar and m.size(0) != 1:\n        m = m.t()\n    # m = m.type(torch.double)  # uncomment this line if desired\n    fact = 1.0 / (m.size(1) - 1)\n    m -= torch.mean(m, dim=1, keepdim=True)\n    mt = m.t()  # if complex: mt = m.t().conj()\n    return fact * m.matmul(mt).squeeze()\n\n\n# Pytorch implementation of matrix sqrt, from Tsung-Yu Lin, and Subhransu Maji\n# https://github.com/msubhransu/matrix-sqrt \ndef sqrt_newton_schulz(A, numIters, dtype=None):\n  with torch.no_grad():\n    if dtype is None:\n      dtype = A.type()\n    batchSize = A.shape[0]\n    dim = A.shape[1]\n    normA = A.mul(A).sum(dim=1).sum(dim=1).sqrt()\n    Y = A.div(normA.view(batchSize, 1, 1).expand_as(A));\n    I = torch.eye(dim,dim).view(1, dim, dim).repeat(batchSize,1,1).type(dtype)\n    Z = torch.eye(dim,dim).view(1, dim, dim).repeat(batchSize,1,1).type(dtype)\n    for i in range(numIters):\n      T = 0.5*(3.0*I - Z.bmm(Y))\n      Y = Y.bmm(T)\n      Z = T.bmm(Z)\n    sA = Y*torch.sqrt(normA).view(batchSize, 1, 1).expand_as(A)\n  return sA\n\n\n# FID calculator from TTUR--consider replacing this with GPU-accelerated cov\n# calculations using torch?\ndef numpy_calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):\n  \"\"\"Numpy implementation of the 
Frechet Distance.\n  Taken from https://github.com/bioinf-jku/TTUR\n  The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)\n  and X_2 ~ N(mu_2, C_2) is\n          d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).\n  Stable version by Dougal J. Sutherland.\n  Params:\n  -- mu1   : Numpy array containing the activations of a layer of the\n             inception net (like returned by the function 'get_predictions')\n             for generated samples.\n  -- mu2   : The sample mean over activations, precalculated on an \n             representive data set.\n  -- sigma1: The covariance matrix over activations for generated samples.\n  -- sigma2: The covariance matrix over activations, precalculated on an \n             representive data set.\n  Returns:\n  --   : The Frechet Distance.\n  \"\"\"\n\n  mu1 = np.atleast_1d(mu1)\n  mu2 = np.atleast_1d(mu2)\n\n  sigma1 = np.atleast_2d(sigma1)\n  sigma2 = np.atleast_2d(sigma2)\n\n  assert mu1.shape == mu2.shape, \\\n    'Training and test mean vectors have different lengths'\n  assert sigma1.shape == sigma2.shape, \\\n    'Training and test covariances have different dimensions'\n\n  diff = mu1 - mu2\n\n  # Product might be almost singular\n  covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)\n  if not np.isfinite(covmean).all():\n    msg = ('fid calculation produces singular product; '\n           'adding %s to diagonal of cov estimates') % eps\n    print(msg)\n    offset = np.eye(sigma1.shape[0]) * eps\n    covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))\n\n  # Numerical error might give slight imaginary component\n  if np.iscomplexobj(covmean):\n    print('wat')\n    if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):\n      m = np.max(np.abs(covmean.imag))\n      raise ValueError('Imaginary component {}'.format(m))\n    covmean = covmean.real  \n\n  tr_covmean = np.trace(covmean) \n\n  out = diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean\n 
 return out\n\n\ndef torch_calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):\n  \"\"\"Pytorch implementation of the Frechet Distance.\n  Taken from https://github.com/bioinf-jku/TTUR\n  The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)\n  and X_2 ~ N(mu_2, C_2) is\n          d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).\n  Stable version by Dougal J. Sutherland.\n  Params:\n  -- mu1   : Numpy array containing the activations of a layer of the\n             inception net (like returned by the function 'get_predictions')\n             for generated samples.\n  -- mu2   : The sample mean over activations, precalculated on an \n             representive data set.\n  -- sigma1: The covariance matrix over activations for generated samples.\n  -- sigma2: The covariance matrix over activations, precalculated on an \n             representive data set.\n  Returns:\n  --   : The Frechet Distance.\n  \"\"\"\n\n\n  assert mu1.shape == mu2.shape, \\\n    'Training and test mean vectors have different lengths'\n  assert sigma1.shape == sigma2.shape, \\\n    'Training and test covariances have different dimensions'\n\n  diff = mu1 - mu2\n  # Run 50 itrs of newton-schulz to get the matrix sqrt of sigma1 dot sigma2\n  covmean = sqrt_newton_schulz(sigma1.mm(sigma2).unsqueeze(0), 50).squeeze()  \n  out = (diff.dot(diff) +  torch.trace(sigma1) + torch.trace(sigma2)\n         - 2 * torch.trace(covmean))\n  return out\n\n\n# Calculate Inception Score mean + std given softmax'd logits and number of splits\ndef calculate_inception_score(pred, num_splits=10):\n  scores = []\n  for index in range(num_splits):\n    pred_chunk = pred[index * (pred.shape[0] // num_splits): (index + 1) * (pred.shape[0] // num_splits), :]\n    kl_inception = pred_chunk * (np.log(pred_chunk) - np.log(np.expand_dims(np.mean(pred_chunk, 0), 0)))\n    kl_inception = np.mean(np.sum(kl_inception, 1))\n    scores.append(np.exp(kl_inception))\n  return np.mean(scores), 
np.std(scores)\n\n\n# Loop and run the sampler and the net until it accumulates num_inception_images\n# activations. Return the pool, the logits, and the labels (if one wants \n# Inception Accuracy the labels of the generated class will be needed)\ndef accumulate_inception_activations(sample, net, num_inception_images=50000):\n  pool, logits, labels = [], [], []\n  while (torch.cat(logits, 0).shape[0] if len(logits) else 0) < num_inception_images:\n    with torch.no_grad():\n      images, labels_val = sample()\n      pool_val, logits_val = net(images.float())\n      pool += [pool_val]\n      logits += [F.softmax(logits_val, 1)]\n      labels += [labels_val]\n  return torch.cat(pool, 0), torch.cat(logits, 0), torch.cat(labels, 0)\n\n\n# Load and wrap the Inception model\ndef load_inception_net(parallel=False):\n  inception_model = inception_v3(pretrained=True, transform_input=False)\n  inception_model = WrapInception(inception_model.eval()).cuda()\n  if parallel:\n    print('Parallelizing Inception module...')\n    inception_model = nn.DataParallel(inception_model)\n  return inception_model\n\n\n# This produces a function which takes in an iterator which returns a set number of samples\n# and iterates until it accumulates config['num_inception_images'] images.\n# The iterator can return samples with a different batch size than used in\n# training, using the setting confg['inception_batchsize']\ndef prepare_inception_metrics(dataset, parallel, no_fid=False):\n  # Load metrics; this is intentionally not in a try-except loop so that\n  # the script will crash here if it cannot find the Inception moments.\n  # By default, remove the \"hdf5\" from dataset\n  dataset = dataset.strip('_hdf5')\n  data_mu = np.load(dataset+'_inception_moments.npz')['mu']\n  data_sigma = np.load(dataset+'_inception_moments.npz')['sigma']\n  # Load network\n  net = load_inception_net(parallel)\n  def get_inception_metrics(sample, num_inception_images, num_splits=10, \n                          
  prints=True, use_torch=True):\n    if prints:\n      print('Gathering activations...')\n    pool, logits, labels = accumulate_inception_activations(sample, net, num_inception_images)\n    if prints:  \n      print('Calculating Inception Score...')\n    IS_mean, IS_std = calculate_inception_score(logits.cpu().numpy(), num_splits)\n    if no_fid:\n      FID = 9999.0\n    else:\n      if prints:\n        print('Calculating means and covariances...')\n      if use_torch:\n        mu, sigma = torch.mean(pool, 0), torch_cov(pool, rowvar=False)\n      else:\n        mu, sigma = np.mean(pool.cpu().numpy(), axis=0), np.cov(pool.cpu().numpy(), rowvar=False)\n      if prints:\n        print('Covariances calculated, getting FID...')\n      if use_torch:\n        FID = torch_calculate_frechet_distance(mu, sigma, torch.tensor(data_mu).float().cuda(), torch.tensor(data_sigma).float().cuda())\n        FID = float(FID.cpu().numpy())\n      else:\n        FID = numpy_calculate_frechet_distance(mu.cpu().numpy(), sigma.cpu().numpy(), data_mu, data_sigma)\n    # Delete mu, sigma, pool, logits, and labels, just in case\n    del mu, sigma, pool, logits, labels\n    return IS_mean, IS_std, FID\n  return get_inception_metrics"
  },
  {
    "path": "BigGAN_utils/layers.py",
    "content": "''' Layers\n    This file contains various layers for the BigGAN models.\n'''\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom torch.nn import init\nimport torch.optim as optim\nimport torch.nn.functional as F\nfrom torch.nn import Parameter as P\n\nfrom sync_batchnorm import SynchronizedBatchNorm2d as SyncBN2d\n\n\n# Projection of x onto y\ndef proj(x, y):\n  return torch.mm(y, x.t()) * y / torch.mm(y, y.t())\n\n\n# Orthogonalize x wrt list of vectors ys\ndef gram_schmidt(x, ys):\n  for y in ys:\n    x = x - proj(x, y)\n  return x\n\n\n# Apply num_itrs steps of the power method to estimate top N singular values.\ndef power_iteration(W, u_, update=True, eps=1e-12):\n  # Lists holding singular vectors and values\n  us, vs, svs = [], [], []\n  for i, u in enumerate(u_):\n    # Run one step of the power iteration\n    with torch.no_grad():\n      v = torch.matmul(u, W)\n      # Run Gram-Schmidt to subtract components of all other singular vectors\n      v = F.normalize(gram_schmidt(v, vs), eps=eps)\n      # Add to the list\n      vs += [v]\n      # Update the other singular vector\n      u = torch.matmul(v, W.t())\n      # Run Gram-Schmidt to subtract components of all other singular vectors\n      u = F.normalize(gram_schmidt(u, us), eps=eps)\n      # Add to the list\n      us += [u]\n      if update:\n        u_[i][:] = u\n    # Compute this singular value and add it to the list\n    svs += [torch.squeeze(torch.matmul(torch.matmul(v, W.t()), u.t()))]\n    #svs += [torch.sum(F.linear(u, W.transpose(0, 1)) * v)]\n  return svs, us, vs\n\n\n# Convenience passthrough function\nclass identity(nn.Module):\n  def forward(self, input):\n    return input\n \n\n# Spectral normalization base class \nclass SN(object):\n  def __init__(self, num_svs, num_itrs, num_outputs, transpose=False, eps=1e-12):\n    # Number of power iterations per step\n    self.num_itrs = num_itrs\n    # Number of singular values\n    self.num_svs = num_svs\n    # 
Transposed?\n    self.transpose = transpose\n    # Epsilon value for avoiding divide-by-0\n    self.eps = eps\n    # Register a singular vector for each sv\n    for i in range(self.num_svs):\n      self.register_buffer('u%d' % i, torch.randn(1, num_outputs))\n      self.register_buffer('sv%d' % i, torch.ones(1))\n  \n  # Singular vectors (u side)\n  @property\n  def u(self):\n    return [getattr(self, 'u%d' % i) for i in range(self.num_svs)]\n\n  # Singular values; \n  # note that these buffers are just for logging and are not used in training. \n  @property\n  def sv(self):\n   return [getattr(self, 'sv%d' % i) for i in range(self.num_svs)]\n   \n  # Compute the spectrally-normalized weight\n  def W_(self):\n    W_mat = self.weight.view(self.weight.size(0), -1)\n    if self.transpose:\n      W_mat = W_mat.t()\n    # Apply num_itrs power iterations\n    for _ in range(self.num_itrs):\n      svs, us, vs = power_iteration(W_mat, self.u, update=self.training, eps=self.eps) \n    # Update the svs\n    if self.training:\n      with torch.no_grad(): # Make sure to do this in a no_grad() context or you'll get memory leaks!\n        for i, sv in enumerate(svs):\n          self.sv[i][:] = sv     \n    return self.weight / svs[0]\n\n\n# 2D Conv layer with spectral norm\nclass SNConv2d(nn.Conv2d, SN):\n  def __init__(self, in_channels, out_channels, kernel_size, stride=1,\n             padding=0, dilation=1, groups=1, bias=True, \n             num_svs=1, num_itrs=1, eps=1e-12):\n    nn.Conv2d.__init__(self, in_channels, out_channels, kernel_size, stride, \n                     padding, dilation, groups, bias)\n    SN.__init__(self, num_svs, num_itrs, out_channels, eps=eps)    \n  def forward(self, x):\n    return F.conv2d(x, self.W_(), self.bias, self.stride, \n                    self.padding, self.dilation, self.groups)\n\n\n# Linear layer with spectral norm\nclass SNLinear(nn.Linear, SN):\n  def __init__(self, in_features, out_features, bias=True,\n               
num_svs=1, num_itrs=1, eps=1e-12):\n    nn.Linear.__init__(self, in_features, out_features, bias)\n    SN.__init__(self, num_svs, num_itrs, out_features, eps=eps)\n  def forward(self, x):\n    return F.linear(x, self.W_(), self.bias)\n\n\n# Embedding layer with spectral norm\n# We use num_embeddings as the dim instead of embedding_dim here\n# for convenience sake\nclass SNEmbedding(nn.Embedding, SN):\n  def __init__(self, num_embeddings, embedding_dim, padding_idx=None, \n               max_norm=None, norm_type=2, scale_grad_by_freq=False,\n               sparse=False, _weight=None,\n               num_svs=1, num_itrs=1, eps=1e-12):\n    nn.Embedding.__init__(self, num_embeddings, embedding_dim, padding_idx,\n                          max_norm, norm_type, scale_grad_by_freq, \n                          sparse, _weight)\n    SN.__init__(self, num_svs, num_itrs, num_embeddings, eps=eps)\n  def forward(self, x):\n    return F.embedding(x, self.W_())\n\n\n# A non-local block as used in SA-GAN\n# Note that the implementation as described in the paper is largely incorrect;\n# refer to the released code for the actual implementation.\nclass Attention(nn.Module):\n  def __init__(self, ch, which_conv=SNConv2d, name='attention'):\n    super(Attention, self).__init__()\n    # Channel multiplier\n    self.ch = ch\n    self.which_conv = which_conv\n    self.theta = self.which_conv(self.ch, self.ch // 8, kernel_size=1, padding=0, bias=False)\n    self.phi = self.which_conv(self.ch, self.ch // 8, kernel_size=1, padding=0, bias=False)\n    self.g = self.which_conv(self.ch, self.ch // 2, kernel_size=1, padding=0, bias=False)\n    self.o = self.which_conv(self.ch // 2, self.ch, kernel_size=1, padding=0, bias=False)\n    # Learnable gain parameter\n    self.gamma = P(torch.tensor(0.), requires_grad=True)\n  def forward(self, x, y=None):\n    # Apply convs\n    theta = self.theta(x)\n    phi = F.max_pool2d(self.phi(x), [2,2])\n    g = F.max_pool2d(self.g(x), [2,2])    \n    # Perform 
reshapes\n    theta = theta.view(-1, self. ch // 8, x.shape[2] * x.shape[3])\n    phi = phi.view(-1, self. ch // 8, x.shape[2] * x.shape[3] // 4)\n    g = g.view(-1, self. ch // 2, x.shape[2] * x.shape[3] // 4)\n    # Matmul and softmax to get attention maps\n    beta = F.softmax(torch.bmm(theta.transpose(1, 2), phi), -1)\n    # Attention map times g path\n    o = self.o(torch.bmm(g, beta.transpose(1,2)).view(-1, self.ch // 2, x.shape[2], x.shape[3]))\n    return self.gamma * o + x\n\n\n# Fused batchnorm op\ndef fused_bn(x, mean, var, gain=None, bias=None, eps=1e-5):\n  # Apply scale and shift--if gain and bias are provided, fuse them here\n  # Prepare scale\n  scale = torch.rsqrt(var + eps)\n  # If a gain is provided, use it\n  if gain is not None:\n    scale = scale * gain\n  # Prepare shift\n  shift = mean * scale\n  # If bias is provided, use it\n  if bias is not None:\n    shift = shift - bias\n  return x * scale - shift\n  #return ((x - mean) / ((var + eps) ** 0.5)) * gain + bias # The unfused way.\n\n\n# Manual BN\n# Calculate means and variances using mean-of-squares minus mean-squared\ndef manual_bn(x, gain=None, bias=None, return_mean_var=False, eps=1e-5):\n  # Cast x to float32 if necessary\n  float_x = x.float()\n  # Calculate expected value of x (m) and expected value of x**2 (m2)  \n  # Mean of x\n  m = torch.mean(float_x, [0, 2, 3], keepdim=True)\n  # Mean of x squared\n  m2 = torch.mean(float_x ** 2, [0, 2, 3], keepdim=True)\n  # Calculate variance as mean of squared minus mean squared.\n  var = (m2 - m **2)\n  # Cast back to float 16 if necessary\n  var = var.type(x.type())\n  m = m.type(x.type())\n  # Return mean and variance for updating stored mean/var if requested  \n  if return_mean_var:\n    return fused_bn(x, m, var, gain, bias, eps), m.squeeze(), var.squeeze()\n  else:\n    return fused_bn(x, m, var, gain, bias, eps)\n\n\n# My batchnorm, supports standing stats    \nclass myBN(nn.Module):\n  def __init__(self, num_channels, eps=1e-5, 
momentum=0.1):\n    super(myBN, self).__init__()\n    # momentum for updating running stats\n    self.momentum = momentum\n    # epsilon to avoid dividing by 0\n    self.eps = eps\n    # Momentum\n    self.momentum = momentum\n    # Register buffers\n    self.register_buffer('stored_mean', torch.zeros(num_channels))\n    self.register_buffer('stored_var',  torch.ones(num_channels))\n    self.register_buffer('accumulation_counter', torch.zeros(1))\n    # Accumulate running means and vars\n    self.accumulate_standing = False\n    \n  # reset standing stats\n  def reset_stats(self):\n    self.stored_mean[:] = 0\n    self.stored_var[:] = 0\n    self.accumulation_counter[:] = 0\n    \n  def forward(self, x, gain, bias):\n    if self.training:\n      out, mean, var = manual_bn(x, gain, bias, return_mean_var=True, eps=self.eps)\n      # If accumulating standing stats, increment them\n      if self.accumulate_standing:\n        self.stored_mean[:] = self.stored_mean + mean.data\n        self.stored_var[:] = self.stored_var + var.data\n        self.accumulation_counter += 1.0\n      # If not accumulating standing stats, take running averages\n      else:\n        self.stored_mean[:] = self.stored_mean * (1 - self.momentum) + mean * self.momentum\n        self.stored_var[:] = self.stored_var * (1 - self.momentum) + var * self.momentum\n      return out\n    # If not in training mode, use the stored statistics\n    else:         \n      mean = self.stored_mean.view(1, -1, 1, 1)\n      var = self.stored_var.view(1, -1, 1, 1)\n      # If using standing stats, divide them by the accumulation counter   \n      if self.accumulate_standing:\n        mean = mean / self.accumulation_counter\n        var = var / self.accumulation_counter\n      return fused_bn(x, mean, var, gain, bias, self.eps)\n\n\n# Simple function to handle groupnorm norm stylization                      \ndef groupnorm(x, norm_style):\n  # If number of channels specified in norm_style:\n  if 'ch' in 
norm_style:\n    ch = int(norm_style.split('_')[-1])\n    groups = max(int(x.shape[1]) // ch, 1)\n  # If number of groups specified in norm style\n  elif 'grp' in norm_style:\n    groups = int(norm_style.split('_')[-1])\n  # If neither, default to groups = 16\n  else:\n    groups = 16\n  return F.group_norm(x, groups)\n\n\n# Class-conditional bn\n# output size is the number of channels, input size is for the linear layers\n# Andy's Note: this class feels messy but I'm not really sure how to clean it up\n# Suggestions welcome! (By which I mean, refactor this and make a pull request\n# if you want to make this more readable/usable). \nclass ccbn(nn.Module):\n  def __init__(self, output_size, input_size, which_linear, eps=1e-5, momentum=0.1,\n               cross_replica=False, mybn=False, norm_style='bn',):\n    super(ccbn, self).__init__()\n    self.output_size, self.input_size = output_size, input_size\n    # Prepare gain and bias layers\n    self.gain = which_linear(input_size, output_size)\n    self.bias = which_linear(input_size, output_size)\n    # epsilon to avoid dividing by 0\n    self.eps = eps\n    # Momentum\n    self.momentum = momentum\n    # Use cross-replica batchnorm?\n    self.cross_replica = cross_replica\n    # Use my batchnorm?\n    self.mybn = mybn\n    # Norm style?\n    self.norm_style = norm_style\n    \n    if self.cross_replica:\n      self.bn = SyncBN2d(output_size, eps=self.eps, momentum=self.momentum, affine=False)\n    elif self.mybn:\n      self.bn = myBN(output_size, self.eps, self.momentum)\n    elif self.norm_style in ['bn', 'in']:\n      self.register_buffer('stored_mean', torch.zeros(output_size))\n      self.register_buffer('stored_var',  torch.ones(output_size)) \n    \n    \n  def forward(self, x, y):\n    # Calculate class-conditional gains and biases\n    gain = (1 + self.gain(y)).view(y.size(0), -1, 1, 1)\n    bias = self.bias(y).view(y.size(0), -1, 1, 1)\n    # If using my batchnorm\n    if self.mybn or 
self.cross_replica:\n      return self.bn(x, gain=gain, bias=bias)\n    # else:\n    else:\n      if self.norm_style == 'bn':\n        out = F.batch_norm(x, self.stored_mean, self.stored_var, None, None,\n                          self.training, 0.1, self.eps)\n      elif self.norm_style == 'in':\n        out = F.instance_norm(x, self.stored_mean, self.stored_var, None, None,\n                          self.training, 0.1, self.eps)\n      elif self.norm_style == 'gn':\n        out = groupnorm(x, self.normstyle)\n      elif self.norm_style == 'nonorm':\n        out = x\n      return out * gain + bias\n  def extra_repr(self):\n    s = 'out: {output_size}, in: {input_size},'\n    s +=' cross_replica={cross_replica}'\n    return s.format(**self.__dict__)\n\n\n# Normal, non-class-conditional BN\nclass bn(nn.Module):\n  def __init__(self, output_size,  eps=1e-5, momentum=0.1,\n                cross_replica=False, mybn=False):\n    super(bn, self).__init__()\n    self.output_size= output_size\n    # Prepare gain and bias layers\n    self.gain = P(torch.ones(output_size), requires_grad=True)\n    self.bias = P(torch.zeros(output_size), requires_grad=True)\n    # epsilon to avoid dividing by 0\n    self.eps = eps\n    # Momentum\n    self.momentum = momentum\n    # Use cross-replica batchnorm?\n    self.cross_replica = cross_replica\n    # Use my batchnorm?\n    self.mybn = mybn\n    \n    if self.cross_replica:\n      self.bn = SyncBN2d(output_size, eps=self.eps, momentum=self.momentum, affine=False)    \n    elif mybn:\n      self.bn = myBN(output_size, self.eps, self.momentum)\n     # Register buffers if neither of the above\n    else:     \n      self.register_buffer('stored_mean', torch.zeros(output_size))\n      self.register_buffer('stored_var',  torch.ones(output_size))\n    \n  def forward(self, x, y=None):\n    if self.cross_replica or self.mybn:\n      gain = self.gain.view(1,-1,1,1)\n      bias = self.bias.view(1,-1,1,1)\n      return self.bn(x, gain=gain, 
bias=bias)\n    else:\n      return F.batch_norm(x, self.stored_mean, self.stored_var, self.gain,\n                          self.bias, self.training, self.momentum, self.eps)\n\n                          \n# Generator blocks\n# Note that this class assumes the kernel size and padding (and any other\n# settings) have been selected in the main generator module and passed in\n# through the which_conv arg. Similar rules apply with which_bn (the input\n# size [which is actually the number of channels of the conditional info] must \n# be preselected)\nclass GBlock(nn.Module):\n  def __init__(self, in_channels, out_channels,\n               which_conv=nn.Conv2d, which_bn=bn, activation=None, \n               upsample=None):\n    super(GBlock, self).__init__()\n    \n    self.in_channels, self.out_channels = in_channels, out_channels\n    self.which_conv, self.which_bn = which_conv, which_bn\n    self.activation = activation\n    self.upsample = upsample\n    # Conv layers\n    self.conv1 = self.which_conv(self.in_channels, self.out_channels)\n    self.conv2 = self.which_conv(self.out_channels, self.out_channels)\n    self.learnable_sc = in_channels != out_channels or upsample\n    if self.learnable_sc:\n      self.conv_sc = self.which_conv(in_channels, out_channels, \n                                     kernel_size=1, padding=0)\n    # Batchnorm layers\n    self.bn1 = self.which_bn(in_channels)\n    self.bn2 = self.which_bn(out_channels)\n    # upsample layers\n    self.upsample = upsample\n\n  def forward(self, x, y):\n    h = self.activation(self.bn1(x, y))\n    if self.upsample:\n      h = self.upsample(h)\n      x = self.upsample(x)\n    h = self.conv1(h)\n    h = self.activation(self.bn2(h, y))\n    h = self.conv2(h)\n    if self.learnable_sc:       \n      x = self.conv_sc(x)\n    return h + x\n    \n    \n# Residual block for the discriminator\nclass DBlock(nn.Module):\n  def __init__(self, in_channels, out_channels, which_conv=SNConv2d, wide=True,\n              
 preactivation=False, activation=None, downsample=None,):\n    super(DBlock, self).__init__()\n    self.in_channels, self.out_channels = in_channels, out_channels\n    # If using wide D (as in SA-GAN and BigGAN), change the channel pattern\n    self.hidden_channels = self.out_channels if wide else self.in_channels\n    self.which_conv = which_conv\n    self.preactivation = preactivation\n    self.activation = activation\n    self.downsample = downsample\n        \n    # Conv layers\n    self.conv1 = self.which_conv(self.in_channels, self.hidden_channels)\n    self.conv2 = self.which_conv(self.hidden_channels, self.out_channels)\n    self.learnable_sc = True if (in_channels != out_channels) or downsample else False\n    if self.learnable_sc:\n      self.conv_sc = self.which_conv(in_channels, out_channels, \n                                     kernel_size=1, padding=0)\n  def shortcut(self, x):\n    if self.preactivation:\n      if self.learnable_sc:\n        x = self.conv_sc(x)\n      if self.downsample:\n        x = self.downsample(x)\n    else:\n      if self.downsample:\n        x = self.downsample(x)\n      if self.learnable_sc:\n        x = self.conv_sc(x)\n    return x\n    \n  def forward(self, x):\n    if self.preactivation:\n      # h = self.activation(x) # NOT TODAY SATAN\n      # Andy's note: This line *must* be an out-of-place ReLU or it \n      #              will negatively affect the shortcut connection.\n      h = F.relu(x)\n    else:\n      h = x    \n    h = self.conv1(h)\n    h = self.conv2(self.activation(h))\n    if self.downsample:\n      h = self.downsample(h)     \n        \n    return h + self.shortcut(x)\n    \n# dogball"
  },
  {
    "path": "BigGAN_utils/logs/BigGAN_ch96_bs256x8.jsonl",
    "content": "{\"itr\": 2000, \"IS_mean\": 2.806771755218506, \"IS_std\": 0.019480662420392036, \"FID\": 173.76484159711126, \"_stamp\": 1551403232.0425167}\n{\"itr\": 4000, \"IS_mean\": 4.962374687194824, \"IS_std\": 0.07276841998100281, \"FID\": 113.86730514283107, \"_stamp\": 1551422228.743057}\n{\"itr\": 6000, \"IS_mean\": 6.939817905426025, \"IS_std\": 0.11417163163423538, \"FID\": 101.63548498447199, \"_stamp\": 1551457139.3400874}\n{\"itr\": 8000, \"IS_mean\": 8.142985343933105, \"IS_std\": 0.11931543797254562, \"FID\": 92.0014385772705, \"_stamp\": 1551476217.2409613}\n{\"itr\": 10000, \"IS_mean\": 10.355518341064453, \"IS_std\": 0.09094739705324173, \"FID\": 83.58068997965364, \"_stamp\": 1551494854.2419689}\n{\"itr\": 12000, \"IS_mean\": 11.288347244262695, \"IS_std\": 0.14952820539474487, \"FID\": 80.98066299357106, \"_stamp\": 1551513232.5049698}\n{\"itr\": 14000, \"IS_mean\": 11.755794525146484, \"IS_std\": 0.17969024181365967, \"FID\": 76.80603924280956, \"_stamp\": 1551531425.150371}\n{\"itr\": 18000, \"IS_mean\": 13.65534496307373, \"IS_std\": 0.11151058971881866, \"FID\": 65.95736694335938, \"_stamp\": 1551588271.9177916}\n{\"itr\": 20000, \"IS_mean\": 14.817827224731445, \"IS_std\": 0.23588882386684418, \"FID\": 61.32061767578125, \"_stamp\": 1551606713.6567464}\n{\"itr\": 22000, \"IS_mean\": 17.16551399230957, \"IS_std\": 0.19506946206092834, \"FID\": 53.387969970703125, \"_stamp\": 1551624876.6513028}\n{\"itr\": 24000, \"IS_mean\": 19.60654067993164, \"IS_std\": 0.5591856837272644, \"FID\": 46.5386962890625, \"_stamp\": 1551642822.6126688}\n{\"itr\": 26000, \"IS_mean\": 21.74416732788086, \"IS_std\": 0.2850531041622162, \"FID\": 41.595001220703125, \"_stamp\": 1551663522.6019194}\n{\"itr\": 28000, \"IS_mean\": 23.923612594604492, \"IS_std\": 0.41587772965431213, \"FID\": 37.894744873046875, \"_stamp\": 1551681794.6567173}\n{\"itr\": 30000, \"IS_mean\": 25.569377899169922, \"IS_std\": 0.3333457112312317, \"FID\": 35.49310302734375, \"_stamp\": 
1551699773.7080302}\n{\"itr\": 32000, \"IS_mean\": 26.867944717407227, \"IS_std\": 0.5968036651611328, \"FID\": 33.4849853515625, \"_stamp\": 1551717623.887933}\n{\"itr\": 34000, \"IS_mean\": 28.719074249267578, \"IS_std\": 0.5698027014732361, \"FID\": 31.375518798828125, \"_stamp\": 1551735411.1578612}\n{\"itr\": 36000, \"IS_mean\": 30.587574005126953, \"IS_std\": 0.5044271349906921, \"FID\": 29.432281494140625, \"_stamp\": 1551783380.6357439}\n{\"itr\": 38000, \"IS_mean\": 32.08299255371094, \"IS_std\": 0.49342143535614014, \"FID\": 28.099456787109375, \"_stamp\": 1551801179.6495197}\n{\"itr\": 40000, \"IS_mean\": 34.24657440185547, \"IS_std\": 0.7709177732467651, \"FID\": 26.53802490234375, \"_stamp\": 1551818775.171794}\n{\"itr\": 42000, \"IS_mean\": 35.891212463378906, \"IS_std\": 0.7036871314048767, \"FID\": 25.03021240234375, \"_stamp\": 1551836329.6873965}\n{\"itr\": 44000, \"IS_mean\": 38.184898376464844, \"IS_std\": 0.32996198534965515, \"FID\": 23.4940185546875, \"_stamp\": 1551897864.911537}\n{\"itr\": 46000, \"IS_mean\": 40.239479064941406, \"IS_std\": 0.7761151194572449, \"FID\": 22.53167724609375, \"_stamp\": 1551915406.4840703}\n{\"itr\": 48000, \"IS_mean\": 41.46656036376953, \"IS_std\": 1.1031498908996582, \"FID\": 21.5338134765625, \"_stamp\": 1551932899.6074848}\n{\"itr\": 50000, \"IS_mean\": 43.31670379638672, \"IS_std\": 0.7796809077262878, \"FID\": 20.53253173828125, \"_stamp\": 1551950390.345334}\n{\"itr\": 52000, \"IS_mean\": 45.1517333984375, \"IS_std\": 1.2925242185592651, \"FID\": 19.656646728515625, \"_stamp\": 1551967838.1501615}\n{\"itr\": 54000, \"IS_mean\": 47.638771057128906, \"IS_std\": 1.0689665079116821, \"FID\": 18.898162841796875, \"_stamp\": 1552044534.5349634}\n{\"itr\": 56000, \"IS_mean\": 48.87520217895508, \"IS_std\": 1.1317559480667114, \"FID\": 18.1248779296875, \"_stamp\": 1552061763.3080354}\n{\"itr\": 58000, \"IS_mean\": 49.40987014770508, \"IS_std\": 1.1866596937179565, \"FID\": 17.751922607421875, \"_stamp\": 
1552078939.9828825}\n{\"itr\": 60000, \"IS_mean\": 51.051334381103516, \"IS_std\": 1.2281248569488525, \"FID\": 17.19964599609375, \"_stamp\": 1552096167.889482}\n{\"itr\": 62000, \"IS_mean\": 52.0235481262207, \"IS_std\": 0.5391153693199158, \"FID\": 16.62115478515625, \"_stamp\": 1552113417.9520617}\n{\"itr\": 64000, \"IS_mean\": 53.868492126464844, \"IS_std\": 1.327082633972168, \"FID\": 16.237335205078125, \"_stamp\": 1552142961.09602}\n{\"itr\": 66000, \"IS_mean\": 54.978721618652344, \"IS_std\": 0.9502049088478088, \"FID\": 15.81170654296875, \"_stamp\": 1552162403.2232807}\n{\"itr\": 68000, \"IS_mean\": 55.73248291015625, \"IS_std\": 1.0323851108551025, \"FID\": 15.545623779296875, \"_stamp\": 1552181112.676657}\n{\"itr\": 70000, \"IS_mean\": 56.78422927856445, \"IS_std\": 1.211003303527832, \"FID\": 15.28369140625, \"_stamp\": 1552199498.887533}\n{\"itr\": 72000, \"IS_mean\": 57.972999572753906, \"IS_std\": 0.8668608665466309, \"FID\": 14.86395263671875, \"_stamp\": 1552217782.2738616}\n{\"itr\": 74000, \"IS_mean\": 58.845054626464844, \"IS_std\": 1.4297977685928345, \"FID\": 14.620635986328125, \"_stamp\": 1552251085.1781816}\n{\"itr\": 76000, \"IS_mean\": 59.60982131958008, \"IS_std\": 0.9095696210861206, \"FID\": 14.360198974609375, \"_stamp\": 1552270214.9345307}\n{\"itr\": 78000, \"IS_mean\": 60.71195602416992, \"IS_std\": 0.960899829864502, \"FID\": 14.07183837890625, \"_stamp\": 1552288697.1580262}\n{\"itr\": 80000, \"IS_mean\": 61.772125244140625, \"IS_std\": 0.6913255453109741, \"FID\": 13.781585693359375, \"_stamp\": 1552307170.0280282}\n{\"itr\": 82000, \"IS_mean\": 62.98079299926758, \"IS_std\": 1.4735801219940186, \"FID\": 13.55389404296875, \"_stamp\": 1552325252.8553352}\n{\"itr\": 84000, \"IS_mean\": 64.95240783691406, \"IS_std\": 0.9018951654434204, \"FID\": 13.231689453125, \"_stamp\": 1552344135.3111835}\n{\"itr\": 86000, \"IS_mean\": 65.13968658447266, \"IS_std\": 0.8772205114364624, \"FID\": 13.176849365234375, \"_stamp\": 
1552362429.6782444}\n{\"itr\": 88000, \"IS_mean\": 65.84476470947266, \"IS_std\": 1.167534351348877, \"FID\": 12.87078857421875, \"_stamp\": 1552380560.7988124}\n{\"itr\": 90000, \"IS_mean\": 67.41099548339844, \"IS_std\": 1.6899267435073853, \"FID\": 12.586517333984375, \"_stamp\": 1552398550.2060475}\n{\"itr\": 92000, \"IS_mean\": 68.63685607910156, \"IS_std\": 1.9431978464126587, \"FID\": 12.49505615234375, \"_stamp\": 1552430781.6406457}\n{\"itr\": 94000, \"IS_mean\": 70.09907531738281, \"IS_std\": 1.0715738534927368, \"FID\": 12.047607421875, \"_stamp\": 1552449001.1950285}\n{\"itr\": 96000, \"IS_mean\": 70.34623718261719, \"IS_std\": 1.7962944507598877, \"FID\": 11.896697998046875, \"_stamp\": 1552466989.3587568}\n{\"itr\": 98000, \"IS_mean\": 71.08210754394531, \"IS_std\": 1.458209753036499, \"FID\": 11.73046875, \"_stamp\": 1552484800.7138846}\n{\"itr\": 100000, \"IS_mean\": 72.24256896972656, \"IS_std\": 1.3259714841842651, \"FID\": 11.7386474609375, \"_stamp\": 1552502538.0269725}\n{\"itr\": 102000, \"IS_mean\": 73.19488525390625, \"IS_std\": 1.3439149856567383, \"FID\": 11.50494384765625, \"_stamp\": 1552523284.4514356}\n{\"itr\": 104000, \"IS_mean\": 73.38243103027344, \"IS_std\": 1.4162707328796387, \"FID\": 11.374542236328125, \"_stamp\": 1552541012.0651608}\n{\"itr\": 106000, \"IS_mean\": 74.95563507080078, \"IS_std\": 1.089124083518982, \"FID\": 11.10479736328125, \"_stamp\": 1552558577.7458107}\n{\"itr\": 108000, \"IS_mean\": 76.42997741699219, \"IS_std\": 1.9282453060150146, \"FID\": 10.998870849609375, \"_stamp\": 1552576111.9480467}\n{\"itr\": 110000, \"IS_mean\": 76.89225769042969, \"IS_std\": 1.4771150350570679, \"FID\": 10.847015380859375, \"_stamp\": 1552593659.445132}\n{\"itr\": 112000, \"IS_mean\": 78.04684448242188, \"IS_std\": 1.4850096702575684, \"FID\": 10.772552490234375, \"_stamp\": 1552616479.5201895}\n{\"itr\": 114000, \"IS_mean\": 79.67677307128906, \"IS_std\": 2.0147368907928467, \"FID\": 10.528045654296875, \"_stamp\": 
1552633850.9315467}\n{\"itr\": 116000, \"IS_mean\": 79.8828125, \"IS_std\": 0.978247344493866, \"FID\": 10.626068115234375, \"_stamp\": 1552651198.9012825}\n{\"itr\": 118000, \"IS_mean\": 79.95381164550781, \"IS_std\": 1.8608143329620361, \"FID\": 10.46771240234375, \"_stamp\": 1552668560.4420238}\n{\"itr\": 120000, \"IS_mean\": 82.37217712402344, \"IS_std\": 1.8909310102462769, \"FID\": 10.259033203125, \"_stamp\": 1552749673.4319007}\n{\"itr\": 122000, \"IS_mean\": 83.49666595458984, \"IS_std\": 2.38446044921875, \"FID\": 9.996185302734375, \"_stamp\": 1552766698.2706933}\n{\"itr\": 124000, \"IS_mean\": 83.05189514160156, \"IS_std\": 1.8844469785690308, \"FID\": 10.164398193359375, \"_stamp\": 1552783762.891172}\n{\"itr\": 126000, \"IS_mean\": 84.27763366699219, \"IS_std\": 0.9329544901847839, \"FID\": 10.03509521484375, \"_stamp\": 1552800953.5724175}\n{\"itr\": 128000, \"IS_mean\": 85.84852600097656, \"IS_std\": 2.2698562145233154, \"FID\": 9.91644287109375, \"_stamp\": 1552818112.227726}\n{\"itr\": 130000, \"IS_mean\": 87.356689453125, \"IS_std\": 2.0958640575408936, \"FID\": 9.771148681640625, \"_stamp\": 1552837539.995247}\n{\"itr\": 132000, \"IS_mean\": 88.72562408447266, \"IS_std\": 1.7551432847976685, \"FID\": 9.8258056640625, \"_stamp\": 1552859685.9305944}\n{\"itr\": 134000, \"IS_mean\": 88.0631103515625, \"IS_std\": 1.8199039697647095, \"FID\": 9.957183837890625, \"_stamp\": 1552880037.5408435}\n{\"itr\": 136000, \"IS_mean\": 91.50938415527344, \"IS_std\": 1.9926033020019531, \"FID\": 9.876556396484375, \"_stamp\": 1552899854.652669}\n{\"itr\": 138000, \"IS_mean\": 93.09217834472656, \"IS_std\": 2.3062736988067627, \"FID\": 9.908477783203125, \"_stamp\": 1552921580.958927}"
  },
  {
    "path": "BigGAN_utils/logs/compare_IS.m",
    "content": "clc\nclear all\nclose all\nfclose all;\n\n\n\n%% Get All logs and sort them\ns = {};\nd = dir();\nj = 1;\nfor i = 1:length(d)\n    if  any(strfind(d(i).name,'.jsonl'))\n        s = [s; d(i).name];\n    end\nend\n\n\nj = 1;\nfor i = 1:length(s)\n    fname = s{i,1};\n    % Check if the Inception metrics log exists, and if so, plot it\n    [itr, IS, FID, t] = process_inception_log(fname(1:end - 10), 'log.jsonl');\n    s{i,2} = itr;\n    s{i,3} = IS;\n    s{i,4} = FID;\n    s{i,5} = max(IS);\n    s{i,6} = min(FID);\n    s{i,7} = t;\nend\n% Sort by Inception Score?\n[IS_sorted, IS_index] = sort(cell2mat(s(:,5)));\n% Cutoff inception scores below a certain value?\nthreshold = 22;\nIS_index = IS_index(IS_sorted > threshold);\n\n% Sort by FID?\n[FID_sorted, FID_index] = sort(cell2mat(s(:,6)));\n% Cutoff also based on IS?\n% threshold = 0;\nFID_index = FID_index(IS_sorted > threshold);\n\n\n\n%% Plot things?\ncc = hsv(length(IS_index));\nlegend1 = {};\nlegend2 = {};\nmake_axis=true;%false % Turn this on to see the axis out to 1e6 iterations\nfor i=1:length(IS_index)\n    legend1 = [legend1; s{IS_index(i), 1}];\n    figure(1)\n    plot(s{IS_index(i),2}, s{IS_index(i),3}, 'color', cc(i,:),'linewidth',2)\n    hold on;\n    xlabel('itr'); ylabel('IS');\n    grid on;\n    if make_axis\n        axis([0,1e6,0,80]); % 50% grid on;\n    end\n    legend(legend1,'Interpreter','none')\n    %pause(1) % Turn this on to animate stuff\n    legend2 = [legend2; s{IS_index(i), 1}];\n    figure(2)\n    plot(s{IS_index(i),2}, s{IS_index(i),4}, 'color', cc(i,:),'linewidth',2)\n    hold on;\n    xlabel('itr'); ylabel('FID');\n    j = j + 1;\n    grid on;\n    if make_axis\n        axis([0,1e6,0,50]);% grid on;\n    end\n    legend(legend2, 'Interpreter','none')\n    \nend\n\n%% Quick script to plot IS versus timesteps\nif 0\n    figure(3);\n    this_index=4;\n    subplot(2,1,1);\n    %plot(s{this_index, 2}(2:end), s{this_index, 7}(2:end) - s{this_index, 7}(1:end-1), 'r*');\n    % 
xlabel('Iteration');ylabel('\\Delta T')\n    plot(s{this_index, 2}, s{this_index, 7}, 'r*');\n    xlabel('Iteration');ylabel('T')\n    subplot(2,1,2);\n    plot(s{this_index, 2}, s{this_index, 3}, 'r', 'linewidth',2);\n    xlabel('Iteration'), ylabel('Inception score')\n    title(s{this_index,1})\nend"
  },
  {
    "path": "BigGAN_utils/logs/metalog.txt",
    "content": "datetime: 2019-03-18 13:27:59.181225\nconfig: {'dataset': 'I128_hdf5', 'augment': False, 'num_workers': 8, 'pin_memory': True, 'shuffle': True, 'load_in_mem': True, 'use_multiepoch_sampler': True, 'model': 'model', 'G_param': 'SN', 'D_param': 'SN', 'G_ch': 96, 'D_ch': 96, 'G_depth': 1, 'D_depth': 1, 'D_wide': True, 'G_shared': True, 'shared_dim': 128, 'dim_z': 120, 'z_var': 1.0, 'hier': True, 'cross_replica': False, 'mybn': False, 'G_nl': 'inplace_relu', 'D_nl': 'inplace_relu', 'G_attn': '64', 'D_attn': '64', 'norm_style': 'bn', 'seed': 0, 'G_init': 'ortho', 'D_init': 'ortho', 'skip_init': True, 'G_lr': 0.0001, 'D_lr': 0.0004, 'G_B1': 0.0, 'D_B1': 0.0, 'G_B2': 0.999, 'D_B2': 0.999, 'batch_size': 256, 'G_batch_size': 0, 'num_G_accumulations': 8, 'num_D_steps': 1, 'num_D_accumulations': 8, 'split_D': False, 'num_epochs': 400, 'parallel': True, 'G_fp16': False, 'D_fp16': False, 'D_mixed_precision': False, 'G_mixed_precision': False, 'accumulate_stats': False, 'num_standing_accumulations': 16, 'G_eval_mode': True, 'save_every': 500, 'num_save_copies': 2, 'num_best_copies': 5, 'which_best': 'IS', 'no_fid': False, 'test_every': 2000, 'num_inception_images': 50000, 'hashname': False, 'base_root': '', 'dataset_root': 'data', 'weights_root': 'weights', 'logs_root': 'logs', 'samples_root': 'samples', 'pbar': 'mine', 'name_suffix': '', 'experiment_name': 'Jade_BigGAN_B1_bs256x8_fp32', 'config_from_name': False, 'ema': True, 'ema_decay': 0.9999, 'use_ema': True, 'ema_start': 20000, 'adam_eps': 1e-06, 'BN_eps': 1e-05, 'SN_eps': 1e-06, 'num_G_SVs': 1, 'num_D_SVs': 1, 'num_G_SV_itrs': 1, 'num_D_SV_itrs': 1, 'G_ortho': 0.0, 'D_ortho': 0.0, 'toggle_grads': True, 'which_train_fn': 'GAN', 'load_weights': '', 'resume': True, 'logstyle': '%3.3e', 'log_G_spectra': False, 'log_D_spectra': False, 'sv_log_interval': 10, 'resolution': 128, 'n_classes': 1000, 'G_activation': ReLU(inplace), 'D_activation': ReLU(inplace)}\nstate: {'itr': 137500, 'epoch': 2, 'save_num': 0, 
'save_best_num': 1, 'best_IS': 91.509384, 'best_FID': tensor(9.7711, 'config': {'dataset': 'I128_hdf5', 'augment': False, 'num_workers': 8, 'pin_memory': True, 'shuffle': True, 'load_in_mem': True, 'use_multiepoch_sampler': True, 'model': 'model', 'G_param': 'SN', 'D_param': 'SN', 'G_ch': 96, 'D_ch': 96, 'D_wide': True, 'G_shared': True, 'shared_dim': 128, 'dim_z': 120, 'hier': True, 'cross_replica': False, 'mybn': False, 'G_nl': 'inplace_relu', 'D_nl': 'inplace_relu', 'G_attn': '64', 'D_attn': '64', 'norm_style': 'bn', 'seed': 0, 'G_init': 'ortho', 'D_init': 'ortho', 'skip_init': False, 'G_lr': 0.0001, 'D_lr': 0.0004, 'G_B1': 0.0, 'D_B1': 0.0, 'G_B2': 0.999, 'D_B2': 0.999, 'batch_size': 256, 'G_batch_size': 0, 'num_G_accumulations': 8, 'num_D_steps': 1, 'num_D_accumulations': 8, 'split_D': False, 'num_epochs': 100, 'parallel': True, 'G_fp16': False, 'D_fp16': False, 'D_mixed_precision': False, 'G_mixed_precision': False, 'accumulate_stats': False, 'num_standing_accumulations': 16, 'BN_sync': False, 'G_eval_mode': True, 'save_every': 500, 'num_save_copies': 2, 'num_best_copies': 5, 'which_best': 'IS', 'no_fid': False, 'test_every': 2000, 'num_inception_images': 50000, 'hashname': False, 'base_root': '', 'dataset_root': 'data', 'weights_root': 'weights', 'logs_root': 'logs', 'samples_root': 'samples', 'pbar': 'mine', 'name_suffix': '', 'experiment_name': 'Jade_BigGAN_B1_bs256x8_fp32', 'ema': True, 'ema_decay': 0.9999, 'use_ema': True, 'ema_start': 20000, 'adam_eps': 1e-06, 'BN_eps': 1e-05, 'SN_eps': 1e-06, 'num_G_SVs': 1, 'num_D_SVs': 1, 'num_G_SV_itrs': 1, 'num_D_SV_itrs': 1, 'G_ortho': 0.0, 'D_ortho': 0.0, 'toggle_grads': True, 'which_train_fn': 'GAN', 'load_weights': '', 'resume': False, 'logstyle': '%3.3e', 'log_G_spectra': False, 'log_D_spectra': False, 'sv_log_interval': 10, 'resolution': 128, 'n_classes': 1000, 'G_activation': ReLU(inplace), 'D_activation': ReLU(inplace)}}\n"
  },
  {
    "path": "BigGAN_utils/logs/process_inception_log.m",
    "content": "function [itr, IS, FID, t] = process_inception_log(fname, which_log)\nf = sprintf('%s_%s',fname, which_log);%'G_loss.log');\nfid = fopen(f,'r');\nitr = [];\nIS = [];\nFID = [];\nt = [];\ni = 1;\nwhile ~feof(fid);\n    s = fgets(fid);\n    parsed = sscanf(s,'{\"itr\": %d, \"IS_mean\": %f, \"IS_std\": %f, \"FID\": %f, \"_stamp\": %f}');\n    itr(i) = parsed(1);\n    IS(i) = parsed(2);\n    FID(i) = parsed(4);\n    t(i) = parsed(5);\n    i = i + 1;\nend\nfclose(fid);\nend"
  },
  {
    "path": "BigGAN_utils/logs/process_training.m",
    "content": "clc\nclear all\nclose all\nfclose all;\n\n\n\n%% Get all training logs for a given run\ntarget_dir = '.';\ns = {};\nnm = {};\nd = dir(target_dir);\nj = 1;\nfor i = 1:length(d)\n    if  any(strfind(d(i).name,'.log'))\n        s = [s; sprintf('%s\\\\%s', target_dir, d(i).name)];\n        nm = [nm; d(i).name];\n    end\nend\n%% Loop over training logs and acquire data\nD_count = 0;\nG_count = 0;\nfor i = 1:length(s)\n    fname = s{i,1};\n    fid = fopen(s{i,1},'r');\n    % Prepare bookkeeping for sv0\n    if any(strfind(s{i,1},'sv'))\n        if any(strfind(s{i,1},'G_'))\n            G_count = G_count +1;\n        else\n            D_count = D_count + 1;\n        end\n    end\n    itr = [];\n    val = [];\n    j = 1;\n    while ~feof(fid);\n        line = fgets(fid);\n        parsed = sscanf(line, '%d: %e');\n        itr(j) = parsed(1);\n        val(j) = parsed(2);\n        j = j + 1;\n    end\n    s{i,2} = itr;\n    s{i,3} = val;\n    fclose(fid);\nend\n\n%% Plot SVs and losses\nclose all;\nGcc = hsv(G_count);\nDcc = hsv(D_count);\ngi = 1;\ndi = 1;\nli = 1;\nlegendG = {};\nlegendD = {};\nlegendL = {};\nthresh=2; % wavelet denoising threshold\nlosses = {};\nfor i=1:length(s)\n    if any(strfind(s{i,1},'D_loss_real.log')) || any(strfind(s{i,1},'D_loss_fake.log')) || any(strfind(s{i,1},'G_loss.log'))\n        % Select colors\n        if any(strfind(s{i,1},'D_loss_real.log'))\n            color1 = [0.7,0.7,1.0];\n            color2 = [0, 0, 1];\n            dlr = {s{i,2}, s{i,3}, wden(s{i,3},'sqtwolog','s','mln', thresh, 'sym4'), color1, color2};\n            losses = [losses; dlr];\n        elseif any(strfind(s{i,1},'D_loss_fake.log'))\n            color1 = [0.7,1.0,0.7];\n            color2 = [0, 1, 0];\n            dlf = {s{i,2},s{i,3} wden(s{i,3},'sqtwolog','s','mln', thresh, 'sym4'), color1, color2};\n            losses = [losses; dlf];\n        else % g loss\n            color1 = [1.0, 0.7,0.7];\n            color2 = [1, 0, 0];\n            gl = 
{s{i,2},s{i,3}, wden(s{i,3},'sqtwolog','s','mln', thresh, 'sym4'), color1 color2};\n            losses = [losses; gl];\n        end\n        figure(1); hold on;\n        % Plot the unsmoothed losses; we'll plot the smoothed losses later\n        plot(s{i,2},s{i,3},'color', color1, 'HandleVisibility','off');\n        legendL = [legendL; nm{i}];\n        continue\n    end\n    if any(strfind(s{i,1},'G_'))\n        legendG = [legendG; nm{i}];\n        figure(2); hold on;\n        plot(s{i,2},s{i,3},'color',Gcc(gi,:),'linewidth',2);\n        gi = gi+1;\n    elseif any(strfind(s{i,1},'D_'))\n        legendD = [legendD; nm{i}];\n        figure(3); hold on;\n        plot(s{i,2},s{i,3},'color',Dcc(di,:),'linewidth',2);\n        di = di+1;\n    else\n        s{i,1} % Debug print to show the name of the log that was not processed.\n    end\nend\nfigure(1); \n% Plot the smoothed losses last\nfor i = 1:3\n% plot(losses{i,1}, losses{i,2},'color', losses{i,4}, 'HandleVisibility','off');\nplot(losses{i,1},losses{i,3},'color',losses{i,5});\nend\nlegend(legendL, 'Interpreter', 'none'); title('Losses'); xlabel('Generator itr'); ylabel('loss'); axis([0, max(s{end,2}), -1, 4]);\n\nfigure(2); legend(legendG,'Interpreter','none'); title('Singular Values in G'); xlabel('Generator itr'); ylabel('SV0');\nfigure(3); legend(legendD, 'Interpreter', 'none'); title('Singular Values in D'); xlabel('Generator itr'); ylabel('SV0');\n"
  },
  {
    "path": "BigGAN_utils/losses.py",
    "content": "import torch\nimport torch.nn.functional as F\n\n# DCGAN loss\ndef loss_dcgan_dis(dis_fake, dis_real):\n  L1 = torch.mean(F.softplus(-dis_real))\n  L2 = torch.mean(F.softplus(dis_fake))\n  return L1, L2\n\n\ndef loss_dcgan_gen(dis_fake):\n  loss = torch.mean(F.softplus(-dis_fake))\n  return loss\n\n\n# Hinge Loss\ndef loss_hinge_dis(dis_fake, dis_real):\n  loss_real = torch.mean(F.relu(1. - dis_real))\n  loss_fake = torch.mean(F.relu(1. + dis_fake))\n  return loss_real, loss_fake\n# def loss_hinge_dis(dis_fake, dis_real): # This version returns a single loss\n  # loss = torch.mean(F.relu(1. - dis_real))\n  # loss += torch.mean(F.relu(1. + dis_fake))\n  # return loss\n\n\ndef loss_hinge_gen(dis_fake):\n  loss = -torch.mean(dis_fake)\n  return loss\n\n# Default to hinge loss\ngenerator_loss = loss_hinge_gen\ndiscriminator_loss = loss_hinge_dis"
  },
  {
    "path": "BigGAN_utils/make_hdf5.py",
    "content": "\"\"\" Convert dataset to HDF5\n    This script preprocesses a dataset and saves it (images and labels) to \n    an HDF5 file for improved I/O. \"\"\"\nimport os\nimport sys\nfrom argparse import ArgumentParser\nfrom tqdm import tqdm, trange\nimport h5py as h5\n\nimport numpy as np\nimport torch\nimport torchvision.datasets as dset\nimport torchvision.transforms as transforms\nfrom torchvision.utils import save_image\nimport torchvision.transforms as transforms\nfrom torch.utils.data import DataLoader\n\nimport utils\n\ndef prepare_parser():\n  usage = 'Parser for ImageNet HDF5 scripts.'\n  parser = ArgumentParser(description=usage)\n  parser.add_argument(\n    '--dataset', type=str, default='I128',\n    help='Which Dataset to train on, out of I128, I256, C10, C100;'\n         'Append \"_hdf5\" to use the hdf5 version for ISLVRC (default: %(default)s)')\n  parser.add_argument(\n    '--data_root', type=str, default='data',\n    help='Default location where data is stored (default: %(default)s)')\n  parser.add_argument(\n    '--batch_size', type=int, default=256,\n    help='Default overall batchsize (default: %(default)s)')\n  parser.add_argument(\n    '--num_workers', type=int, default=16,\n    help='Number of dataloader workers (default: %(default)s)')\n  parser.add_argument(\n    '--chunk_size', type=int, default=500,\n    help='Default overall batchsize (default: %(default)s)')\n  parser.add_argument(\n    '--compression', action='store_true', default=False,\n    help='Use LZF compression? (default: %(default)s)')\n  return parser\n\n\ndef run(config):\n  if 'hdf5' in config['dataset']:\n    raise ValueError('Reading from an HDF5 file which you will probably be '\n                     'about to overwrite! 
Override this error only if you know '\n                     'what you''re doing!')\n  # Get image size\n  config['image_size'] = utils.imsize_dict[config['dataset']]\n\n  # Update compression entry\n  config['compression'] = 'lzf' if config['compression'] else None #No compression; can also use 'lzf' \n\n  # Get dataset\n  kwargs = {'num_workers': config['num_workers'], 'pin_memory': False, 'drop_last': False}\n  train_loader = utils.get_data_loaders(dataset=config['dataset'],\n                                        batch_size=config['batch_size'],\n                                        shuffle=False,\n                                        data_root=config['data_root'],\n                                        use_multiepoch_sampler=False,\n                                        **kwargs)[0]     \n\n  # HDF5 supports chunking and compression. You may want to experiment \n  # with different chunk sizes to see how it runs on your machines.\n  # Chunk Size/compression     Read speed @ 256x256   Read speed @ 128x128  Filesize @ 128x128    Time to write @128x128\n  # 1 / None                   20/s\n  # 500 / None                 ramps up to 77/s       102/s                 61GB                  23min\n  # 500 / LZF                                         8/s                   56GB                  23min\n  # 1000 / None                78/s\n  # 5000 / None                81/s\n  # auto:(125,1,16,32) / None                         11/s                  61GB        \n\n  print('Starting to load %s into an HDF5 file with chunk size %i and compression %s...' 
% (config['dataset'], config['chunk_size'], config['compression']))\n  # Loop over train loader\n  for i,(x,y) in enumerate(tqdm(train_loader)):\n    # Stick X into the range [0, 255] since it's coming from the train loader\n    x = (255 * ((x + 1) / 2.0)).byte().numpy()\n    # Numpyify y\n    y = y.numpy()\n    # If we're on the first batch, prepare the hdf5\n    if i==0:\n      with h5.File(config['data_root'] + '/ILSVRC%i.hdf5' % config['image_size'], 'w') as f:\n        print('Producing dataset of len %d' % len(train_loader.dataset))\n        imgs_dset = f.create_dataset('imgs', x.shape,dtype='uint8', maxshape=(len(train_loader.dataset), 3, config['image_size'], config['image_size']),\n                                     chunks=(config['chunk_size'], 3, config['image_size'], config['image_size']), compression=config['compression']) \n        print('Image chunks chosen as ' + str(imgs_dset.chunks))\n        imgs_dset[...] = x\n        labels_dset = f.create_dataset('labels', y.shape, dtype='int64', maxshape=(len(train_loader.dataset),), chunks=(config['chunk_size'],), compression=config['compression'])\n        print('Label chunks chosen as ' + str(labels_dset.chunks))\n        labels_dset[...] = y\n    # Else append to the hdf5\n    else:\n      with h5.File(config['data_root'] + '/ILSVRC%i.hdf5' % config['image_size'], 'a') as f:\n        f['imgs'].resize(f['imgs'].shape[0] + x.shape[0], axis=0)\n        f['imgs'][-x.shape[0]:] = x\n        f['labels'].resize(f['labels'].shape[0] + y.shape[0], axis=0)\n        f['labels'][-y.shape[0]:] = y\n\n\ndef main():\n  # parse command line and run    \n  parser = prepare_parser()\n  config = vars(parser.parse_args())\n  print(config)\n  run(config)\n\nif __name__ == '__main__':    \n  main()"
  },
  {
    "path": "BigGAN_utils/sample.py",
    "content": "''' Sample\n   This script loads a pretrained net and a weightsfile and sample '''\nimport functools\nimport math\nimport numpy as np\nfrom tqdm import tqdm, trange\n\n\nimport torch\nimport torch.nn as nn\nfrom torch.nn import init\nimport torch.optim as optim\nimport torch.nn.functional as F\nfrom torch.nn import Parameter as P\nimport torchvision\n\n# Import my stuff\nimport inception_utils\nimport utils\nimport losses\n\n\n\ndef run(config):\n  # Prepare state dict, which holds things like epoch # and itr #\n  state_dict = {'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0,\n                'best_IS': 0, 'best_FID': 999999, 'config': config}\n                \n  # Optionally, get the configuration from the state dict. This allows for\n  # recovery of the config provided only a state dict and experiment name,\n  # and can be convenient for writing less verbose sample shell scripts.\n  if config['config_from_name']:\n    utils.load_weights(None, None, state_dict, config['weights_root'], \n                       config['experiment_name'], config['load_weights'], None,\n                       strict=False, load_optim=False)\n    # Ignore items which we might want to overwrite from the command line\n    for item in state_dict['config']:\n      if item not in ['z_var', 'base_root', 'batch_size', 'G_batch_size', 'use_ema', 'G_eval_mode']:\n        config[item] = state_dict['config'][item]\n  \n  # update config (see train.py for explanation)\n  config['resolution'] = utils.imsize_dict[config['dataset']]\n  config['n_classes'] = utils.nclass_dict[config['dataset']]\n  config['G_activation'] = utils.activation_dict[config['G_nl']]\n  config['D_activation'] = utils.activation_dict[config['D_nl']]\n  config = utils.update_config_roots(config)\n  config['skip_init'] = True\n  config['no_optim'] = True\n  device = 'cuda'\n  \n  # Seed RNG\n  utils.seed_rng(config['seed'])\n   \n  # Setup cudnn.benchmark for free speed\n  torch.backends.cudnn.benchmark 
= True\n  \n  # Import the model--this line allows us to dynamically select different files.\n  model = __import__(config['model'])\n  experiment_name = (config['experiment_name'] if config['experiment_name']\n                       else utils.name_from_config(config))\n  print('Experiment name is %s' % experiment_name)\n  \n  G = model.Generator(**config).cuda()\n  utils.count_parameters(G)\n  \n  # Load weights\n  print('Loading weights...')\n  # Here is where we deal with the ema--load ema weights or load normal weights\n  utils.load_weights(G if not (config['use_ema']) else None, None, state_dict, \n                     config['weights_root'], experiment_name, config['load_weights'],\n                     G if config['ema'] and config['use_ema'] else None,\n                     strict=False, load_optim=False)\n  # Update batch size setting used for G\n  G_batch_size = max(config['G_batch_size'], config['batch_size']) \n  z_, y_ = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'],\n                             device=device, fp16=config['G_fp16'], \n                             z_var=config['z_var'])\n  \n  if config['G_eval_mode']:\n    print('Putting G in eval mode..')\n    G.eval()\n  else:\n    print('G is in %s mode...' % ('training' if G.training else 'eval'))\n    \n  #Sample function\n  sample = functools.partial(utils.sample, G=G, z_=z_, y_=y_, config=config)  \n  if config['accumulate_stats']:\n    print('Accumulating standing stats across %d accumulations...' % config['num_standing_accumulations'])\n    utils.accumulate_standing_stats(G, z_, y_, config['n_classes'],\n                                    config['num_standing_accumulations'])\n    \n  \n  # Sample a number of images and save them to an NPZ, for use with TF-Inception\n  if config['sample_npz']:\n    # Lists to hold images and labels for images\n    x, y = [], []\n    print('Sampling %d images and saving them to npz...' 
% config['sample_num_npz'])\n    for i in trange(int(np.ceil(config['sample_num_npz'] / float(G_batch_size)))):\n      with torch.no_grad():\n        images, labels = sample()\n      x += [np.uint8(255 * (images.cpu().numpy() + 1) / 2.)]\n      y += [labels.cpu().numpy()]\n    x = np.concatenate(x, 0)[:config['sample_num_npz']]\n    y = np.concatenate(y, 0)[:config['sample_num_npz']]    \n    print('Images shape: %s, Labels shape: %s' % (x.shape, y.shape))\n    npz_filename = '%s/%s/samples.npz' % (config['samples_root'], experiment_name)\n    print('Saving npz to %s...' % npz_filename)\n    np.savez(npz_filename, **{'x' : x, 'y' : y})\n  \n  # Prepare sample sheets\n  if config['sample_sheets']:\n    print('Preparing conditional sample sheets...')\n    utils.sample_sheet(G, classes_per_sheet=utils.classes_per_sheet_dict[config['dataset']], \n                         num_classes=config['n_classes'], \n                         samples_per_class=10, parallel=config['parallel'],\n                         samples_root=config['samples_root'], \n                         experiment_name=experiment_name,\n                         folder_number=config['sample_sheet_folder_num'],\n                         z_=z_,)\n  # Sample interp sheets\n  if config['sample_interps']:\n    print('Preparing interp sheets...')\n    for fix_z, fix_y in zip([False, False, True], [False, True, False]):\n      utils.interp_sheet(G, num_per_sheet=16, num_midpoints=8,\n                         num_classes=config['n_classes'], \n                         parallel=config['parallel'], \n                         samples_root=config['samples_root'], \n                         experiment_name=experiment_name,\n                         folder_number=config['sample_sheet_folder_num'], \n                         sheet_number=0,\n                         fix_z=fix_z, fix_y=fix_y, device='cuda')\n  # Sample random sheet\n  if config['sample_random']:\n    print('Preparing random sample sheet...')\n    images, 
labels = sample()    \n    torchvision.utils.save_image(images.float(),\n                                 '%s/%s/random_samples.jpg' % (config['samples_root'], experiment_name),\n                                 nrow=int(G_batch_size**0.5),\n                                 normalize=True)\n\n  # Get Inception Score and FID\n  get_inception_metrics = inception_utils.prepare_inception_metrics(config['dataset'], config['parallel'], config['no_fid'])\n  # Prepare a simple function get metrics that we use for trunc curves\n  def get_metrics():\n    sample = functools.partial(utils.sample, G=G, z_=z_, y_=y_, config=config)    \n    IS_mean, IS_std, FID = get_inception_metrics(sample, config['num_inception_images'], num_splits=10, prints=False)\n    # Prepare output string\n    outstring = 'Using %s weights ' % ('ema' if config['use_ema'] else 'non-ema')\n    outstring += 'in %s mode, ' % ('eval' if config['G_eval_mode'] else 'training')\n    outstring += 'with noise variance %3.3f, ' % z_.var\n    outstring += 'over %d images, ' % config['num_inception_images']\n    if config['accumulate_stats'] or not config['G_eval_mode']:\n      outstring += 'with batch size %d, ' % G_batch_size\n    if config['accumulate_stats']:\n      outstring += 'using %d standing stat accumulations, ' % config['num_standing_accumulations']\n    outstring += 'Itr %d: PYTORCH UNOFFICIAL Inception Score is %3.3f +/- %3.3f, PYTORCH UNOFFICIAL FID is %5.4f' % (state_dict['itr'], IS_mean, IS_std, FID)\n    print(outstring)\n  if config['sample_inception_metrics']: \n    print('Calculating Inception metrics...')\n    get_metrics()\n    \n  # Sample truncation curve stuff. This is basically the same as the inception metrics code\n  if config['sample_trunc_curves']:\n    start, step, end = [float(item) for item in config['sample_trunc_curves'].split('_')]\n    print('Getting truncation values for variance in range (%3.3f:%3.3f:%3.3f)...' 
% (start, step, end))\n    for var in np.arange(start, end + step, step):     \n      z_.var = var\n      # Optionally comment this out if you want to run with standing stats\n      # accumulated at one z variance setting\n      if config['accumulate_stats']:\n        utils.accumulate_standing_stats(G, z_, y_, config['n_classes'],\n                                    config['num_standing_accumulations'])\n      get_metrics()\ndef main():\n  # parse command line and run    \n  parser = utils.prepare_parser()\n  parser = utils.add_sample_parser(parser)\n  config = vars(parser.parse_args())\n  print(config)\n  run(config)\n  \nif __name__ == '__main__':    \n  main()"
  },
  {
    "path": "BigGAN_utils/scripts/launch_BigGAN_bs256x8.sh",
    "content": "#!/bin/bash\npython train.py \\\n--dataset I128_hdf5 --parallel --shuffle  --num_workers 8 --batch_size 256 --load_in_mem  \\\n--num_G_accumulations 8 --num_D_accumulations 8 \\\n--num_D_steps 1 --G_lr 1e-4 --D_lr 4e-4 --D_B2 0.999 --G_B2 0.999 \\\n--G_attn 64 --D_attn 64 \\\n--G_nl inplace_relu --D_nl inplace_relu \\\n--SN_eps 1e-6 --BN_eps 1e-5 --adam_eps 1e-6 \\\n--G_ortho 0.0 \\\n--G_shared \\\n--G_init ortho --D_init ortho \\\n--hier --dim_z 120 --shared_dim 128 \\\n--G_eval_mode \\\n--G_ch 96 --D_ch 96 \\\n--ema --use_ema --ema_start 20000 \\\n--test_every 2000 --save_every 1000 --num_best_copies 5 --num_save_copies 2 --seed 0 \\\n--use_multiepoch_sampler \\"
  },
  {
    "path": "BigGAN_utils/scripts/launch_BigGAN_bs512x4.sh",
    "content": "#!/bin/bash\npython train.py \\\n--dataset I128_hdf5 --parallel --shuffle  --num_workers 8 --batch_size 512 --load_in_mem  \\\n--num_G_accumulations 4 --num_D_accumulations 4 \\\n--num_D_steps 1 --G_lr 1e-4 --D_lr 4e-4 --D_B2 0.999 --G_B2 0.999 \\\n--G_attn 64 --D_attn 64 \\\n--G_nl inplace_relu --D_nl inplace_relu \\\n--SN_eps 1e-6 --BN_eps 1e-5 --adam_eps 1e-6 \\\n--G_ortho 0.0 \\\n--G_shared \\\n--G_init ortho --D_init ortho \\\n--hier --dim_z 120 --shared_dim 128 \\\n--G_eval_mode \\\n--G_ch 96 --D_ch 96 \\\n--ema --use_ema --ema_start 20000 \\\n--test_every 2000 --save_every 1000 --num_best_copies 5 --num_save_copies 2 --seed 0 \\\n--use_multiepoch_sampler \\"
  },
  {
    "path": "BigGAN_utils/scripts/launch_BigGAN_ch64_bs256x8.sh",
    "content": "#!/bin/bash\npython train.py \\\n--dataset I128_hdf5 --parallel --shuffle  --num_workers 8 --batch_size 256 --load_in_mem  \\\n--num_G_accumulations 8 --num_D_accumulations 8 \\\n--num_D_steps 1 --G_lr 1e-4 --D_lr 4e-4 --D_B2 0.999 --G_B2 0.999 \\\n--G_attn 64 --D_attn 64 \\\n--G_nl inplace_relu --D_nl inplace_relu \\\n--SN_eps 1e-6 --BN_eps 1e-5 --adam_eps 1e-6 \\\n--G_ortho 0.0 \\\n--G_shared \\\n--G_init ortho --D_init ortho \\\n--hier --dim_z 120 --shared_dim 128 \\\n--G_eval_mode \\\n--G_ch 64 --G_ch 64 \\\n--ema --use_ema --ema_start 20000 \\\n--test_every 2000 --save_every 1000 --num_best_copies 5 --num_save_copies 2 --seed 0 \\\n--use_multiepoch_sampler"
  },
  {
    "path": "BigGAN_utils/scripts/launch_BigGAN_deep.sh",
    "content": "#!/bin/bash\npython train.py \\\n--model BigGANdeep \\\n--dataset I128_hdf5 --parallel --shuffle  --num_workers 8 --batch_size 256 \\\n--num_G_accumulations 8 --num_D_accumulations 8 \\\n--num_D_steps 1 --G_lr 1e-4 --D_lr 4e-4 --D_B2 0.999 --G_B2 0.999 \\\n--G_attn 64 --D_attn 64 \\\n--G_ch 128 --D_ch 128 \\\n--G_depth 2 --D_depth 2 \\\n--G_nl inplace_relu --D_nl inplace_relu \\\n--SN_eps 1e-6 --BN_eps 1e-5 --adam_eps 1e-6 \\\n--G_ortho 0.0 \\\n--G_shared \\\n--G_init ortho --D_init ortho \\\n--hier --dim_z 128 --shared_dim 128 \\\n--ema --use_ema --ema_start 20000 --G_eval_mode \\\n--test_every 2000 --save_every 500 --num_best_copies 5 --num_save_copies 2 --seed 0 \\\n--use_multiepoch_sampler \\"
  },
  {
    "path": "BigGAN_utils/scripts/launch_SAGAN_bs128x2_ema.sh",
    "content": "#!/bin/bash\npython train.py \\\n--dataset I128_hdf5 --parallel --shuffle  --num_workers 8 --batch_size 128  \\\n--num_G_accumulations 2 --num_D_accumulations 2 \\\n--num_D_steps 1 --G_lr 1e-4 --D_lr 4e-4 --D_B2 0.999 --G_B2 0.999 \\\n--G_attn 64 --D_attn 64 \\\n--G_nl relu --D_nl relu \\\n--SN_eps 1e-8 --BN_eps 1e-5 --adam_eps 1e-8 \\\n--G_ortho 0.0 \\\n--G_init xavier --D_init xavier \\\n--ema --use_ema --ema_start 2000 --G_eval_mode \\\n--test_every 2000 --save_every 1000 --num_best_copies 5 --num_save_copies 2 --seed 0 \\\n--name_suffix SAGAN_ema \\"
  },
  {
    "path": "BigGAN_utils/scripts/launch_SNGAN.sh",
    "content": "#!/bin/bash\npython train.py \\\n--dataset I128_hdf5 --parallel --shuffle  --num_workers 8 --batch_size 64  \\\n--num_G_accumulations 1 --num_D_accumulations 1 \\\n--num_D_steps 5 --G_lr 2e-4 --D_lr 2e-4 --D_B2 0.900 --G_B2 0.900 \\\n--G_attn 0 --D_attn 0 \\\n--G_nl relu --D_nl relu \\\n--SN_eps 1e-8 --BN_eps 1e-5 --adam_eps 1e-8 \\\n--G_ortho 0.0 \\\n--D_thin \\\n--G_init xavier --D_init xavier \\\n --G_eval_mode \\\n--test_every 2000 --save_every 1000 --num_best_copies 5 --num_save_copies 2 --seed 0 \\\n--name_suffix SNGAN \\"
  },
  {
    "path": "BigGAN_utils/scripts/launch_cifar_ema.sh",
    "content": "#!/bin/bash\nCUDA_VISIBLE_DEVICES=0,1 python train.py \\\n--shuffle --batch_size 50 --parallel \\\n--num_G_accumulations 1 --num_D_accumulations 1 --num_epochs 500 \\\n--num_D_steps 4 --G_lr 2e-4 --D_lr 2e-4 \\\n--dataset C10 \\\n--G_ortho 0.0 \\\n--G_attn 0 --D_attn 0 \\\n--G_init N02 --D_init N02 \\\n--ema --use_ema --ema_start 1000 \\\n--test_every 5000 --save_every 2000 --num_best_copies 5 --num_save_copies 2 --seed 0"
  },
  {
    "path": "BigGAN_utils/scripts/sample_BigGAN_bs256x8.sh",
    "content": "# use z_var to change the variance of z for all the sampling\n# use --mybn --accumulate_stats --num_standing_accumulations 32 to \n# use running stats\npython sample.py \\\n--dataset I128_hdf5 --parallel --shuffle  --num_workers 8 --batch_size 256  \\\n--num_G_accumulations 8 --num_D_accumulations 8 \\\n--num_D_steps 1 --G_lr 1e-4 --D_lr 4e-4 --D_B2 0.999 --G_B2 0.999 \\\n--G_attn 64 --D_attn 64 \\\n--G_ch 96 --D_ch 96 \\\n--G_nl inplace_relu --D_nl inplace_relu \\\n--SN_eps 1e-6 --BN_eps 1e-5 --adam_eps 1e-6 \\\n--G_ortho 0.0 \\\n--G_shared \\\n--G_init ortho --D_init ortho --skip_init \\\n--hier --dim_z 120 --shared_dim 128 \\\n--ema --ema_start 20000 \\\n--use_multiepoch_sampler \\\n--test_every 2000 --save_every 1000 --num_best_copies 5 --num_save_copies 2 --seed 0 \\\n--skip_init --G_batch_size 512  --use_ema --G_eval_mode --sample_trunc_curves 0.05_0.05_1.0  \\\n--sample_inception_metrics --sample_npz  --sample_random --sample_sheets --sample_interps\n"
  },
  {
    "path": "BigGAN_utils/scripts/sample_cifar_ema.sh",
    "content": "#!/bin/bash\nCUDA_VISIBLE_DEVICES=0,1 python sample.py \\\n--shuffle --batch_size 50 --G_batch_size 256 --parallel \\\n--num_G_accumulations 1 --num_D_accumulations 1 --num_epochs 500 \\\n--num_D_steps 4 --G_lr 2e-4 --D_lr 2e-4 \\\n--dataset C10 \\\n--G_ortho 0.0 \\\n--G_attn 0 --D_attn 0 \\\n--G_init N02 --D_init N02 \\\n--ema --use_ema --ema_start 1000 \\\n--test_every 5000 --save_every 2000 --num_best_copies 5 --num_save_copies 2 --seed 0"
  },
  {
    "path": "BigGAN_utils/scripts/utils/duplicate.sh",
    "content": "#duplicate.sh\nsource=BigGAN_I128_hdf5_seed0_Gch64_Dch64_bs256_Glr1.0e-04_Dlr4.0e-04_Gnlinplace_relu_Dnlinplace_relu_Ginitxavier_Dinitxavier_Gshared_alex0\ntarget=BigGAN_I128_hdf5_seed0_Gch64_Dch64_bs256_Glr1.0e-04_Dlr4.0e-04_Gnlinplace_relu_Dnlinplace_relu_Ginitxavier_Dinitxavier_Gshared_alex0A\nlogs_root=logs\nweights_root=weights\necho \"copying ${source} to ${target}\"\ncp -r ${logs_root}/${source} ${logs_root}/${target}\ncp ${logs_root}/${source}_log.jsonl ${logs_root}/${target}_log.jsonl\ncp ${weights_root}/${source}_G.pth ${weights_root}/${target}_G.pth\ncp ${weights_root}/${source}_G_ema.pth ${weights_root}/${target}_G_ema.pth\ncp ${weights_root}/${source}_D.pth ${weights_root}/${target}_D.pth\ncp ${weights_root}/${source}_G_optim.pth ${weights_root}/${target}_G_optim.pth\ncp ${weights_root}/${source}_D_optim.pth ${weights_root}/${target}_D_optim.pth\ncp ${weights_root}/${source}_state_dict.pth ${weights_root}/${target}_state_dict.pth"
  },
  {
    "path": "BigGAN_utils/scripts/utils/prepare_data.sh",
    "content": "#!/bin/bash\npython make_hdf5.py --dataset I128 --batch_size 256 --data_root data\npython calculate_inception_moments.py --dataset I128_hdf5 --data_root data"
  },
  {
    "path": "BigGAN_utils/sync_batchnorm/__init__.py",
    "content": "# -*- coding: utf-8 -*-\n# File   : __init__.py\n# Author : Jiayuan Mao\n# Email  : maojiayuan@gmail.com\n# Date   : 27/01/2018\n# \n# This file is part of Synchronized-BatchNorm-PyTorch.\n# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch\n# Distributed under MIT License.\n\nfrom .batchnorm import SynchronizedBatchNorm1d, SynchronizedBatchNorm2d, SynchronizedBatchNorm3d\nfrom .replicate import DataParallelWithCallback, patch_replication_callback\n"
  },
  {
    "path": "BigGAN_utils/sync_batchnorm/batchnorm.py",
    "content": "# -*- coding: utf-8 -*-\n# File   : batchnorm.py\n# Author : Jiayuan Mao\n# Email  : maojiayuan@gmail.com\n# Date   : 27/01/2018\n#\n# This file is part of Synchronized-BatchNorm-PyTorch.\n# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch\n# Distributed under MIT License.\n\nimport collections\n\nimport torch\nimport torch.nn.functional as F\n\nfrom torch.nn.modules.batchnorm import _BatchNorm\nfrom torch.nn.parallel._functions import ReduceAddCoalesced, Broadcast\n\nfrom .comm import SyncMaster\n\n__all__ = ['SynchronizedBatchNorm1d', 'SynchronizedBatchNorm2d', 'SynchronizedBatchNorm3d']\n\n\ndef _sum_ft(tensor):\n    \"\"\"sum over the first and last dimention\"\"\"\n    return tensor.sum(dim=0).sum(dim=-1)\n\n\ndef _unsqueeze_ft(tensor):\n    \"\"\"add new dementions at the front and the tail\"\"\"\n    return tensor.unsqueeze(0).unsqueeze(-1)\n\n\n_ChildMessage = collections.namedtuple('_ChildMessage', ['sum', 'ssum', 'sum_size'])\n_MasterMessage = collections.namedtuple('_MasterMessage', ['sum', 'inv_std'])\n# _MasterMessage = collections.namedtuple('_MasterMessage', ['sum', 'ssum', 'sum_size'])\n\nclass _SynchronizedBatchNorm(_BatchNorm):\n    def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True):\n        super(_SynchronizedBatchNorm, self).__init__(num_features, eps=eps, momentum=momentum, affine=affine)\n\n        self._sync_master = SyncMaster(self._data_parallel_master)\n\n        self._is_parallel = False\n        self._parallel_id = None\n        self._slave_pipe = None\n\n    def forward(self, input, gain=None, bias=None):\n        # If it is not parallel computation or is in evaluation mode, use PyTorch's implementation.\n        if not (self._is_parallel and self.training):\n            out = F.batch_norm(\n                input, self.running_mean, self.running_var, self.weight, self.bias,\n                self.training, self.momentum, self.eps)\n            if gain is not None:\n              out = out + 
gain\n            if bias is not None:\n              out = out + bias\n            return out\n\n        # Resize the input to (B, C, -1).\n        input_shape = input.size()\n        # print(input_shape)\n        input = input.view(input.size(0), input.size(1), -1)\n\n        # Compute the sum and square-sum.\n        sum_size = input.size(0) * input.size(2)\n        input_sum = _sum_ft(input)\n        input_ssum = _sum_ft(input ** 2)\n        # Reduce-and-broadcast the statistics.\n        # print('it begins')\n        if self._parallel_id == 0:\n            mean, inv_std = self._sync_master.run_master(_ChildMessage(input_sum, input_ssum, sum_size))\n        else:\n            mean, inv_std = self._slave_pipe.run_slave(_ChildMessage(input_sum, input_ssum, sum_size))\n        # if self._parallel_id == 0:\n            # # print('here')\n            # sum, ssum, num = self._sync_master.run_master(_ChildMessage(input_sum, input_ssum, sum_size))\n        # else:\n            # # print('there')\n            # sum, ssum, num = self._slave_pipe.run_slave(_ChildMessage(input_sum, input_ssum, sum_size))\n        \n        # print('how2')\n        # num = sum_size\n        # print('Sum: %f, ssum: %f, sumsize: %f, insum: %f' %(float(sum.sum().cpu()), float(ssum.sum().cpu()), float(sum_size), float(input_sum.sum().cpu()))) \n        # Fix the graph\n        # sum = (sum.detach() - input_sum.detach()) + input_sum\n        # ssum = (ssum.detach() - input_ssum.detach()) + input_ssum\n        \n        # mean = sum / num\n        # var = ssum / num - mean ** 2\n        # # var = (ssum - mean * sum) / num\n        # inv_std = torch.rsqrt(var + self.eps)\n        \n        # Compute the output.\n        if gain is not None:\n          # print('gaining')\n          # scale = _unsqueeze_ft(inv_std) * gain.squeeze(-1)\n          # shift = _unsqueeze_ft(mean) * scale - bias.squeeze(-1)\n          # output = input * scale - shift\n          output = (input - _unsqueeze_ft(mean)) * 
(_unsqueeze_ft(inv_std) * gain.squeeze(-1)) + bias.squeeze(-1)\n        elif self.affine:\n            # MJY:: Fuse the multiplication for speed.\n            output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std * self.weight) + _unsqueeze_ft(self.bias)        \n        else:\n            output = (input - _unsqueeze_ft(mean)) * _unsqueeze_ft(inv_std)\n\n        # Reshape it.\n        return output.view(input_shape)\n\n    def __data_parallel_replicate__(self, ctx, copy_id):\n        self._is_parallel = True\n        self._parallel_id = copy_id\n\n        # parallel_id == 0 means master device.\n        if self._parallel_id == 0:\n            ctx.sync_master = self._sync_master\n        else:\n            self._slave_pipe = ctx.sync_master.register_slave(copy_id)\n\n    def _data_parallel_master(self, intermediates):\n        \"\"\"Reduce the sum and square-sum, compute the statistics, and broadcast it.\"\"\"\n\n        # Always using same \"device order\" makes the ReduceAdd operation faster.\n        # Thanks to:: Tete Xiao (http://tetexiao.com/)\n        intermediates = sorted(intermediates, key=lambda i: i[1].sum.get_device())\n\n        to_reduce = [i[1][:2] for i in intermediates]\n        to_reduce = [j for i in to_reduce for j in i]  # flatten\n        target_gpus = [i[1].sum.get_device() for i in intermediates]\n\n        sum_size = sum([i[1].sum_size for i in intermediates])\n        sum_, ssum = ReduceAddCoalesced.apply(target_gpus[0], 2, *to_reduce)\n        mean, inv_std = self._compute_mean_std(sum_, ssum, sum_size)\n\n        broadcasted = Broadcast.apply(target_gpus, mean, inv_std)\n        # print('a')\n        # print(type(sum_), type(ssum), type(sum_size), sum_.shape, ssum.shape, sum_size)\n        # broadcasted = Broadcast.apply(target_gpus, sum_, ssum, torch.tensor(sum_size).float().to(sum_.device))\n        # print('b')\n        outputs = []\n        for i, rec in enumerate(intermediates):\n            outputs.append((rec[0], 
_MasterMessage(*broadcasted[i*2:i*2+2])))\n            # outputs.append((rec[0], _MasterMessage(*broadcasted[i*3:i*3+3])))\n\n        return outputs\n\n    def _compute_mean_std(self, sum_, ssum, size):\n        \"\"\"Compute the mean and standard-deviation with sum and square-sum. This method\n        also maintains the moving average on the master device.\"\"\"\n        assert size > 1, 'BatchNorm computes unbiased standard-deviation, which requires size > 1.'\n        mean = sum_ / size\n        sumvar = ssum - sum_ * mean\n        unbias_var = sumvar / (size - 1)\n        bias_var = sumvar / size\n\n        self.running_mean = (1 - self.momentum) * self.running_mean + self.momentum * mean.data\n        self.running_var = (1 - self.momentum) * self.running_var + self.momentum * unbias_var.data\n        return mean, torch.rsqrt(bias_var + self.eps)\n        # return mean, bias_var.clamp(self.eps) ** -0.5\n\n\nclass SynchronizedBatchNorm1d(_SynchronizedBatchNorm):\n    r\"\"\"Applies Synchronized Batch Normalization over a 2d or 3d input that is seen as a\n    mini-batch.\n\n    .. 
math::\n\n        y = \\frac{x - mean[x]}{ \\sqrt{Var[x] + \\epsilon}} * gamma + beta\n\n    This module differs from the built-in PyTorch BatchNorm1d as the mean and\n    standard-deviation are reduced across all devices during training.\n\n    For example, when one uses `nn.DataParallel` to wrap the network during\n    training, PyTorch's implementation normalize the tensor on each device using\n    the statistics only on that device, which accelerated the computation and\n    is also easy to implement, but the statistics might be inaccurate.\n    Instead, in this synchronized version, the statistics will be computed\n    over all training samples distributed on multiple devices.\n\n    Note that, for one-GPU or CPU-only case, this module behaves exactly same\n    as the built-in PyTorch implementation.\n\n    The mean and standard-deviation are calculated per-dimension over\n    the mini-batches and gamma and beta are learnable parameter vectors\n    of size C (where C is the input size).\n\n    During training, this layer keeps a running estimate of its computed mean\n    and variance. The running sum is kept with a default momentum of 0.1.\n\n    During evaluation, this running mean/variance is used for normalization.\n\n    Because the BatchNorm is done over the `C` dimension, computing statistics\n    on `(N, L)` slices, it's common terminology to call this Temporal BatchNorm\n\n    Args:\n        num_features: num_features from an expected input of size\n            `batch_size x num_features [x width]`\n        eps: a value added to the denominator for numerical stability.\n            Default: 1e-5\n        momentum: the value used for the running_mean and running_var\n            computation. Default: 0.1\n        affine: a boolean value that when set to ``True``, gives the layer learnable\n            affine parameters. 
Default: ``True``\n\n    Shape:\n        - Input: :math:`(N, C)` or :math:`(N, C, L)`\n        - Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input)\n\n    Examples:\n        >>> # With Learnable Parameters\n        >>> m = SynchronizedBatchNorm1d(100)\n        >>> # Without Learnable Parameters\n        >>> m = SynchronizedBatchNorm1d(100, affine=False)\n        >>> input = torch.autograd.Variable(torch.randn(20, 100))\n        >>> output = m(input)\n    \"\"\"\n\n    def _check_input_dim(self, input):\n        if input.dim() != 2 and input.dim() != 3:\n            raise ValueError('expected 2D or 3D input (got {}D input)'\n                             .format(input.dim()))\n        super(SynchronizedBatchNorm1d, self)._check_input_dim(input)\n\n\nclass SynchronizedBatchNorm2d(_SynchronizedBatchNorm):\n    r\"\"\"Applies Batch Normalization over a 4d input that is seen as a mini-batch\n    of 3d inputs\n\n    .. math::\n\n        y = \\frac{x - mean[x]}{ \\sqrt{Var[x] + \\epsilon}} * gamma + beta\n\n    This module differs from the built-in PyTorch BatchNorm2d as the mean and\n    standard-deviation are reduced across all devices during training.\n\n    For example, when one uses `nn.DataParallel` to wrap the network during\n    training, PyTorch's implementation normalize the tensor on each device using\n    the statistics only on that device, which accelerated the computation and\n    is also easy to implement, but the statistics might be inaccurate.\n    Instead, in this synchronized version, the statistics will be computed\n    over all training samples distributed on multiple devices.\n\n    Note that, for one-GPU or CPU-only case, this module behaves exactly same\n    as the built-in PyTorch implementation.\n\n    The mean and standard-deviation are calculated per-dimension over\n    the mini-batches and gamma and beta are learnable parameter vectors\n    of size C (where C is the input size).\n\n    During training, this layer keeps a running 
estimate of its computed mean\n    and variance. The running sum is kept with a default momentum of 0.1.\n\n    During evaluation, this running mean/variance is used for normalization.\n\n    Because the BatchNorm is done over the `C` dimension, computing statistics\n    on `(N, H, W)` slices, it's common terminology to call this Spatial BatchNorm\n\n    Args:\n        num_features: num_features from an expected input of\n            size batch_size x num_features x height x width\n        eps: a value added to the denominator for numerical stability.\n            Default: 1e-5\n        momentum: the value used for the running_mean and running_var\n            computation. Default: 0.1\n        affine: a boolean value that when set to ``True``, gives the layer learnable\n            affine parameters. Default: ``True``\n\n    Shape:\n        - Input: :math:`(N, C, H, W)`\n        - Output: :math:`(N, C, H, W)` (same shape as input)\n\n    Examples:\n        >>> # With Learnable Parameters\n        >>> m = SynchronizedBatchNorm2d(100)\n        >>> # Without Learnable Parameters\n        >>> m = SynchronizedBatchNorm2d(100, affine=False)\n        >>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45))\n        >>> output = m(input)\n    \"\"\"\n\n    def _check_input_dim(self, input):\n        if input.dim() != 4:\n            raise ValueError('expected 4D input (got {}D input)'\n                             .format(input.dim()))\n        super(SynchronizedBatchNorm2d, self)._check_input_dim(input)\n\n\nclass SynchronizedBatchNorm3d(_SynchronizedBatchNorm):\n    r\"\"\"Applies Batch Normalization over a 5d input that is seen as a mini-batch\n    of 4d inputs\n\n    .. 
math::\n\n        y = \\frac{x - mean[x]}{ \\sqrt{Var[x] + \\epsilon}} * gamma + beta\n\n    This module differs from the built-in PyTorch BatchNorm3d as the mean and\n    standard-deviation are reduced across all devices during training.\n\n    For example, when one uses `nn.DataParallel` to wrap the network during\n    training, PyTorch's implementation normalize the tensor on each device using\n    the statistics only on that device, which accelerated the computation and\n    is also easy to implement, but the statistics might be inaccurate.\n    Instead, in this synchronized version, the statistics will be computed\n    over all training samples distributed on multiple devices.\n\n    Note that, for one-GPU or CPU-only case, this module behaves exactly same\n    as the built-in PyTorch implementation.\n\n    The mean and standard-deviation are calculated per-dimension over\n    the mini-batches and gamma and beta are learnable parameter vectors\n    of size C (where C is the input size).\n\n    During training, this layer keeps a running estimate of its computed mean\n    and variance. The running sum is kept with a default momentum of 0.1.\n\n    During evaluation, this running mean/variance is used for normalization.\n\n    Because the BatchNorm is done over the `C` dimension, computing statistics\n    on `(N, D, H, W)` slices, it's common terminology to call this Volumetric BatchNorm\n    or Spatio-temporal BatchNorm\n\n    Args:\n        num_features: num_features from an expected input of\n            size batch_size x num_features x depth x height x width\n        eps: a value added to the denominator for numerical stability.\n            Default: 1e-5\n        momentum: the value used for the running_mean and running_var\n            computation. Default: 0.1\n        affine: a boolean value that when set to ``True``, gives the layer learnable\n            affine parameters. 
Default: ``True``\n\n    Shape:\n        - Input: :math:`(N, C, D, H, W)`\n        - Output: :math:`(N, C, D, H, W)` (same shape as input)\n\n    Examples:\n        >>> # With Learnable Parameters\n        >>> m = SynchronizedBatchNorm3d(100)\n        >>> # Without Learnable Parameters\n        >>> m = SynchronizedBatchNorm3d(100, affine=False)\n        >>> input = torch.autograd.Variable(torch.randn(20, 100, 35, 45, 10))\n        >>> output = m(input)\n    \"\"\"\n\n    def _check_input_dim(self, input):\n        if input.dim() != 5:\n            raise ValueError('expected 5D input (got {}D input)'\n                             .format(input.dim()))\n        super(SynchronizedBatchNorm3d, self)._check_input_dim(input)"
  },
  {
    "path": "BigGAN_utils/sync_batchnorm/batchnorm_reimpl.py",
    "content": "#! /usr/bin/env python3\n# -*- coding: utf-8 -*-\n# File   : batchnorm_reimpl.py\n# Author : acgtyrant\n# Date   : 11/01/2018\n#\n# This file is part of Synchronized-BatchNorm-PyTorch.\n# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch\n# Distributed under MIT License.\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.init as init\n\n__all__ = ['BatchNormReimpl']\n\n\nclass BatchNorm2dReimpl(nn.Module):\n    \"\"\"\n    A re-implementation of batch normalization, used for testing the numerical\n    stability.\n\n    Author: acgtyrant\n    See also:\n    https://github.com/vacancy/Synchronized-BatchNorm-PyTorch/issues/14\n    \"\"\"\n    def __init__(self, num_features, eps=1e-5, momentum=0.1):\n        super().__init__()\n\n        self.num_features = num_features\n        self.eps = eps\n        self.momentum = momentum\n        self.weight = nn.Parameter(torch.empty(num_features))\n        self.bias = nn.Parameter(torch.empty(num_features))\n        self.register_buffer('running_mean', torch.zeros(num_features))\n        self.register_buffer('running_var', torch.ones(num_features))\n        self.reset_parameters()\n\n    def reset_running_stats(self):\n        self.running_mean.zero_()\n        self.running_var.fill_(1)\n\n    def reset_parameters(self):\n        self.reset_running_stats()\n        init.uniform_(self.weight)\n        init.zeros_(self.bias)\n\n    def forward(self, input_):\n        batchsize, channels, height, width = input_.size()\n        numel = batchsize * height * width\n        input_ = input_.permute(1, 0, 2, 3).contiguous().view(channels, numel)\n        sum_ = input_.sum(1)\n        sum_of_square = input_.pow(2).sum(1)\n        mean = sum_ / numel\n        sumvar = sum_of_square - sum_ * mean\n\n        self.running_mean = (\n                (1 - self.momentum) * self.running_mean\n                + self.momentum * mean.detach()\n        )\n        unbias_var = sumvar / (numel - 1)\n        
self.running_var = (\n                (1 - self.momentum) * self.running_var\n                + self.momentum * unbias_var.detach()\n        )\n\n        bias_var = sumvar / numel\n        inv_std = 1 / (bias_var + self.eps).pow(0.5)\n        output = (\n                (input_ - mean.unsqueeze(1)) * inv_std.unsqueeze(1) *\n                self.weight.unsqueeze(1) + self.bias.unsqueeze(1))\n\n        return output.view(channels, batchsize, height, width).permute(1, 0, 2, 3).contiguous()\n\n"
  },
  {
    "path": "BigGAN_utils/sync_batchnorm/comm.py",
    "content": "# -*- coding: utf-8 -*-\n# File   : comm.py\n# Author : Jiayuan Mao\n# Email  : maojiayuan@gmail.com\n# Date   : 27/01/2018\n# \n# This file is part of Synchronized-BatchNorm-PyTorch.\n# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch\n# Distributed under MIT License.\n\nimport queue\nimport collections\nimport threading\n\n__all__ = ['FutureResult', 'SlavePipe', 'SyncMaster']\n\n\nclass FutureResult(object):\n    \"\"\"A thread-safe future implementation. Used only as one-to-one pipe.\"\"\"\n\n    def __init__(self):\n        self._result = None\n        self._lock = threading.Lock()\n        self._cond = threading.Condition(self._lock)\n\n    def put(self, result):\n        with self._lock:\n            assert self._result is None, 'Previous result has\\'t been fetched.'\n            self._result = result\n            self._cond.notify()\n\n    def get(self):\n        with self._lock:\n            if self._result is None:\n                self._cond.wait()\n\n            res = self._result\n            self._result = None\n            return res\n\n\n_MasterRegistry = collections.namedtuple('MasterRegistry', ['result'])\n_SlavePipeBase = collections.namedtuple('_SlavePipeBase', ['identifier', 'queue', 'result'])\n\n\nclass SlavePipe(_SlavePipeBase):\n    \"\"\"Pipe for master-slave communication.\"\"\"\n\n    def run_slave(self, msg):\n        self.queue.put((self.identifier, msg))\n        ret = self.result.get()\n        self.queue.put(True)\n        return ret\n\n\nclass SyncMaster(object):\n    \"\"\"An abstract `SyncMaster` object.\n\n    - During the replication, as the data parallel will trigger an callback of each module, all slave devices should\n    call `register(id)` and obtain an `SlavePipe` to communicate with the master.\n    - During the forward pass, master device invokes `run_master`, all messages from slave devices will be collected,\n    and passed to a registered callback.\n    - After receiving the messages, the 
master device should gather the information and determine to message passed\n    back to each slave devices.\n    \"\"\"\n\n    def __init__(self, master_callback):\n        \"\"\"\n\n        Args:\n            master_callback: a callback to be invoked after having collected messages from slave devices.\n        \"\"\"\n        self._master_callback = master_callback\n        self._queue = queue.Queue()\n        self._registry = collections.OrderedDict()\n        self._activated = False\n\n    def __getstate__(self):\n        return {'master_callback': self._master_callback}\n\n    def __setstate__(self, state):\n        self.__init__(state['master_callback'])\n\n    def register_slave(self, identifier):\n        \"\"\"\n        Register an slave device.\n\n        Args:\n            identifier: an identifier, usually is the device id.\n\n        Returns: a `SlavePipe` object which can be used to communicate with the master device.\n\n        \"\"\"\n        if self._activated:\n            assert self._queue.empty(), 'Queue is not clean before next initialization.'\n            self._activated = False\n            self._registry.clear()\n        future = FutureResult()\n        self._registry[identifier] = _MasterRegistry(future)\n        return SlavePipe(identifier, self._queue, future)\n\n    def run_master(self, master_msg):\n        \"\"\"\n        Main entry for the master device in each forward pass.\n        The messages were first collected from each devices (including the master device), and then\n        an callback will be invoked to compute the message to be sent back to each devices\n        (including the master device).\n\n        Args:\n            master_msg: the message that the master want to send to itself. This will be placed as the first\n            message when calling `master_callback`. 
For detailed usage, see `_SynchronizedBatchNorm` for an example.\n\n        Returns: the message to be sent back to the master device.\n\n        \"\"\"\n        self._activated = True\n\n        intermediates = [(0, master_msg)]\n        for i in range(self.nr_slaves):\n            intermediates.append(self._queue.get())\n\n        results = self._master_callback(intermediates)\n        assert results[0][0] == 0, 'The first result should belongs to the master.'\n\n        for i, res in results:\n            if i == 0:\n                continue\n            self._registry[i].result.put(res)\n\n        for i in range(self.nr_slaves):\n            assert self._queue.get() is True\n\n        return results[0][1]\n\n    @property\n    def nr_slaves(self):\n        return len(self._registry)\n"
  },
  {
    "path": "BigGAN_utils/sync_batchnorm/replicate.py",
    "content": "# -*- coding: utf-8 -*-\n# File   : replicate.py\n# Author : Jiayuan Mao\n# Email  : maojiayuan@gmail.com\n# Date   : 27/01/2018\n# \n# This file is part of Synchronized-BatchNorm-PyTorch.\n# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch\n# Distributed under MIT License.\n\nimport functools\n\nfrom torch.nn.parallel.data_parallel import DataParallel\n\n__all__ = [\n    'CallbackContext',\n    'execute_replication_callbacks',\n    'DataParallelWithCallback',\n    'patch_replication_callback'\n]\n\n\nclass CallbackContext(object):\n    pass\n\n\ndef execute_replication_callbacks(modules):\n    \"\"\"\n    Execute an replication callback `__data_parallel_replicate__` on each module created by original replication.\n\n    The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)`\n\n    Note that, as all modules are isomorphism, we assign each sub-module with a context\n    (shared among multiple copies of this module on different devices).\n    Through this context, different copies can share some information.\n\n    We guarantee that the callback on the master copy (the first copy) will be called ahead of calling the callback\n    of any slave copies.\n    \"\"\"\n    master_copy = modules[0]\n    nr_modules = len(list(master_copy.modules()))\n    ctxs = [CallbackContext() for _ in range(nr_modules)]\n\n    for i, module in enumerate(modules):\n        for j, m in enumerate(module.modules()):\n            if hasattr(m, '__data_parallel_replicate__'):\n                m.__data_parallel_replicate__(ctxs[j], i)\n\n\nclass DataParallelWithCallback(DataParallel):\n    \"\"\"\n    Data Parallel with a replication callback.\n\n    An replication callback `__data_parallel_replicate__` of each module will be invoked after being created by\n    original `replicate` function.\n    The callback will be invoked with arguments `__data_parallel_replicate__(ctx, copy_id)`\n\n    Examples:\n        > sync_bn = 
SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)\n        > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1])\n        # sync_bn.__data_parallel_replicate__ will be invoked.\n    \"\"\"\n\n    def replicate(self, module, device_ids):\n        modules = super(DataParallelWithCallback, self).replicate(module, device_ids)\n        execute_replication_callbacks(modules)\n        return modules\n\n\ndef patch_replication_callback(data_parallel):\n    \"\"\"\n    Monkey-patch an existing `DataParallel` object. Add the replication callback.\n    Useful when you have customized `DataParallel` implementation.\n\n    Examples:\n        > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)\n        > sync_bn = DataParallel(sync_bn, device_ids=[0, 1])\n        > patch_replication_callback(sync_bn)\n        # this is equivalent to\n        > sync_bn = SynchronizedBatchNorm1d(10, eps=1e-5, affine=False)\n        > sync_bn = DataParallelWithCallback(sync_bn, device_ids=[0, 1])\n    \"\"\"\n\n    assert isinstance(data_parallel, DataParallel)\n\n    old_replicate = data_parallel.replicate\n\n    @functools.wraps(old_replicate)\n    def new_replicate(module, device_ids):\n        modules = old_replicate(module, device_ids)\n        execute_replication_callbacks(modules)\n        return modules\n\n    data_parallel.replicate = new_replicate\n"
  },
  {
    "path": "BigGAN_utils/sync_batchnorm/unittest.py",
    "content": "# -*- coding: utf-8 -*-\n# File   : unittest.py\n# Author : Jiayuan Mao\n# Email  : maojiayuan@gmail.com\n# Date   : 27/01/2018\n#\n# This file is part of Synchronized-BatchNorm-PyTorch.\n# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch\n# Distributed under MIT License.\n\nimport unittest\nimport torch\n\n\nclass TorchTestCase(unittest.TestCase):\n    def assertTensorClose(self, x, y):\n        adiff = float((x - y).abs().max())\n        if (y == 0).all():\n            rdiff = 'NaN'\n        else:\n            rdiff = float((adiff / y).abs().max())\n\n        message = (\n            'Tensor close check failed\\n'\n            'adiff={}\\n'\n            'rdiff={}\\n'\n        ).format(adiff, rdiff)\n        self.assertTrue(torch.allclose(x, y), message)\n\n"
  },
  {
    "path": "BigGAN_utils/train.py",
    "content": "\"\"\" BigGAN: The Authorized Unofficial PyTorch release\n    Code by A. Brock and A. Andonian\n    This code is an unofficial reimplementation of\n    \"Large-Scale GAN Training for High Fidelity Natural Image Synthesis,\"\n    by A. Brock, J. Donahue, and K. Simonyan (arXiv 1809.11096).\n\n    Let's go.\n\"\"\"\n\nimport os\nimport functools\nimport math\nimport numpy as np\nfrom tqdm import tqdm, trange\n\n\nimport torch\nimport torch.nn as nn\nfrom torch.nn import init\nimport torch.optim as optim\nimport torch.nn.functional as F\nfrom torch.nn import Parameter as P\nimport torchvision\n\n# Import my stuff\nimport inception_utils\nimport utils\nimport losses\nimport train_fns\nfrom sync_batchnorm import patch_replication_callback\n\n# The main training file. Config is a dictionary specifying the configuration\n# of this training run.\ndef run(config):\n\n  # Update the config dict as necessary\n  # This is for convenience, to add settings derived from the user-specified\n  # configuration into the config-dict (e.g. 
inferring the number of classes\n  # and size of the images from the dataset, passing in a pytorch object\n  # for the activation specified as a string)\n  config['resolution'] = utils.imsize_dict[config['dataset']]\n  config['n_classes'] = utils.nclass_dict[config['dataset']]\n  config['G_activation'] = utils.activation_dict[config['G_nl']]\n  config['D_activation'] = utils.activation_dict[config['D_nl']]\n  # By default, skip init if resuming training.\n  if config['resume']:\n    print('Skipping initialization for training resumption...')\n    config['skip_init'] = True\n  config = utils.update_config_roots(config)\n  device = 'cuda'\n  \n  # Seed RNG\n  utils.seed_rng(config['seed'])\n\n  # Prepare root folders if necessary\n  utils.prepare_root(config)\n\n  # Setup cudnn.benchmark for free speed\n  torch.backends.cudnn.benchmark = True\n\n  # Import the model--this line allows us to dynamically select different files.\n  model = __import__(config['model'])\n  experiment_name = (config['experiment_name'] if config['experiment_name']\n                       else utils.name_from_config(config))\n  print('Experiment name is %s' % experiment_name)\n\n  # Next, build the model\n  G = model.Generator(**config).to(device)\n  D = model.Discriminator(**config).to(device)\n  \n   # If using EMA, prepare it\n  if config['ema']:\n    print('Preparing EMA for G with decay of {}'.format(config['ema_decay']))\n    G_ema = model.Generator(**{**config, 'skip_init':True, \n                               'no_optim': True}).to(device)\n    ema = utils.ema(G, G_ema, config['ema_decay'], config['ema_start'])\n  else:\n    G_ema, ema = None, None\n  \n  # FP16?\n  if config['G_fp16']:\n    print('Casting G to float16...')\n    G = G.half()\n    if config['ema']:\n      G_ema = G_ema.half()\n  if config['D_fp16']:\n    print('Casting D to fp16...')\n    D = D.half()\n    # Consider automatically reducing SN_eps?\n  GD = model.G_D(G, D)\n  print(G)\n  print(D)\n  print('Number of 
params in G: {} D: {}'.format(\n    *[sum([p.data.nelement() for p in net.parameters()]) for net in [G,D]]))\n  # Prepare state dict, which holds things like epoch # and itr #\n  state_dict = {'itr': 0, 'epoch': 0, 'save_num': 0, 'save_best_num': 0,\n                'best_IS': 0, 'best_FID': 999999, 'config': config}\n\n  # If loading from a pre-trained model, load weights\n  if config['resume']:\n    print('Loading weights...')\n    utils.load_weights(G, D, state_dict,\n                       config['weights_root'], experiment_name, \n                       config['load_weights'] if config['load_weights'] else None,\n                       G_ema if config['ema'] else None)\n\n  # If parallel, parallelize the GD module\n  if config['parallel']:\n    GD = nn.DataParallel(GD)\n    if config['cross_replica']:\n      patch_replication_callback(GD)\n\n  # Prepare loggers for stats; metrics holds test metrics,\n  # lmetrics holds any desired training metrics.\n  test_metrics_fname = '%s/%s_log.jsonl' % (config['logs_root'],\n                                            experiment_name)\n  train_metrics_fname = '%s/%s' % (config['logs_root'], experiment_name)\n  print('Inception Metrics will be saved to {}'.format(test_metrics_fname))\n  test_log = utils.MetricsLogger(test_metrics_fname, \n                                 reinitialize=(not config['resume']))\n  print('Training Metrics will be saved to {}'.format(train_metrics_fname))\n  train_log = utils.MyLogger(train_metrics_fname, \n                             reinitialize=(not config['resume']),\n                             logstyle=config['logstyle'])\n  # Write metadata\n  utils.write_metadata(config['logs_root'], experiment_name, config, state_dict)\n  # Prepare data; the Discriminator's batch size is all that needs to be passed\n  # to the dataloader, as G doesn't require dataloading.\n  # Note that at every loader iteration we pass in enough data to complete\n  # a full D iteration (regardless of number of D 
steps and accumulations)\n  D_batch_size = (config['batch_size'] * config['num_D_steps']\n                  * config['num_D_accumulations'])\n  loaders = utils.get_data_loaders(**{**config, 'batch_size': D_batch_size,\n                                      'start_itr': state_dict['itr']})\n\n  # Prepare inception metrics: FID and IS\n  get_inception_metrics = inception_utils.prepare_inception_metrics(config['dataset'], config['parallel'], config['no_fid'])\n\n  # Prepare noise and randomly sampled label arrays\n  # Allow for different batch sizes in G\n  G_batch_size = max(config['G_batch_size'], config['batch_size'])\n  z_, y_ = utils.prepare_z_y(G_batch_size, G.dim_z, config['n_classes'],\n                             device=device, fp16=config['G_fp16'])\n  # Prepare a fixed z & y to see individual sample evolution throghout training\n  fixed_z, fixed_y = utils.prepare_z_y(G_batch_size, G.dim_z,\n                                       config['n_classes'], device=device,\n                                       fp16=config['G_fp16'])  \n  fixed_z.sample_()\n  fixed_y.sample_()\n  # Loaders are loaded, prepare the training function\n  if config['which_train_fn'] == 'GAN':\n    train = train_fns.GAN_training_function(G, D, GD, z_, y_, \n                                            ema, state_dict, config)\n  # Else, assume debugging and use the dummy train fn\n  else:\n    train = train_fns.dummy_training_function()\n  # Prepare Sample function for use with inception metrics\n  sample = functools.partial(utils.sample,\n                              G=(G_ema if config['ema'] and config['use_ema']\n                                 else G),\n                              z_=z_, y_=y_, config=config)\n\n  print('Beginning training at epoch %d...' % state_dict['epoch'])\n  # Train for specified number of epochs, although we mostly track G iterations.\n  for epoch in range(state_dict['epoch'], config['num_epochs']):    \n    # Which progressbar to use? 
TQDM or my own?\n    if config['pbar'] == 'mine':\n      pbar = utils.progress(loaders[0],displaytype='s1k' if config['use_multiepoch_sampler'] else 'eta')\n    else:\n      pbar = tqdm(loaders[0])\n    for i, (x, y) in enumerate(pbar):\n      # Increment the iteration counter\n      state_dict['itr'] += 1\n      # Make sure G and D are in training mode, just in case they got set to eval\n      # For D, which typically doesn't have BN, this shouldn't matter much.\n      G.train()\n      D.train()\n      if config['ema']:\n        G_ema.train()\n      if config['D_fp16']:\n        x, y = x.to(device).half(), y.to(device)\n      else:\n        x, y = x.to(device), y.to(device)\n      metrics = train(x, y)\n      train_log.log(itr=int(state_dict['itr']), **metrics)\n      \n      # Every sv_log_interval, log singular values\n      if (config['sv_log_interval'] > 0) and (not (state_dict['itr'] % config['sv_log_interval'])):\n        train_log.log(itr=int(state_dict['itr']), \n                      **{**utils.get_SVs(G, 'G'), **utils.get_SVs(D, 'D')})\n\n      # If using my progbar, print metrics.\n      if config['pbar'] == 'mine':\n          print(', '.join(['itr: %d' % state_dict['itr']] \n                           + ['%s : %+4.3f' % (key, metrics[key])\n                           for key in metrics]), end=' ')\n\n      # Save weights and copies as configured at specified interval\n      if not (state_dict['itr'] % config['save_every']):\n        if config['G_eval_mode']:\n          print('Switchin G to eval mode...')\n          G.eval()\n          if config['ema']:\n            G_ema.eval()\n        train_fns.save_and_sample(G, D, G_ema, z_, y_, fixed_z, fixed_y, \n                                  state_dict, config, experiment_name)\n\n      # Test every specified interval\n      if not (state_dict['itr'] % config['test_every']):\n        if config['G_eval_mode']:\n          print('Switchin G to eval mode...')\n          G.eval()\n        train_fns.test(G, D, 
G_ema, z_, y_, state_dict, config, sample,\n                       get_inception_metrics, experiment_name, test_log)\n    # Increment epoch counter at end of epoch\n    state_dict['epoch'] += 1\n\n\ndef main():\n  # parse command line and run\n  parser = utils.prepare_parser()\n  config = vars(parser.parse_args())\n  print(config)\n  run(config)\n\nif __name__ == '__main__':\n  main()"
  },
  {
    "path": "BigGAN_utils/train_fns.py",
    "content": "''' train_fns.py\nFunctions for the main loop of training different conditional image models\n'''\nimport torch\nimport torch.nn as nn\nimport torchvision\nimport os\n\nimport utils\nimport losses\n\n\n# Dummy training function for debugging\ndef dummy_training_function():\n  def train(x, y):\n    return {}\n  return train\n\n\ndef GAN_training_function(G, D, GD, z_, y_, ema, state_dict, config):\n  def train(x, y):\n    G.optim.zero_grad()\n    D.optim.zero_grad()\n    # How many chunks to split x and y into?\n    x = torch.split(x, config['batch_size'])\n    y = torch.split(y, config['batch_size'])\n    counter = 0\n    \n    # Optionally toggle D and G's \"require_grad\"\n    if config['toggle_grads']:\n      utils.toggle_grad(D, True)\n      utils.toggle_grad(G, False)\n      \n    for step_index in range(config['num_D_steps']):\n      # If accumulating gradients, loop multiple times before an optimizer step\n      D.optim.zero_grad()\n      for accumulation_index in range(config['num_D_accumulations']):\n        z_.sample_()\n        y_.sample_()\n        D_fake, D_real = GD(z_[:config['batch_size']], y_[:config['batch_size']], \n                            x[counter], y[counter], train_G=False, \n                            split_D=config['split_D'])\n         \n        # Compute components of D's loss, average them, and divide by \n        # the number of gradient accumulations\n        D_loss_real, D_loss_fake = losses.discriminator_loss(D_fake, D_real)\n        D_loss = (D_loss_real + D_loss_fake) / float(config['num_D_accumulations'])\n        D_loss.backward()\n        counter += 1\n        \n      # Optionally apply ortho reg in D\n      if config['D_ortho'] > 0.0:\n        # Debug print to indicate we're using ortho reg in D.\n        print('using modified ortho reg in D')\n        utils.ortho(D, config['D_ortho'])\n      \n      D.optim.step()\n    \n    # Optionally toggle \"requires_grad\"\n    if config['toggle_grads']:\n      
utils.toggle_grad(D, False)\n      utils.toggle_grad(G, True)\n      \n    # Zero G's gradients by default before training G, for safety\n    G.optim.zero_grad()\n    \n    # If accumulating gradients, loop multiple times\n    for accumulation_index in range(config['num_G_accumulations']):    \n      z_.sample_()\n      y_.sample_()\n      D_fake = GD(z_, y_, train_G=True, split_D=config['split_D'])\n      G_loss = losses.generator_loss(D_fake) / float(config['num_G_accumulations'])\n      G_loss.backward()\n    \n    # Optionally apply modified ortho reg in G\n    if config['G_ortho'] > 0.0:\n      print('using modified ortho reg in G') # Debug print to indicate we're using ortho reg in G\n      # Don't ortho reg shared, it makes no sense. Really we should blacklist any embeddings for this\n      utils.ortho(G, config['G_ortho'], \n                  blacklist=[param for param in G.shared.parameters()])\n    G.optim.step()\n    \n    # If we have an ema, update it, regardless of if we test with it or not\n    if config['ema']:\n      ema.update(state_dict['itr'])\n    \n    out = {'G_loss': float(G_loss.item()), \n            'D_loss_real': float(D_loss_real.item()),\n            'D_loss_fake': float(D_loss_fake.item())}\n    # Return G's loss and the components of D's loss.\n    return out\n  return train\n  \n''' This function takes in the model, saves the weights (multiple copies if \n    requested), and prepares sample sheets: one consisting of samples given\n    a fixed noise seed (to show how the model evolves throughout training),\n    a set of full conditional sample sheets, and a set of interp sheets. 
'''\ndef save_and_sample(G, D, G_ema, z_, y_, fixed_z, fixed_y, \n                    state_dict, config, experiment_name):\n  utils.save_weights(G, D, state_dict, config['weights_root'],\n                     experiment_name, None, G_ema if config['ema'] else None)\n  # Save an additional copy to mitigate accidental corruption if process\n  # is killed during a save (it's happened to me before -.-)\n  if config['num_save_copies'] > 0:\n    utils.save_weights(G, D, state_dict, config['weights_root'],\n                       experiment_name,\n                       'copy%d' %  state_dict['save_num'],\n                       G_ema if config['ema'] else None)\n    state_dict['save_num'] = (state_dict['save_num'] + 1 ) % config['num_save_copies']\n    \n  # Use EMA G for samples or non-EMA?\n  which_G = G_ema if config['ema'] and config['use_ema'] else G\n  \n  # Accumulate standing statistics?\n  if config['accumulate_stats']:\n    utils.accumulate_standing_stats(G_ema if config['ema'] and config['use_ema'] else G,\n                           z_, y_, config['n_classes'],\n                           config['num_standing_accumulations'])\n  \n  # Save a random sample sheet with fixed z and y      \n  with torch.no_grad():\n    if config['parallel']:\n      fixed_Gz =  nn.parallel.data_parallel(which_G, (fixed_z, which_G.shared(fixed_y)))\n    else:\n      fixed_Gz = which_G(fixed_z, which_G.shared(fixed_y))\n  if not os.path.isdir('%s/%s' % (config['samples_root'], experiment_name)):\n    os.mkdir('%s/%s' % (config['samples_root'], experiment_name))\n  image_filename = '%s/%s/fixed_samples%d.jpg' % (config['samples_root'], \n                                                  experiment_name,\n                                                  state_dict['itr'])\n  torchvision.utils.save_image(fixed_Gz.float().cpu(), image_filename, ## NOTE: xcliu for torchvision 0.8.2\n                             nrow=int(fixed_Gz.shape[0] **0.5), normalize=True)\n  
#torchvision.utils.save_image(torch.from_numpy(fixed_Gz.float().cpu().numpy()), image_filename,\n  #                           nrow=int(fixed_Gz.shape[0] **0.5), normalize=True)\n\n  # For now, every time we save, also save sample sheets\n  utils.sample_sheet(which_G,\n                     classes_per_sheet=utils.classes_per_sheet_dict[config['dataset']],\n                     num_classes=config['n_classes'],\n                     samples_per_class=10, parallel=config['parallel'],\n                     samples_root=config['samples_root'],\n                     experiment_name=experiment_name,\n                     folder_number=state_dict['itr'],\n                     z_=z_)\n  # Also save interp sheets\n  for fix_z, fix_y in zip([False, False, True], [False, True, False]):\n    utils.interp_sheet(which_G,\n                       num_per_sheet=16,\n                       num_midpoints=8,\n                       num_classes=config['n_classes'],\n                       parallel=config['parallel'],\n                       samples_root=config['samples_root'],\n                       experiment_name=experiment_name,\n                       folder_number=state_dict['itr'],\n                       sheet_number=0,\n                       fix_z=fix_z, fix_y=fix_y, device='cuda')\n\n\n  \n''' This function runs the inception metrics code, checks if the results\n    are an improvement over the previous best (either in IS or FID, \n    user-specified), logs the results, and saves a best_ copy if it's an \n    improvement. 
'''\ndef test(G, D, G_ema, z_, y_, state_dict, config, sample, get_inception_metrics,\n         experiment_name, test_log):\n  print('Gathering inception metrics...')\n  if config['accumulate_stats']:\n    utils.accumulate_standing_stats(G_ema if config['ema'] and config['use_ema'] else G,\n                           z_, y_, config['n_classes'],\n                           config['num_standing_accumulations'])\n  IS_mean, IS_std, FID = get_inception_metrics(sample, \n                                               config['num_inception_images'],\n                                               num_splits=10)\n  print('Itr %d: PYTORCH UNOFFICIAL Inception Score is %3.3f +/- %3.3f, PYTORCH UNOFFICIAL FID is %5.4f' % (state_dict['itr'], IS_mean, IS_std, FID))\n  # If improved over previous best metric, save appropriate copy\n  if ((config['which_best'] == 'IS' and IS_mean > state_dict['best_IS'])\n    or (config['which_best'] == 'FID' and FID < state_dict['best_FID'])):\n    print('%s improved over previous best, saving checkpoint...' % config['which_best'])\n    utils.save_weights(G, D, state_dict, config['weights_root'],\n                       experiment_name, 'best%d' % state_dict['save_best_num'],\n                       G_ema if config['ema'] else None)\n    state_dict['save_best_num'] = (state_dict['save_best_num'] + 1 ) % config['num_best_copies']\n  state_dict['best_IS'] = max(state_dict['best_IS'], IS_mean)\n  state_dict['best_FID'] = min(state_dict['best_FID'], FID)\n  # Log results to file\n  test_log.log(itr=int(state_dict['itr']), IS_mean=float(IS_mean),\n               IS_std=float(IS_std), FID=float(FID))\n"
  },
  {
    "path": "BigGAN_utils/utils.py",
    "content": "#!/usr/bin/env python\n# -*- coding: utf-8 -*-\n\n''' Utilities file\nThis file contains utility functions for bookkeeping, logging, and data loading.\nMethods which directly affect training should either go in layers, the model,\nor train_fns.py.\n'''\n\nfrom __future__ import print_function\nimport sys\nimport os\nimport numpy as np\nimport time\nimport datetime\nimport json\nimport pickle\nfrom argparse import ArgumentParser\nimport animal_hash\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torchvision\nimport torchvision.transforms as transforms\nfrom torch.utils.data import DataLoader\n\nimport datasets as dset\n\ndef prepare_parser():\n  usage = 'Parser for all scripts.'\n  parser = ArgumentParser(description=usage)\n  \n  ### Dataset/Dataloader stuff ###\n  parser.add_argument(\n    '--dataset', type=str, default='I128_hdf5',\n    help='Which Dataset to train on, out of I128, I256, C10, C100;'\n         'Append \"_hdf5\" to use the hdf5 version for ISLVRC '\n         '(default: %(default)s)')\n  parser.add_argument(\n    '--augment', action='store_true', default=False,\n    help='Augment with random crops and flips (default: %(default)s)')\n  parser.add_argument(\n    '--num_workers', type=int, default=8,\n    help='Number of dataloader workers; consider using less for HDF5 '\n         '(default: %(default)s)')\n  parser.add_argument(\n    '--no_pin_memory', action='store_false', dest='pin_memory', default=True,\n    help='Pin data into memory through dataloader? (default: %(default)s)') \n  parser.add_argument(\n    '--shuffle', action='store_true', default=False,\n    help='Shuffle the data (strongly recommended)? (default: %(default)s)')\n  parser.add_argument(\n    '--load_in_mem', action='store_true', default=False,\n    help='Load all data into memory? 
(default: %(default)s)')\n  parser.add_argument(\n    '--use_multiepoch_sampler', action='store_true', default=False,\n    help='Use the multi-epoch sampler for dataloader? (default: %(default)s)')\n  \n  \n  ### Model stuff ###\n  parser.add_argument(\n    '--model', type=str, default='BigGAN',\n    help='Name of the model module (default: %(default)s)')\n  parser.add_argument(\n    '--G_param', type=str, default='SN',\n    help='Parameterization style to use for G, spectral norm (SN) or SVD (SVD)'\n          ' or None (default: %(default)s)')\n  parser.add_argument(\n    '--D_param', type=str, default='SN',\n    help='Parameterization style to use for D, spectral norm (SN) or SVD (SVD)'\n         ' or None (default: %(default)s)')    \n  parser.add_argument(\n    '--G_ch', type=int, default=64,\n    help='Channel multiplier for G (default: %(default)s)')\n  parser.add_argument(\n    '--D_ch', type=int, default=64,\n    help='Channel multiplier for D (default: %(default)s)')\n  parser.add_argument(\n    '--G_depth', type=int, default=1,\n    help='Number of resblocks per stage in G? (default: %(default)s)')\n  parser.add_argument(\n    '--D_depth', type=int, default=1,\n    help='Number of resblocks per stage in D? (default: %(default)s)')\n  parser.add_argument(\n    '--D_thin', action='store_false', dest='D_wide', default=True,\n    help='Use the SN-GAN channel pattern for D? (default: %(default)s)')\n  parser.add_argument(\n    '--G_shared', action='store_true', default=False,\n    help='Use shared embeddings in G? (default: %(default)s)')\n  parser.add_argument(\n    '--shared_dim', type=int, default=0,\n    help='G''s shared embedding dimensionality; if 0, will be equal to dim_z. 
'\n         '(default: %(default)s)')\n  parser.add_argument(\n    '--dim_z', type=int, default=128,\n    help='Noise dimensionality: %(default)s)')\n  parser.add_argument(\n    '--z_var', type=float, default=1.0,\n    help='Noise variance: %(default)s)')    \n  parser.add_argument(\n    '--hier', action='store_true', default=False,\n    help='Use hierarchical z in G? (default: %(default)s)')\n  parser.add_argument(\n    '--cross_replica', action='store_true', default=False,\n    help='Cross_replica batchnorm in G?(default: %(default)s)')\n  parser.add_argument(\n    '--mybn', action='store_true', default=False,\n    help='Use my batchnorm (which supports standing stats?) %(default)s)')\n  parser.add_argument(\n    '--G_nl', type=str, default='relu',\n    help='Activation function for G (default: %(default)s)')\n  parser.add_argument(\n    '--D_nl', type=str, default='relu',\n    help='Activation function for D (default: %(default)s)')\n  parser.add_argument(\n    '--G_attn', type=str, default='64',\n    help='What resolutions to use attention on for G (underscore separated) '\n         '(default: %(default)s)')\n  parser.add_argument(\n    '--D_attn', type=str, default='64',\n    help='What resolutions to use attention on for D (underscore separated) '\n         '(default: %(default)s)')\n  parser.add_argument(\n    '--norm_style', type=str, default='bn',\n    help='Normalizer style for G, one of bn [batchnorm], in [instancenorm], '\n         'ln [layernorm], gn [groupnorm] (default: %(default)s)')\n         \n  ### Model init stuff ###\n  parser.add_argument(\n    '--seed', type=int, default=0,\n    help='Random seed to use; affects both initialization and '\n         ' dataloading. 
(default: %(default)s)')\n  parser.add_argument(\n    '--G_init', type=str, default='ortho',\n    help='Init style to use for G (default: %(default)s)')\n  parser.add_argument(\n    '--D_init', type=str, default='ortho',\n    help='Init style to use for D(default: %(default)s)')\n  parser.add_argument(\n    '--skip_init', action='store_true', default=False,\n    help='Skip initialization, ideal for testing when ortho init was used '\n          '(default: %(default)s)')\n  \n  ### Optimizer stuff ###\n  parser.add_argument(\n    '--G_lr', type=float, default=5e-5,\n    help='Learning rate to use for Generator (default: %(default)s)')\n  parser.add_argument(\n    '--D_lr', type=float, default=2e-4,\n    help='Learning rate to use for Discriminator (default: %(default)s)')\n  parser.add_argument(\n    '--G_B1', type=float, default=0.0,\n    help='Beta1 to use for Generator (default: %(default)s)')\n  parser.add_argument(\n    '--D_B1', type=float, default=0.0,\n    help='Beta1 to use for Discriminator (default: %(default)s)')\n  parser.add_argument(\n    '--G_B2', type=float, default=0.999,\n    help='Beta2 to use for Generator (default: %(default)s)')\n  parser.add_argument(\n    '--D_B2', type=float, default=0.999,\n    help='Beta2 to use for Discriminator (default: %(default)s)')\n    \n  ### Batch size, parallel, and precision stuff ###\n  parser.add_argument(\n    '--batch_size', type=int, default=64,\n    help='Default overall batchsize (default: %(default)s)')\n  parser.add_argument(\n    '--G_batch_size', type=int, default=0,\n    help='Batch size to use for G; if 0, same as D (default: %(default)s)')\n  parser.add_argument(\n    '--num_G_accumulations', type=int, default=1,\n    help='Number of passes to accumulate G''s gradients over '\n         '(default: %(default)s)')  \n  parser.add_argument(\n    '--num_D_steps', type=int, default=2,\n    help='Number of D steps per G step (default: %(default)s)')\n  parser.add_argument(\n    '--num_D_accumulations', 
type=int, default=1,\n    help='Number of passes to accumulate D''s gradients over '\n         '(default: %(default)s)')\n  parser.add_argument(\n    '--split_D', action='store_true', default=False,\n    help='Run D twice rather than concatenating inputs? (default: %(default)s)')\n  parser.add_argument(\n    '--num_epochs', type=int, default=100,\n    help='Number of epochs to train for (default: %(default)s)')\n  parser.add_argument(\n    '--parallel', action='store_true', default=False,\n    help='Train with multiple GPUs (default: %(default)s)')\n  parser.add_argument(\n    '--G_fp16', action='store_true', default=False,\n    help='Train with half-precision in G? (default: %(default)s)')\n  parser.add_argument(\n    '--D_fp16', action='store_true', default=False,\n    help='Train with half-precision in D? (default: %(default)s)')\n  parser.add_argument(\n    '--D_mixed_precision', action='store_true', default=False,\n    help='Train with half-precision activations but fp32 params in D? '\n         '(default: %(default)s)')\n  parser.add_argument(\n    '--G_mixed_precision', action='store_true', default=False,\n    help='Train with half-precision activations but fp32 params in G? '\n         '(default: %(default)s)')\n  parser.add_argument(\n    '--accumulate_stats', action='store_true', default=False,\n    help='Accumulate \"standing\" batchnorm stats? (default: %(default)s)')\n  parser.add_argument(\n    '--num_standing_accumulations', type=int, default=16,\n    help='Number of forward passes to use in accumulating standing stats? '\n         '(default: %(default)s)')        \n    \n  ### Bookkeping stuff ###  \n  parser.add_argument(\n    '--G_eval_mode', action='store_true', default=False,\n    help='Run G in eval mode (running/standing stats?) at sample/test time? 
'\n         '(default: %(default)s)')\n  parser.add_argument(\n    '--save_every', type=int, default=2000,\n    help='Save every X iterations (default: %(default)s)')\n  parser.add_argument(\n    '--num_save_copies', type=int, default=2,\n    help='How many copies to save (default: %(default)s)')\n  parser.add_argument(\n    '--num_best_copies', type=int, default=2,\n    help='How many previous best checkpoints to save (default: %(default)s)')\n  parser.add_argument(\n    '--which_best', type=str, default='IS',\n    help='Which metric to use to determine when to save new \"best\" '\n         'checkpoints, one of IS or FID (default: %(default)s)')\n  parser.add_argument(\n    '--no_fid', action='store_true', default=False,\n    help='Calculate IS only, not FID? (default: %(default)s)')\n  parser.add_argument(\n    '--test_every', type=int, default=5000,\n    help='Test every X iterations (default: %(default)s)')\n  parser.add_argument(\n    '--num_inception_images', type=int, default=50000,\n    help='Number of samples to compute inception metrics with '\n         '(default: %(default)s)')\n  parser.add_argument(\n    '--hashname', action='store_true', default=False,\n    help='Use a hash of the experiment name instead of the full config '\n         '(default: %(default)s)') \n  parser.add_argument(\n    '--base_root', type=str, default='',\n    help='Default location to store all weights, samples, data, and logs '\n           ' (default: %(default)s)')\n  parser.add_argument(\n    '--data_root', type=str, default='data',\n    help='Default location where data is stored (default: %(default)s)')\n  parser.add_argument(\n    '--weights_root', type=str, default='weights',\n    help='Default location to store weights (default: %(default)s)')\n  parser.add_argument(\n    '--logs_root', type=str, default='logs',\n    help='Default location to store logs (default: %(default)s)')\n  parser.add_argument(\n    '--samples_root', type=str, default='samples',\n    help='Default 
location to store samples (default: %(default)s)')  \n  parser.add_argument(\n    '--pbar', type=str, default='mine',\n    help='Type of progressbar to use; one of \"mine\" or \"tqdm\" '\n         '(default: %(default)s)')\n  parser.add_argument(\n    '--name_suffix', type=str, default='',\n    help='Suffix for experiment name for loading weights for sampling '\n         '(consider \"best0\") (default: %(default)s)')\n  parser.add_argument(\n    '--experiment_name', type=str, default='',\n    help='Optionally override the automatic experiment naming with this arg. '\n         '(default: %(default)s)')\n  parser.add_argument(\n    '--config_from_name', action='store_true', default=False,\n    help='Use a hash of the experiment name instead of the full config '\n         '(default: %(default)s)')\n         \n  ### EMA Stuff ###\n  parser.add_argument(\n    '--ema', action='store_true', default=False,\n    help='Keep an ema of G''s weights? (default: %(default)s)')\n  parser.add_argument(\n    '--ema_decay', type=float, default=0.9999,\n    help='EMA decay rate (default: %(default)s)')\n  parser.add_argument(\n    '--use_ema', action='store_true', default=False,\n    help='Use the EMA parameters of G for evaluation? 
(default: %(default)s)')\n  parser.add_argument(\n    '--ema_start', type=int, default=0,\n    help='When to start updating the EMA weights (default: %(default)s)')\n  \n  ### Numerical precision and SV stuff ### \n  parser.add_argument(\n    '--adam_eps', type=float, default=1e-8,\n    help='epsilon value to use for Adam (default: %(default)s)')\n  parser.add_argument(\n    '--BN_eps', type=float, default=1e-5,\n    help='epsilon value to use for BatchNorm (default: %(default)s)')\n  parser.add_argument(\n    '--SN_eps', type=float, default=1e-8,\n    help='epsilon value to use for Spectral Norm(default: %(default)s)')\n  parser.add_argument(\n    '--num_G_SVs', type=int, default=1,\n    help='Number of SVs to track in G (default: %(default)s)')\n  parser.add_argument(\n    '--num_D_SVs', type=int, default=1,\n    help='Number of SVs to track in D (default: %(default)s)')\n  parser.add_argument(\n    '--num_G_SV_itrs', type=int, default=1,\n    help='Number of SV itrs in G (default: %(default)s)')\n  parser.add_argument(\n    '--num_D_SV_itrs', type=int, default=1,\n    help='Number of SV itrs in D (default: %(default)s)')\n  \n  ### Ortho reg stuff ### \n  parser.add_argument(\n    '--G_ortho', type=float, default=0.0, # 1e-4 is default for BigGAN\n    help='Modified ortho reg coefficient in G(default: %(default)s)')\n  parser.add_argument(\n    '--D_ortho', type=float, default=0.0,\n    help='Modified ortho reg coefficient in D (default: %(default)s)')\n  parser.add_argument(\n    '--toggle_grads', action='store_true', default=True,\n    help='Toggle D and G''s \"requires_grad\" settings when not training them? '\n         ' (default: %(default)s)')\n  \n  ### Which train function ###\n  parser.add_argument(\n    '--which_train_fn', type=str, default='GAN',\n    help='How2trainyourbois (default: %(default)s)')  \n  \n  ### Resume training stuff\n  parser.add_argument(\n    '--load_weights', type=str, default='',\n    help='Suffix for which weights to load (e.g. 
best0, copy0) '\n         '(default: %(default)s)')\n  parser.add_argument(\n    '--resume', action='store_true', default=False,\n    help='Resume training? (default: %(default)s)')\n  \n  ### Log stuff ###\n  parser.add_argument(\n    '--logstyle', type=str, default='%3.3e',\n    help='What style to use when logging training metrics? '\n         'One of: %%#.#f/ %%#.#e (float/exp, text), '\n         'pickle (python pickle), '\n         'npz (numpy zip), '\n         'mat (MATLAB .mat file) (default: %(default)s)')\n  parser.add_argument(\n    '--log_G_spectra', action='store_true', default=False,\n    help='Log the top 3 singular values in each SN layer in G? '\n         '(default: %(default)s)')\n  parser.add_argument(\n    '--log_D_spectra', action='store_true', default=False,\n    help='Log the top 3 singular values in each SN layer in D? '\n         '(default: %(default)s)')\n  parser.add_argument(\n    '--sv_log_interval', type=int, default=10,\n    help='Iteration interval for logging singular values '\n         ' (default: %(default)s)') \n  \n  parser.add_argument('--text', type=str)\n\n  return parser\n\n# Arguments for sample.py; not presently used in train.py\ndef add_sample_parser(parser):\n  parser.add_argument(\n    '--sample_npz', action='store_true', default=False,\n    help='Sample \"sample_num_npz\" images and save to npz? '\n         '(default: %(default)s)')\n  parser.add_argument(\n    '--sample_num_npz', type=int, default=50000,\n    help='Number of images to sample when sampling NPZs '\n         '(default: %(default)s)')\n  parser.add_argument(\n    '--sample_sheets', action='store_true', default=False,\n    help='Produce class-conditional sample sheets and stick them in '\n         'the samples root? (default: %(default)s)')\n  parser.add_argument(\n    '--sample_interps', action='store_true', default=False,\n    help='Produce interpolation sheets and stick them in '\n         'the samples root? 
(default: %(default)s)')         \n  parser.add_argument(\n    '--sample_sheet_folder_num', type=int, default=-1,\n    help='Number to use for the folder for these sample sheets '\n         '(default: %(default)s)')\n  parser.add_argument(\n    '--sample_random', action='store_true', default=False,\n    help='Produce a single random sheet? (default: %(default)s)')\n  parser.add_argument(\n    '--sample_trunc_curves', type=str, default='',\n    help='Get inception metrics with a range of variances? '\n         'To use this, specify a startpoint, step, and endpoint, e.g. '\n         '--sample_trunc_curves 0.2_0.1_1.0 for a startpoint of 0.2, '\n         'endpoint of 1.0, and stepsize of 0.1.  Note that this is '\n         'not exactly identical to using tf.truncated_normal, but should '\n         'have approximately the same effect. (default: %(default)s)')\n  parser.add_argument(\n    '--sample_inception_metrics', action='store_true', default=False,\n    help='Calculate Inception metrics with sample.py? 
(default: %(default)s)')  \n  return parser\n\n# Convenience dicts\ndset_dict = {'I32': dset.ImageFolder, 'I64': dset.ImageFolder, \n             'I128': dset.ImageFolder, 'I256': dset.ImageFolder,\n             'I32_hdf5': dset.ILSVRC_HDF5, 'I64_hdf5': dset.ILSVRC_HDF5, \n             'I128_hdf5': dset.ILSVRC_HDF5, 'I256_hdf5': dset.ILSVRC_HDF5,\n             'C10': dset.CIFAR10, 'C100': dset.CIFAR100}\nimsize_dict = {'I32': 32, 'I32_hdf5': 32,\n               'I64': 64, 'I64_hdf5': 64,\n               'I128': 128, 'I128_hdf5': 128,\n               'I256': 256, 'I256_hdf5': 256,\n               'C10': 32, 'C100': 32}\nroot_dict = {'I32': 'ImageNet', 'I32_hdf5': 'ILSVRC32.hdf5',\n             'I64': 'ImageNet', 'I64_hdf5': 'ILSVRC64.hdf5',\n             'I128': 'ImageNet', 'I128_hdf5': 'ILSVRC128.hdf5',\n             'I256': 'ImageNet', 'I256_hdf5': 'ILSVRC256.hdf5',\n             'C10': 'cifar', 'C100': 'cifar'}\nnclass_dict = {'I32': 1000, 'I32_hdf5': 1000,\n               'I64': 1000, 'I64_hdf5': 1000,\n               'I128': 1000, 'I128_hdf5': 1000,\n               'I256': 1000, 'I256_hdf5': 1000,\n               'C10': 10, 'C100': 100}\n# Number of classes to put per sample sheet               \nclasses_per_sheet_dict = {'I32': 50, 'I32_hdf5': 50,\n                          'I64': 50, 'I64_hdf5': 50,\n                          'I128': 20, 'I128_hdf5': 20,\n                          'I256': 20, 'I256_hdf5': 20,\n                          'C10': 10, 'C100': 100}\nactivation_dict = {'inplace_relu': nn.ReLU(inplace=True),\n                   'relu': nn.ReLU(inplace=False),\n                   'ir': nn.ReLU(inplace=True),}\n\nclass CenterCropLongEdge(object):\n  \"\"\"Crops the given PIL Image on the long edge.\n  Args:\n      size (sequence or int): Desired output size of the crop. 
If size is an\n          int instead of sequence like (h, w), a square crop (size, size) is\n          made.\n  \"\"\"\n  def __call__(self, img):\n    \"\"\"\n    Args:\n        img (PIL Image): Image to be cropped.\n    Returns:\n        PIL Image: Cropped image.\n    \"\"\"\n    return transforms.functional.center_crop(img, min(img.size))\n\n  def __repr__(self):\n    return self.__class__.__name__\n\nclass RandomCropLongEdge(object):\n  \"\"\"Crops the given PIL Image on the long edge with a random start point.\n  Args:\n      size (sequence or int): Desired output size of the crop. If size is an\n          int instead of sequence like (h, w), a square crop (size, size) is\n          made.\n  \"\"\"\n  def __call__(self, img):\n    \"\"\"\n    Args:\n        img (PIL Image): Image to be cropped.\n    Returns:\n        PIL Image: Cropped image.\n    \"\"\"\n    size = (min(img.size), min(img.size))\n    # Only step forward along this edge if it's the long edge\n    i = (0 if size[0] == img.size[0] \n          else np.random.randint(low=0,high=img.size[0] - size[0]))\n    j = (0 if size[1] == img.size[1]\n          else np.random.randint(low=0,high=img.size[1] - size[1]))\n    return transforms.functional.crop(img, i, j, size[0], size[1])\n\n  def __repr__(self):\n    return self.__class__.__name__\n\n    \n# multi-epoch Dataset sampler to avoid memory leakage and enable resumption of\n# training from the same sample regardless of if we stop mid-epoch\nclass MultiEpochSampler(torch.utils.data.Sampler):\n  r\"\"\"Samples elements randomly over multiple epochs\n\n  Arguments:\n      data_source (Dataset): dataset to sample from\n      num_epochs (int) : Number of times to loop over the dataset\n      start_itr (int) : which iteration to begin from\n  \"\"\"\n\n  def __init__(self, data_source, num_epochs, start_itr=0, batch_size=128):\n    self.data_source = data_source\n    self.num_samples = len(self.data_source)\n    self.num_epochs = num_epochs\n    
self.start_itr = start_itr\n    self.batch_size = batch_size\n\n    if not isinstance(self.num_samples, int) or self.num_samples <= 0:\n      raise ValueError(\"num_samples should be a positive integral \"\n                       \"value, but got num_samples={}\".format(self.num_samples))\n\n  def __iter__(self):\n    n = len(self.data_source)\n    # Determine number of epochs\n    num_epochs = int(np.ceil((n * self.num_epochs \n                              - (self.start_itr * self.batch_size)) / float(n)))\n    # Sample all the indices, and then grab the last num_epochs index sets;\n    # This ensures if we're starting at epoch 4, we're still grabbing epoch 4's\n    # indices\n    out = [torch.randperm(n) for epoch in range(self.num_epochs)][-num_epochs:]\n    # Ignore the first (start_itr * batch_size) % n indices of the first epoch\n    out[0] = out[0][(self.start_itr * self.batch_size % n):]\n    # if self.replacement:\n      # return iter(torch.randint(high=n, size=(self.num_samples,), dtype=torch.int64).tolist())\n    # return iter(.tolist())\n    output = torch.cat(out).tolist()\n    print('Length dataset output is %d' % len(output))\n    return iter(output)\n\n  def __len__(self):\n    return len(self.data_source) * self.num_epochs - self.start_itr * self.batch_size\n\n\n# Convenience function to centralize all data loaders\ndef get_data_loaders(dataset, data_root=None, augment=False, batch_size=64, \n                     num_workers=8, shuffle=True, load_in_mem=False, hdf5=False,\n                     pin_memory=True, drop_last=True, start_itr=0,\n                     num_epochs=500, use_multiepoch_sampler=False,\n                     **kwargs):\n\n  # Append /FILENAME.hdf5 to root if using hdf5\n  data_root += '/%s' % root_dict[dataset]\n  print('Using dataset root location %s' % data_root)\n\n  which_dataset = dset_dict[dataset]\n  norm_mean = [0.5,0.5,0.5]\n  norm_std = [0.5,0.5,0.5]\n  image_size = imsize_dict[dataset]\n  # For image folder datasets, name of the 
file where we store the precomputed\n  # image locations to avoid having to walk the dirs every time we load.\n  dataset_kwargs = {'index_filename': '%s_imgs.npz' % dataset}\n  \n  # HDF5 datasets have their own inbuilt transform, no need to train_transform  \n  if 'hdf5' in dataset:\n    train_transform = None\n  else:\n    if augment:\n      print('Data will be augmented...')\n      if dataset in ['C10', 'C100']:\n        train_transform = [transforms.RandomCrop(32, padding=4),\n                           transforms.RandomHorizontalFlip()]\n      else:\n        train_transform = [RandomCropLongEdge(),\n                         transforms.Resize(image_size),\n                         transforms.RandomHorizontalFlip()]\n    else:\n      print('Data will not be augmented...')\n      if dataset in ['C10', 'C100']:\n        train_transform = []\n      else:\n        train_transform = [CenterCropLongEdge(), transforms.Resize(image_size)]\n      # train_transform = [transforms.Resize(image_size), transforms.CenterCrop]\n    train_transform = transforms.Compose(train_transform + [\n                     transforms.ToTensor(),\n                     transforms.Normalize(norm_mean, norm_std)])\n  train_set = which_dataset(root=data_root, transform=train_transform,\n                            load_in_mem=load_in_mem, **dataset_kwargs)\n\n  # Prepare loader; the loaders list is for forward compatibility with\n  # using validation / test splits.\n  loaders = []   \n  if use_multiepoch_sampler:\n    print('Using multiepoch sampler from start_itr %d...' 
% start_itr)\n    loader_kwargs = {'num_workers': num_workers, 'pin_memory': pin_memory}\n    sampler = MultiEpochSampler(train_set, num_epochs, start_itr, batch_size)\n    train_loader = DataLoader(train_set, batch_size=batch_size,\n                              sampler=sampler, **loader_kwargs)\n  else:\n    loader_kwargs = {'num_workers': num_workers, 'pin_memory': pin_memory,\n                     'drop_last': drop_last} # Default, drop last incomplete batch\n    train_loader = DataLoader(train_set, batch_size=batch_size,\n                              shuffle=shuffle, **loader_kwargs)\n  loaders.append(train_loader)\n  return loaders\n\n\n# Utility file to seed rngs\ndef seed_rng(seed):\n  torch.manual_seed(seed)\n  torch.cuda.manual_seed(seed)\n  np.random.seed(seed)\n\n\n# Utility to peg all roots to a base root\n# If a base root folder is provided, peg all other root folders to it.\ndef update_config_roots(config):\n  if config['base_root']:\n    print('Pegging all root folders to base root %s' % config['base_root'])\n    for key in ['data', 'weights', 'logs', 'samples']:\n      config['%s_root' % key] = '%s/%s' % (config['base_root'], key)\n  return config\n\n\n# Utility to prepare root folders if they don't exist; parent folder must exist\ndef prepare_root(config):\n  for key in ['weights_root', 'logs_root', 'samples_root']:\n    if not os.path.exists(config[key]):\n      print('Making directory %s for %s...' % (config[key], key))\n      os.mkdir(config[key])\n\n\n# Simple wrapper that applies EMA to a model. 
Could be better done in 1.0 using\n# the parameters() and buffers() module functions, but for now this works\n# with state_dicts using .copy_\nclass ema(object):\n  def __init__(self, source, target, decay=0.9999, start_itr=0):\n    self.source = source\n    self.target = target\n    self.decay = decay\n    # Optional parameter indicating what iteration to start the decay at\n    self.start_itr = start_itr\n    # Initialize target's params to be source's\n    self.source_dict = self.source.state_dict()\n    self.target_dict = self.target.state_dict()\n    print('Initializing EMA parameters to be source parameters...')\n    with torch.no_grad():\n      for key in self.source_dict:\n        self.target_dict[key].data.copy_(self.source_dict[key].data)\n        # target_dict[key].data = source_dict[key].data # Doesn't work!\n\n  def update(self, itr=None):\n    # If an iteration counter is provided and itr is less than the start itr,\n    # peg the ema weights to the underlying weights.\n    if itr and itr < self.start_itr:\n      decay = 0.0\n    else:\n      decay = self.decay\n    with torch.no_grad():\n      for key in self.source_dict:\n        self.target_dict[key].data.copy_(self.target_dict[key].data * decay \n                                     + self.source_dict[key].data * (1 - decay))\n\n\n# Apply modified ortho reg to a model\n# This function is an optimized version that directly computes the gradient,\n# instead of computing and then differentiating the loss.\ndef ortho(model, strength=1e-4, blacklist=[]):\n  with torch.no_grad():\n    for param in model.parameters():\n      # Only apply this to parameters with at least 2 axes, and not in the blacklist\n      if len(param.shape) < 2 or any([param is item for item in blacklist]):\n        continue\n      w = param.view(param.shape[0], -1)\n      grad = (2 * torch.mm(torch.mm(w, w.t()) \n              * (1. 
- torch.eye(w.shape[0], device=w.device)), w))\n      param.grad.data += strength * grad.view(param.shape)\n\n\n# Default ortho reg\n# This function is an optimized version that directly computes the gradient,\n# instead of computing and then differentiating the loss.\ndef default_ortho(model, strength=1e-4, blacklist=[]):\n  with torch.no_grad():\n    for param in model.parameters():\n      # Only apply this to parameters with at least 2 axes & not in blacklist\n      if len(param.shape) < 2 or param in blacklist:\n        continue\n      w = param.view(param.shape[0], -1)\n      grad = (2 * torch.mm(torch.mm(w, w.t()) \n               - torch.eye(w.shape[0], device=w.device), w))\n      param.grad.data += strength * grad.view(param.shape)\n\n\n# Convenience utility to switch off requires_grad\ndef toggle_grad(model, on_or_off):\n  for param in model.parameters():\n    param.requires_grad = on_or_off\n\n\n# Function to join strings or ignore them\n# Base string is the string to link \"strings,\" while strings\n# is a list of strings or Nones.\ndef join_strings(base_string, strings):\n  return base_string.join([item for item in strings if item])\n\n\n# Save a model's weights, optimizer, and the state_dict\ndef save_weights(G, D, state_dict, weights_root, experiment_name, \n                 name_suffix=None, G_ema=None):\n  root = '/'.join([weights_root, experiment_name])\n  if not os.path.exists(root):\n    os.mkdir(root)\n  if name_suffix:\n    print('Saving weights to %s/%s...' % (root, name_suffix))\n  else:\n    print('Saving weights to %s...' 
% root)\n  torch.save(G.state_dict(), \n              '%s/%s.pth' % (root, join_strings('_', ['G', name_suffix])))\n  torch.save(G.optim.state_dict(), \n              '%s/%s.pth' % (root, join_strings('_', ['G_optim', name_suffix])))\n  torch.save(D.state_dict(), \n              '%s/%s.pth' % (root, join_strings('_', ['D', name_suffix])))\n  torch.save(D.optim.state_dict(),\n              '%s/%s.pth' % (root, join_strings('_', ['D_optim', name_suffix])))\n  torch.save(state_dict,\n              '%s/%s.pth' % (root, join_strings('_', ['state_dict', name_suffix])))\n  if G_ema is not None:\n    torch.save(G_ema.state_dict(), \n                '%s/%s.pth' % (root, join_strings('_', ['G_ema', name_suffix])))\n\n\n# Load a model's weights, optimizer, and the state_dict\ndef load_weights(G, D, state_dict, weights_root, experiment_name, \n                 name_suffix=None, G_ema=None, strict=True, load_optim=True):\n  root = '/'.join([weights_root, experiment_name])\n  if name_suffix:\n    print('Loading %s weights from %s...' % (name_suffix, root))\n  else:\n    print('Loading weights from %s...' 
% root)\n  if G is not None:\n    G.load_state_dict(\n      torch.load('%s/%s.pth' % (root, join_strings('_', ['G', name_suffix]))),\n      strict=strict)\n    if load_optim:\n      G.optim.load_state_dict(\n        torch.load('%s/%s.pth' % (root, join_strings('_', ['G_optim', name_suffix]))))\n  if D is not None:\n    D.load_state_dict(\n      torch.load('%s/%s.pth' % (root, join_strings('_', ['D', name_suffix]))),\n      strict=strict)\n    if load_optim:\n      D.optim.load_state_dict(\n        torch.load('%s/%s.pth' % (root, join_strings('_', ['D_optim', name_suffix]))))\n  # Load state dict\n  for item in state_dict:\n    state_dict[item] = torch.load('%s/%s.pth' % (root, join_strings('_', ['state_dict', name_suffix])))[item]\n  if G_ema is not None:\n    G_ema.load_state_dict(\n      torch.load('%s/%s.pth' % (root, join_strings('_', ['G_ema', name_suffix]))),\n      strict=strict)\n\n\n''' MetricsLogger originally stolen from VoxNet source code.\n    Used for logging inception metrics'''\nclass MetricsLogger(object):\n  def __init__(self, fname, reinitialize=False):\n    self.fname = fname\n    self.reinitialize = reinitialize\n    if os.path.exists(self.fname):\n      if self.reinitialize:\n        print('{} exists, deleting...'.format(self.fname))\n        os.remove(self.fname)\n\n  def log(self, record=None, **kwargs):\n    \"\"\"\n    Assumption: no newlines in the input.\n    \"\"\"\n    if record is None:\n      record = {}\n    record.update(kwargs)\n    record['_stamp'] = time.time()\n    with open(self.fname, 'a') as f:\n      f.write(json.dumps(record, ensure_ascii=True) + '\\n')\n\n\n# Logstyle is either:\n# '%#.#f' for floating point representation in text\n# '%#.#e' for exponent representation in text\n# 'npz' for output to npz # NOT YET SUPPORTED\n# 'pickle' for output to a python pickle # NOT YET SUPPORTED\n# 'mat' for output to a MATLAB .mat file # NOT YET SUPPORTED\nclass MyLogger(object):\n  def __init__(self, fname, reinitialize=False, 
logstyle='%3.3f'):\n    self.root = fname\n    if not os.path.exists(self.root):\n      os.mkdir(self.root)\n    self.reinitialize = reinitialize\n    self.metrics = []\n    self.logstyle = logstyle # One of '%3.3f' or like '%3.3e'\n\n  # Delete log if re-starting and log already exists\n  def reinit(self, item):\n    if os.path.exists('%s/%s.log' % (self.root, item)):\n      if self.reinitialize:\n        # Only print the removal mess\n        if 'sv' in item :\n          if not any('sv' in item for item in self.metrics):\n            print('Deleting singular value logs...')\n        else:\n          print('{} exists, deleting...'.format('%s_%s.log' % (self.root, item)))\n        os.remove('%s/%s.log' % (self.root, item))\n  \n  # Log in plaintext; this is designed for being read in MATLAB(sorry not sorry)\n  def log(self, itr, **kwargs):\n    for arg in kwargs:\n      if arg not in self.metrics:\n        if self.reinitialize:\n          self.reinit(arg)\n        self.metrics += [arg]\n      if self.logstyle == 'pickle':\n        print('Pickle not currently supported...')\n         # with open('%s/%s.log' % (self.root, arg), 'a') as f:\n          # pickle.dump(kwargs[arg], f)\n      elif self.logstyle == 'mat':\n        print('.mat logstyle not currently supported...')\n      else:\n        with open('%s/%s.log' % (self.root, arg), 'a') as f:\n          f.write('%d: %s\\n' % (itr, self.logstyle % kwargs[arg]))\n\n\n# Write some metadata to the logs directory\ndef write_metadata(logs_root, experiment_name, config, state_dict):\n  with open(('%s/%s/metalog.txt' % \n             (logs_root, experiment_name)), 'w') as writefile:\n    writefile.write('datetime: %s\\n' % str(datetime.datetime.now()))\n    writefile.write('config: %s\\n' % str(config))\n    writefile.write('state: %s\\n' %str(state_dict))\n\n\n\"\"\"\nVery basic progress indicator to wrap an iterable in.\n\nAuthor: Jan Schlüter\nAndy's adds: time elapsed in addition to ETA, makes it possible to 
add\nestimated time to 1k iters instead of estimated time to completion.\n\"\"\"\ndef progress(items, desc='', total=None, min_delay=0.1, displaytype='s1k'):\n  \"\"\"\n  Returns a generator over `items`, printing the number and percentage of\n  items processed and the estimated remaining processing time before yielding\n  the next item. `total` gives the total number of items (required if `items`\n  has no length), and `min_delay` gives the minimum time in seconds between\n  subsequent prints. `desc` gives an optional prefix text (end with a space).\n  \"\"\"\n  total = total or len(items)\n  t_start = time.time()\n  t_last = 0\n  for n, item in enumerate(items):\n    t_now = time.time()\n    if t_now - t_last > min_delay:\n      print(\"\\r%s%d/%d (%6.2f%%)\" % (\n              desc, n+1, total, n / float(total) * 100), end=\" \")\n      if n > 0:\n        \n        if displaytype == 's1k': # minutes/seconds for 1000 iters\n          next_1000 = n + (1000 - n%1000)\n          t_done = t_now - t_start\n          t_1k = t_done / n * next_1000\n          outlist = list(divmod(t_done, 60)) + list(divmod(t_1k - t_done, 60))\n          print(\"(TE/ET1k: %d:%02d / %d:%02d)\" % tuple(outlist), end=\" \")\n        else:# displaytype == 'eta':\n          t_done = t_now - t_start\n          t_total = t_done / n * total\n          outlist = list(divmod(t_done, 60)) + list(divmod(t_total - t_done, 60))\n          print(\"(TE/ETA: %d:%02d / %d:%02d)\" % tuple(outlist), end=\" \")\n          \n      sys.stdout.flush()\n      t_last = t_now\n    yield item\n  t_total = time.time() - t_start\n  print(\"\\r%s%d/%d (100.00%%) (took %d:%02d)\" % ((desc, total, total) +\n                                                   divmod(t_total, 60)))\n\n\n# Sample function for use with inception metrics\ndef sample(G, z_, y_, config):\n  with torch.no_grad():\n    z_.sample_()\n    y_.sample_()\n    if config['parallel']:\n      G_z =  nn.parallel.data_parallel(G, (z_, G.shared(y_)))\n    
else:\n      G_z = G(z_, G.shared(y_))\n    return G_z, y_\n\n\n# Sample function for sample sheets\ndef sample_sheet(G, classes_per_sheet, num_classes, samples_per_class, parallel,\n                 samples_root, experiment_name, folder_number, z_=None):\n  # Prepare sample directory\n  if not os.path.isdir('%s/%s' % (samples_root, experiment_name)):\n    os.mkdir('%s/%s' % (samples_root, experiment_name))\n  if not os.path.isdir('%s/%s/%d' % (samples_root, experiment_name, folder_number)):\n    os.mkdir('%s/%s/%d' % (samples_root, experiment_name, folder_number))\n  # loop over total number of sheets\n  for i in range(num_classes // classes_per_sheet):\n    ims = []\n    y = torch.arange(i * classes_per_sheet, (i + 1) * classes_per_sheet, device='cuda')\n    for j in range(samples_per_class):\n      if (z_ is not None) and hasattr(z_, 'sample_') and classes_per_sheet <= z_.size(0):\n        z_.sample_()\n      else:\n        z_ = torch.randn(classes_per_sheet, G.dim_z, device='cuda')        \n      with torch.no_grad():\n        if parallel:\n          o = nn.parallel.data_parallel(G, (z_[:classes_per_sheet], G.shared(y)))\n        else:\n          o = G(z_[:classes_per_sheet], G.shared(y))\n\n      ims += [o.data.cpu()]\n    # This line should properly unroll the images\n    out_ims = torch.stack(ims, 1).view(-1, ims[0].shape[1], ims[0].shape[2], \n                                       ims[0].shape[3]).data.float().cpu()\n    #out_ims = torch.from_numpy(out_ims.numpy()) ### NOTE: xcliu for torchvision\n    # The path for the samples\n    image_filename = '%s/%s/%d/samples%d.jpg' % (samples_root, experiment_name, \n                                                 folder_number, i)\n    torchvision.utils.save_image(out_ims, image_filename,\n                                 nrow=samples_per_class, normalize=True)\n\n\n# Interp function; expects x0 and x1 to be of shape (shape0, 1, rest_of_shape..)\ndef interp(x0, x1, num_midpoints):\n  lerp = torch.linspace(0, 
1.0, num_midpoints + 2, device='cuda').to(x0.dtype)\n  return ((x0 * (1 - lerp.view(1, -1, 1))) + (x1 * lerp.view(1, -1, 1)))\n\n\n# interp sheet function\n# Supports full, class-wise and intra-class interpolation\ndef interp_sheet(G, num_per_sheet, num_midpoints, num_classes, parallel,\n                 samples_root, experiment_name, folder_number, sheet_number=0,\n                 fix_z=False, fix_y=False, device='cuda'):\n  # Prepare zs and ys\n  if fix_z: # If fix Z, only sample 1 z per row\n    zs = torch.randn(num_per_sheet, 1, G.dim_z, device=device)\n    zs = zs.repeat(1, num_midpoints + 2, 1).view(-1, G.dim_z)\n  else:\n    zs = interp(torch.randn(num_per_sheet, 1, G.dim_z, device=device),\n                torch.randn(num_per_sheet, 1, G.dim_z, device=device),\n                num_midpoints).view(-1, G.dim_z)\n  if fix_y: # If fix y, only sample 1 y per row\n    ys = sample_1hot(num_per_sheet, num_classes)\n    ys = G.shared(ys).view(num_per_sheet, 1, -1)\n    ys = ys.repeat(1, num_midpoints + 2, 1).view(num_per_sheet * (num_midpoints + 2), -1)\n  else:\n    ys = interp(G.shared(sample_1hot(num_per_sheet, num_classes)).view(num_per_sheet, 1, -1),\n                G.shared(sample_1hot(num_per_sheet, num_classes)).view(num_per_sheet, 1, -1),\n                num_midpoints).view(num_per_sheet * (num_midpoints + 2), -1)\n  # Run the net--note that we've already passed y through G.shared.\n  if G.fp16:\n    zs = zs.half()\n  with torch.no_grad():\n    if parallel:\n      out_ims = nn.parallel.data_parallel(G, (zs, ys)).data.cpu()\n    else:\n      out_ims = G(zs, ys).data.cpu()\n  interp_style = '' + ('Z' if not fix_z else '') + ('Y' if not fix_y else '')\n  image_filename = '%s/%s/%d/interp%s%d.jpg' % (samples_root, experiment_name,\n                                                folder_number, interp_style,\n                                                sheet_number)\n  torchvision.utils.save_image(out_ims, image_filename,\n                               
nrow=num_midpoints + 2, normalize=True)\n\n\n# Convenience debugging function to print out gradnorms and shape from each layer\n# May need to rewrite this so we can actually see which parameter is which\ndef print_grad_norms(net):\n    gradsums = [[float(torch.norm(param.grad).item()),\n                 float(torch.norm(param).item()), param.shape]\n                for param in net.parameters()]\n    order = np.argsort([item[0] for item in gradsums])\n    print(['%3.3e,%3.3e, %s' % (gradsums[item_index][0],\n                                gradsums[item_index][1],\n                                str(gradsums[item_index][2])) \n                              for item_index in order])\n\n\n# Get singular values to log. This will use the state dict to find them\n# and substitute underscores for dots.\ndef get_SVs(net, prefix):\n  d = net.state_dict()\n  return {('%s_%s' % (prefix, key)).replace('.', '_') :\n            float(d[key].item())\n            for key in d if 'sv' in key}\n\n\n# Name an experiment based on its config\ndef name_from_config(config):\n  name = '_'.join([\n  item for item in [\n  'Big%s' % config['which_train_fn'],\n  config['dataset'],\n  config['model'] if config['model'] != 'BigGAN' else None,\n  'seed%d' % config['seed'],\n  'Gch%d' % config['G_ch'],\n  'Dch%d' % config['D_ch'],\n  'Gd%d' % config['G_depth'] if config['G_depth'] > 1 else None,\n  'Dd%d' % config['D_depth'] if config['D_depth'] > 1 else None,\n  'bs%d' % config['batch_size'],\n  'Gfp16' if config['G_fp16'] else None,\n  'Dfp16' if config['D_fp16'] else None,\n  'nDs%d' % config['num_D_steps'] if config['num_D_steps'] > 1 else None,\n  'nDa%d' % config['num_D_accumulations'] if config['num_D_accumulations'] > 1 else None,\n  'nGa%d' % config['num_G_accumulations'] if config['num_G_accumulations'] > 1 else None,\n  'Glr%2.1e' % config['G_lr'],\n  'Dlr%2.1e' % config['D_lr'],\n  'GB%3.3f' % config['G_B1'] if config['G_B1'] !=0.0 else None,\n  'GBB%3.3f' % config['G_B2'] if 
config['G_B2'] !=0.999 else None,\n  'DB%3.3f' % config['D_B1'] if config['D_B1'] !=0.0 else None,\n  'DBB%3.3f' % config['D_B2'] if config['D_B2'] !=0.999 else None,\n  'Gnl%s' % config['G_nl'],\n  'Dnl%s' % config['D_nl'],\n  'Ginit%s' % config['G_init'],\n  'Dinit%s' % config['D_init'],\n  'G%s' % config['G_param'] if config['G_param'] != 'SN' else None,\n  'D%s' % config['D_param'] if config['D_param'] != 'SN' else None,\n  'Gattn%s' % config['G_attn'] if config['G_attn'] != '0' else None,\n  'Dattn%s' % config['D_attn'] if config['D_attn'] != '0' else None,\n  'Gortho%2.1e' % config['G_ortho'] if config['G_ortho'] > 0.0 else None,\n  'Dortho%2.1e' % config['D_ortho'] if config['D_ortho'] > 0.0 else None,\n  config['norm_style'] if config['norm_style'] != 'bn' else None,\n  'cr' if config['cross_replica'] else None,\n  'Gshared' if config['G_shared'] else None,\n  'hier' if config['hier'] else None,\n  'ema' if config['ema'] else None,\n  config['name_suffix'] if config['name_suffix'] else None,\n  ]\n  if item is not None])\n  # dogball\n  if config['hashname']:\n    return hashname(name)\n  else:\n    return name\n\n\n# A simple function to produce a unique experiment name from the animal hashes.\ndef hashname(name):\n  h = hash(name)\n  a = h % len(animal_hash.a)\n  h = h // len(animal_hash.a)\n  b = h % len(animal_hash.b)\n  h = h // len(animal_hash.c)\n  c = h % len(animal_hash.c)\n  return animal_hash.a[a] + animal_hash.b[b] + animal_hash.c[c]\n\n\n# Get GPU memory, -i is the index\ndef query_gpu(indices):\n  os.system('nvidia-smi -i 0 --query-gpu=memory.free --format=csv')\n\n\n# Convenience function to count the number of parameters in a module\ndef count_parameters(module):\n  print('Number of parameters: {}'.format(\n    sum([p.data.nelement() for p in module.parameters()])))\n\n   \n# Convenience function to sample an index, not actually a 1-hot\ndef sample_1hot(batch_size, num_classes, device='cuda'):\n  return torch.randint(low=0, high=num_classes, 
size=(batch_size,),\n          device=device, dtype=torch.int64, requires_grad=False)\n\n\n# A highly simplified convenience class for sampling from distributions\n# One could also use PyTorch's inbuilt distributions package.\n# Note that this class requires initialization to proceed as\n# x = Distribution(torch.randn(size))\n# x.init_distribution(dist_type, **dist_kwargs)\n# x = x.to(device,dtype)\n# This is partially based on https://discuss.pytorch.org/t/subclassing-torch-tensor/23754/2\nclass Distribution(torch.Tensor):\n  # Init the params of the distribution\n  def init_distribution(self, dist_type, **kwargs):    \n    self.dist_type = dist_type\n    self.dist_kwargs = kwargs\n    if self.dist_type == 'normal':\n      self.mean, self.var = kwargs['mean'], kwargs['var']\n    elif self.dist_type == 'categorical':\n      self.num_categories = kwargs['num_categories']\n\n  def sample_(self):\n    if self.dist_type == 'normal':\n      self.normal_(self.mean, self.var)\n    elif self.dist_type == 'categorical':\n      self.random_(0, self.num_categories)    \n    # return self.variable\n    \n  # Silly hack: overwrite the to() method to wrap the new object\n  # in a distribution as well\n  def to(self, *args, **kwargs):\n    new_obj = Distribution(self)\n    new_obj.init_distribution(self.dist_type, **self.dist_kwargs)\n    new_obj.data = super().to(*args, **kwargs)    \n    return new_obj\n\n\n# Convenience function to prepare a z and y vector\ndef prepare_z_y(G_batch_size, dim_z, nclasses, device='cuda', \n                fp16=False,z_var=1.0):\n  z_ = Distribution(torch.randn(G_batch_size, dim_z, requires_grad=False))\n  z_.init_distribution('normal', mean=0, var=z_var)\n  z_ = z_.to(device,torch.float16 if fp16 else torch.float32)   \n  \n  if fp16:\n    z_ = z_.half()\n\n  y_ = Distribution(torch.zeros(G_batch_size, requires_grad=False))\n  y_.init_distribution('categorical',num_categories=nclasses)\n  y_ = y_.to(device, torch.int64)\n  return z_, y_\n\n\ndef 
initiate_standing_stats(net):\n  for module in net.modules():\n    if hasattr(module, 'accumulate_standing'):\n      module.reset_stats()\n      module.accumulate_standing = True\n\n\ndef accumulate_standing_stats(net, z, y, nclasses, num_accumulations=16):\n  initiate_standing_stats(net)\n  net.train()\n  for i in range(num_accumulations):\n    with torch.no_grad():\n      z.normal_()\n      y.random_(0, nclasses)\n      x = net(z, net.shared(y)) # No need to parallelize here unless using syncbn\n  # Set to eval mode\n  net.eval() \n\n\n# This version of Adam keeps an fp32 copy of the parameters and\n# does all of the parameter updates in fp32, while still doing the\n# forwards and backwards passes using fp16 (i.e. fp16 copies of the\n# parameters and fp16 activations).\n#\n# Note that this calls .float().cuda() on the params.\nimport math\nfrom torch.optim.optimizer import Optimizer\nclass Adam16(Optimizer):\n  def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,weight_decay=0):\n    defaults = dict(lr=lr, betas=betas, eps=eps,\n            weight_decay=weight_decay)\n    params = list(params)\n    super(Adam16, self).__init__(params, defaults)\n      \n  # Safety modification to make sure we floatify our state\n  def load_state_dict(self, state_dict):\n    super(Adam16, self).load_state_dict(state_dict)\n    for group in self.param_groups:\n      for p in group['params']:\n        self.state[p]['exp_avg'] = self.state[p]['exp_avg'].float()\n        self.state[p]['exp_avg_sq'] = self.state[p]['exp_avg_sq'].float()\n        self.state[p]['fp32_p'] = self.state[p]['fp32_p'].float()\n\n  def step(self, closure=None):\n    \"\"\"Performs a single optimization step.\n    Arguments:\n      closure (callable, optional): A closure that reevaluates the model\n        and returns the loss.\n    \"\"\"\n    loss = None\n    if closure is not None:\n      loss = closure()\n\n    for group in self.param_groups:\n      for p in group['params']:\n        if p.grad 
is None:\n          continue\n          \n        grad = p.grad.data.float()\n        state = self.state[p]\n\n        # State initialization\n        if len(state) == 0:\n          state['step'] = 0\n          # Exponential moving average of gradient values\n          state['exp_avg'] = grad.new().resize_as_(grad).zero_()\n          # Exponential moving average of squared gradient values\n          state['exp_avg_sq'] = grad.new().resize_as_(grad).zero_()\n          # Fp32 copy of the weights\n          state['fp32_p'] = p.data.float()\n\n        exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']\n        beta1, beta2 = group['betas']\n\n        state['step'] += 1\n\n        if group['weight_decay'] != 0:\n          grad = grad.add(group['weight_decay'], state['fp32_p'])\n\n        # Decay the first and second moment running average coefficient\n        exp_avg.mul_(beta1).add_(1 - beta1, grad)\n        exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)\n\n        denom = exp_avg_sq.sqrt().add_(group['eps'])\n\n        bias_correction1 = 1 - beta1 ** state['step']\n        bias_correction2 = 1 - beta2 ** state['step']\n        step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1\n      \n        state['fp32_p'].addcdiv_(-step_size, exp_avg, denom)\n        p.data = state['fp32_p'].half()\n\n    return loss\n"
  },
  {
    "path": "BigGAN_utils/weights/README.md",
    "content": "Download pre-trained weights from \nhttps://drive.google.com/drive/folders/1nJ3HmgYgeA9NZr-oU-enqbYeO7zBaANs?usp=sharing\n"
  },
  {
    "path": "DiffAugment_pytorch.py",
    "content": "# Differentiable Augmentation for Data-Efficient GAN Training\n# Shengyu Zhao, Zhijian Liu, Ji Lin, Jun-Yan Zhu, and Song Han\n# https://arxiv.org/pdf/2006.10738\n\nimport torch\nimport torch.nn.functional as F\nimport numpy as np\n\n\ndef DiffAugment(x, policy='', channels_first=True):\n    if policy:\n        if not channels_first:\n            x = x.permute(0, 3, 1, 2)\n        for p in policy.split(','):\n            for f in AUGMENT_FNS[p]:\n                x = f(x)\n        if not channels_first:\n            x = x.permute(0, 2, 3, 1)\n        x = x.contiguous()\n    return x\n\n\ndef rand_brightness(x):\n    x = x + (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) - 0.5)\n    return x\n\n\ndef rand_saturation(x):\n    x_mean = x.mean(dim=1, keepdim=True)\n    x = (x - x_mean) * (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) * 2) + x_mean\n    return x\n\n\ndef rand_contrast(x):\n    x_mean = x.mean(dim=[1, 2, 3], keepdim=True)\n    x = (x - x_mean) * (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) + 0.5) + x_mean\n    return x\n\n\ndef rand_translation(x, ratio=0.125): ### ratio: org: 0.125\n    shift_x, shift_y = int(x.size(2) * ratio + 0.5), int(x.size(3) * ratio + 0.5)\n    translation_x = torch.randint(-shift_x, shift_x + 1, size=[x.size(0), 1, 1], device=x.device)\n    translation_y = torch.randint(-shift_y, shift_y + 1, size=[x.size(0), 1, 1], device=x.device)\n    grid_batch, grid_x, grid_y = torch.meshgrid(\n        torch.arange(x.size(0), dtype=torch.long, device=x.device),\n        torch.arange(x.size(2), dtype=torch.long, device=x.device),\n        torch.arange(x.size(3), dtype=torch.long, device=x.device),\n    )\n    grid_x = torch.clamp(grid_x + translation_x + 1, 0, x.size(2) + 1)\n    grid_y = torch.clamp(grid_y + translation_y + 1, 0, x.size(3) + 1)\n    x_pad = F.pad(x, [1, 1, 1, 1, 0, 0, 0, 0])\n    x = x_pad.permute(0, 2, 3, 1).contiguous()[grid_batch, grid_x, 
grid_y].permute(0, 3, 1, 2).contiguous()\n    return x\n\ndef rand_resize(x, min_ratio=0.8, max_ratio=1.2): ### ratio: org: 0.125\n    resize_ratio = np.random.rand()*(max_ratio-min_ratio) + min_ratio\n    resized_img = F.interpolate(x, size=int(resize_ratio*x.shape[3]), mode='bilinear')\n    org_size = x.shape[3]\n    #print('ORG:', x.shape)\n    #print('RESIZED:', resized_img.shape)\n    if int(resize_ratio*x.shape[3]) < x.shape[3]:\n        left_pad = (x.shape[3]-int(resize_ratio*x.shape[3]))/2.\n        left_pad = int(left_pad)\n        right_pad = x.shape[3] - left_pad - resized_img.shape[3]\n        #print('PAD:', left_pad, right_pad)\n        x = F.pad(resized_img, (left_pad, right_pad, left_pad, right_pad), \"constant\", 0.)\n        #print('SMALL:', x.shape)\n    else:\n        left = (int(resize_ratio*x.shape[3])-x.shape[3])/2.\n        left = int(left)\n        #print('LEFT:', left)\n        x = resized_img[:, :, left:(left+x.shape[3]), left:(left+x.shape[3])]\n        #print('LARGE:', x.shape)\n    assert x.shape[2] == org_size\n    assert x.shape[3] == org_size\n\n    return x\n\n\ndef rand_cutout(x, ratio=0.5):\n    cutout_size = int(x.size(2) * ratio + 0.5), int(x.size(3) * ratio + 0.5)\n    offset_x = torch.randint(0, x.size(2) + (1 - cutout_size[0] % 2), size=[x.size(0), 1, 1], device=x.device)\n    offset_y = torch.randint(0, x.size(3) + (1 - cutout_size[1] % 2), size=[x.size(0), 1, 1], device=x.device)\n    grid_batch, grid_x, grid_y = torch.meshgrid(\n        torch.arange(x.size(0), dtype=torch.long, device=x.device),\n        torch.arange(cutout_size[0], dtype=torch.long, device=x.device),\n        torch.arange(cutout_size[1], dtype=torch.long, device=x.device),\n    )\n    grid_x = torch.clamp(grid_x + offset_x - cutout_size[0] // 2, min=0, max=x.size(2) - 1)\n    grid_y = torch.clamp(grid_y + offset_y - cutout_size[1] // 2, min=0, max=x.size(3) - 1)\n    mask = torch.ones(x.size(0), x.size(2), x.size(3), dtype=x.dtype, device=x.device)\n    
mask[grid_batch, grid_x, grid_y] = 0\n    x = x * mask.unsqueeze(1)\n    return x\n\n\nAUGMENT_FNS = {\n    'color': [rand_brightness, rand_saturation, rand_contrast],\n    'translation': [rand_translation],\n    'resize': [rand_resize],\n    'cutout': [rand_cutout],\n}\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2021 gnobitab\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "# FuseDream\n\nThis repo contains code for our paper ([paper link](https://arxiv.org/abs/2112.01573)):\n\n**FuseDream: Training-Free Text-to-Image Generation with Improved CLIP+GAN Space Optimization**\n\nby *Xingchao Liu, Chengyue Gong, Lemeng Wu, Shujian Zhang, Hao Su and Qiang Liu* from UCSD and UT Austin.\n\n![FuseDream](./imgs/header_img.png?raw=true \"FuseDream\")\n\n## Introduction\nFuseDream uses pre-trained GANs (we support BigGAN-256 and BigGAN-512 for now) and CLIP to achieve high-fidelity text-to-image generation.\n\n## Requirements\nPlease use `pip` or `conda` to install the following packages:\n`PyTorch==1.7.1, torchvision==0.8.2, lpips==0.1.4` and also the requirements from [BigGAN](https://github.com/ajbrock/BigGAN-PyTorch).\n\n## Getting Started\n\nWe transformed the pre-trained weights of BigGAN from TFHub to PyTorch. To save your time, you can download the transformed BigGAN checkpoints from:\n\nhttps://drive.google.com/drive/folders/1nJ3HmgYgeA9NZr-oU-enqbYeO7zBaANs?usp=sharing\n\nPut the checkpoints into `./BigGAN_utils/weights/`\n\nRun the following command to generate images from text query:\n\n`python fusedream_generator.py --text 'YOUR TEXT' --seed YOUR_SEED`\n\nFor example, to get an image of a blue dog:\n\n`python fusedream_generator.py --text 'A photo of a blue dog.' --seed 1234`\n\nThe generated image will be stored in `./samples`\n\n## Colab Notebook\n\nFor a quick test of *FuseDream*, we provide Colab notebooks for [*FuseDream*(Single Image)](https://colab.research.google.com/drive/17qkzkoQQtzDRFaSCJQzIaNj88xjO9Rm9?usp=sharing) and *FuseDream-Composition*(TODO). 
Have fun!\n\n## Citations\nIf you use the code, please cite:\n\n```BibTex\n@inproceedings{\nbrock2018large,\ntitle={Large Scale {GAN} Training for High Fidelity Natural Image Synthesis},\nauthor={Andrew Brock and Jeff Donahue and Karen Simonyan},\nbooktitle={International Conference on Learning Representations},\nyear={2019},\nurl={https://openreview.net/forum?id=B1xsqj09Fm},\n}\n```\n\nand\n```BibTex\n@misc{\nliu2021fusedream,\ntitle={FuseDream: Training-Free Text-to-Image Generation with Improved CLIP+GAN Space Optimization}, \nauthor={Xingchao Liu and Chengyue Gong and Lemeng Wu and Shujian Zhang and Hao Su and Qiang Liu},\nyear={2021},\neprint={2112.01573},\narchivePrefix={arXiv},\nprimaryClass={cs.CV}\n}\n```\n"
  },
  {
    "path": "fusedream_generator.py",
    "content": "import torch\nfrom tqdm import tqdm\nfrom torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize\nimport torchvision\nimport BigGAN_utils.utils as utils\nimport clip\nimport torch.nn.functional as F\nfrom DiffAugment_pytorch import DiffAugment\nimport numpy as np\nfrom fusedream_utils import FuseDreamBaseGenerator, get_G, save_image\n\nparser = utils.prepare_parser()\nparser = utils.add_sample_parser(parser)\nargs = parser.parse_args()\n\nINIT_ITERS = 1000\nOPT_ITERS = 1000\n\nutils.seed_rng(args.seed) \n\nsentence = args.text\n\nprint('Generating:', sentence)\nG, config = get_G(512) # Choose from 256 and 512\ngenerator = FuseDreamBaseGenerator(G, config, 10) \nz_cllt, y_cllt = generator.generate_basis(sentence, init_iters=INIT_ITERS, num_basis=5)\n\nz_cllt_save = torch.cat(z_cllt).cpu().numpy()\ny_cllt_save = torch.cat(y_cllt).cpu().numpy()\nimg, z, y = generator.optimize_clip_score(z_cllt, y_cllt, sentence, latent_noise=True, augment=True, opt_iters=OPT_ITERS, optimize_y=True)\nscore = generator.measureAugCLIP(z, y, sentence, augment=True, num_samples=20)\nprint('AugCLIP score:', score)\nimport os\nif not os.path.exists('./samples'):\n    os.mkdir('./samples')\nsave_image(img, 'samples/fusedream_%s_seed_%d_score_%.4f.png'%(sentence, args.seed, score))\n\n"
  },
  {
    "path": "fusedream_utils.py",
    "content": "import torch\nfrom tqdm import tqdm\nfrom torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize\nimport torchvision\nimport BigGAN_utils.utils as utils\nimport clip\nimport torch.nn.functional as F\nfrom DiffAugment_pytorch import DiffAugment\nimport numpy as np\nimport lpips\n\nLATENT_NOISE = 0.01\nZ_THRES = 2.0\nPOLICY = 'color,translation,resize,cutout'\nTEST_POLICY = 'color,translation,resize,cutout'\nmean = torch.tensor([0.48145466, 0.4578275, 0.40821073]).cuda()\nstd = torch.tensor([0.26862954, 0.26130258, 0.27577711]).cuda()\n\ndef AugmentLoss(img, clip_model, text, replicate=10, interp_mode='bilinear', policy=POLICY):\n\n    clip_c = clip_model.logit_scale.exp()\n    img_aug = DiffAugment(img.repeat(replicate, 1, 1, 1), policy=policy)\n    img_aug = (img_aug+1.)/2.\n    img_aug = F.interpolate(img_aug, size=224, mode=interp_mode)\n    img_aug.sub_(mean[None, :, None, None]).div_(std[None, :, None, None])\n\n    logits_per_image, logits_per_text = clip_model(img_aug, text)\n    logits_per_image = logits_per_image / clip_c\n    concept_loss = (-1.) * logits_per_image \n     \n    return concept_loss.mean(dim=0, keepdim=False)\n\ndef NaiveSemanticLoss(img, clip_model, text, interp_mode='bilinear'):\n\n    clip_c = clip_model.logit_scale.exp()\n    img = (img+1.)/2.\n    img = F.interpolate(img, size=224, mode=interp_mode)\n    img.sub_(mean[None, :, None, None]).div_(std[None, :, None, None])\n\n    logits_per_image, logits_per_text = clip_model(img, text)\n    logits_per_image = logits_per_image / clip_c\n    concept_loss = (-1.) 
* logits_per_image \n     \n    return concept_loss.mean(dim=0, keepdim=False)\n\ndef get_gaussian_mask(size=256):\n    x, y = np.meshgrid(np.linspace(-1,1, size), np.linspace(-1,1,size))\n    dst = np.sqrt(x*x+y*y)\n      \n    # Intializing sigma and muu\n    sigma = 1\n    muu = 0.000\n      \n    # Calculating Gaussian array\n    gauss = np.exp(-( (dst-muu)**2 / ( 2.0 * sigma**2 ) ) )\n    \n    return gauss\n\ndef save_image(img, path, n_per_row=1):\n    with torch.no_grad():\n        torchvision.utils.save_image(\n            torch.from_numpy(img.cpu().numpy()), ##hack, to turn Distribution back to tensor\n            path,\n            nrow=n_per_row,\n            normalize=True,\n        )\n\ndef get_G(resolution=256):\n    if resolution == 256:\n        parser = utils.prepare_parser()\n        parser = utils.add_sample_parser(parser)\n        config = vars(parser.parse_args())\n\n        # See: https://github.com/ajbrock/BigGAN-PyTorch/blob/master/scripts/sample_BigGAN_bs256x8.sh.\n        config[\"resolution\"] = utils.imsize_dict[\"I128_hdf5\"]\n        config[\"n_classes\"] = utils.nclass_dict[\"I128_hdf5\"]\n        config[\"G_activation\"] = utils.activation_dict[\"inplace_relu\"]\n        config[\"D_activation\"] = utils.activation_dict[\"inplace_relu\"]\n        config[\"G_attn\"] = \"128\"\n        config[\"D_attn\"] = \"128\"\n        config[\"G_ch\"] = 96\n        config[\"D_ch\"] = 96\n        config[\"hier\"] = True\n        config[\"dim_z\"] = 140\n        config[\"shared_dim\"] = 128\n        config[\"G_shared\"] = True\n        config = utils.update_config_roots(config)\n        config[\"skip_init\"] = True\n        config[\"no_optim\"] = True\n        config[\"device\"] = \"cuda\"\n        config[\"resolution\"] = 256\n\n        # Set up cudnn.benchmark for free speed.\n        torch.backends.cudnn.benchmark = True\n\n        # Import the model.\n        model = __import__(config[\"model\"])\n        G = 
model.Generator(**config).to(config[\"device\"])\n        utils.count_parameters(G)\n\n        # Load weights.\n        weights_path = \"./BigGAN_utils/weights/biggan-256.pth\"  # Change this.\n        G.load_state_dict(torch.load(weights_path), strict=False)\n    elif resolution == 512:\n        parser = utils.prepare_parser()\n        parser = utils.add_sample_parser(parser)\n        config = vars(parser.parse_args())\n\n        # See: https://github.com/ajbrock/BigGAN-PyTorch/blob/master/scripts/sample_BigGAN_bs128x8.sh.\n        config[\"resolution\"] = 512\n        config[\"n_classes\"] = utils.nclass_dict[\"I128_hdf5\"]\n        config[\"G_activation\"] = utils.activation_dict[\"inplace_relu\"]\n        config[\"D_activation\"] = utils.activation_dict[\"inplace_relu\"]\n        config[\"G_attn\"] = \"64\"\n        config[\"D_attn\"] = \"64\"\n        config[\"G_ch\"] = 96\n        config[\"D_ch\"] = 64\n        config[\"hier\"] = True\n        config[\"dim_z\"] = 128\n        config[\"shared_dim\"] = 128\n        config[\"G_shared\"] = True\n        config = utils.update_config_roots(config)\n        config[\"skip_init\"] = True\n        config[\"no_optim\"] = True\n        config[\"device\"] = \"cuda\"\n\n        # Set up cudnn.benchmark for free speed.\n        torch.backends.cudnn.benchmark = True\n\n        # Import the model.\n        model = __import__(config[\"model\"])\n        #print(config[\"model\"])\n        G = model.Generator(**config).to(config[\"device\"])\n        utils.count_parameters(G)\n        #print('G parameters:')\n        #for p, m in G.named_parameters():\n        #    print(p)\n        # Load weights.\n        weights_path = \"./BigGAN_utils/weights/biggan-512.pth\"  # Change this.\n        G.load_state_dict(torch.load(weights_path), strict=False)\n\n    return G, config\n\nclass FuseDreamBaseGenerator():\n    def __init__(self, G, G_config, G_batch_size=10, clip_mode=\"ViT-B/32\", interp_mode='bilinear'):\n\n        device = 
\"cuda\" if torch.cuda.is_available() else \"cpu\"\n        self.device = device\n        self.G = G\n        self.clip_model, _ = clip.load(clip_mode, device=device) \n        \n        (self.z_, self.y_) = utils.prepare_z_y(\n            G_batch_size,\n            self.G.dim_z,\n            G_config[\"n_classes\"],\n            device=G_config[\"device\"],\n            fp16=G_config[\"G_fp16\"],\n            z_var=G_config[\"z_var\"],\n        )\n\n        self.G.eval()\n\n        for p in self.G.parameters():\n            p.requires_grad = False\n        for p in self.clip_model.parameters():\n            p.requires_grad = False\n\n        self.interp_mode = interp_mode \n  \n    def generate_basis(self, text, init_iters=500, num_basis=5):\n        text_tok = clip.tokenize([text]).to(self.device)\n        clip_c = self.clip_model.logit_scale.exp() \n\n        z_init_cllt = []\n        y_init_cllt = []\n        z_init = None\n        y_init = None\n        score_init = None\n        with torch.no_grad():\n            for i in tqdm(range(init_iters)):\n                self.z_.sample_()\n                self.y_.sample_()\n\n                self.z_.data = torch.clamp(self.z_.data.detach().clone(), min=-Z_THRES, max=Z_THRES)\n\n                image_tensors = self.G(self.z_, self.G.shared(self.y_))\n                image_tensors = (image_tensors+1.) 
/ 2.\n                image_tensors = F.interpolate(image_tensors, size=224, mode=self.interp_mode)\n                image_tensors.sub_(mean[None, :, None, None]).div_(std[None, :, None, None])\n                \n                logits_per_image, logits_per_text = self.clip_model(image_tensors, text_tok)\n                logits_per_image = logits_per_image/clip_c\n                if z_init is None:\n                    z_init = self.z_.data.detach().clone()\n                    y_init = self.y_.data.detach().clone()\n                    score_init = logits_per_image.squeeze()\n                else:\n                    z_init = torch.cat([z_init, self.z_.data.detach().clone()], dim=0)\n                    y_init = torch.cat([y_init, self.y_.data.detach().clone()], dim=0)\n                    score_init = torch.cat([score_init, logits_per_image.squeeze()])\n\n                sorted, indices = torch.sort(score_init, descending=True)\n                z_init = z_init[indices]\n                y_init = y_init[indices]\n                score_init = score_init[indices]\n                z_init = z_init[:num_basis]\n                y_init = y_init[:num_basis]\n                score_init = score_init[:num_basis]\n        \n        #save_image(self.G(z_init, self.G.shared(y_init)), 'samples/init_%s.png'%text, 1)\n\n        z_init_cllt.append(z_init.detach().clone())\n        y_init_cllt.append(self.G.shared(y_init.detach().clone()))\n\n        return z_init_cllt, y_init_cllt\n\n\n    def optimize_clip_score(self, z_init_cllt, y_init_cllt, text, latent_noise=False, augment=True, opt_iters=500, optimize_y=False):\n\n        text_tok = clip.tokenize([text]).to(self.device)\n        clip_c = self.clip_model.logit_scale.exp()\n\n        z_init_ans = torch.stack(z_init_cllt)\n        y_init_ans = torch.stack(y_init_cllt)\n        z_init_ans = z_init_ans.view(-1, z_init_ans.shape[-1])\n        y_init_ans = y_init_ans.view(-1, y_init_ans.shape[-1])\n\n        w_z = 
torch.randn((z_init_ans.shape[0], z_init_ans.shape[1])).to(self.device)\n        w_y = torch.randn((y_init_ans.shape[0], y_init_ans.shape[1])).to(self.device)\n        w_z.requires_grad = True\n        w_y.requires_grad = True\n\n        opt_y = torch.zeros(y_init_ans.shape).to(self.device)\n        opt_y.data = y_init_ans.data.detach().clone()\n        opt_z = torch.zeros(z_init_ans.shape).to(self.device)\n        opt_z.data = z_init_ans.data.detach().clone()\n        opt_z.requires_grad = True\n        \n        if not optimize_y:\n            optimizer = torch.optim.Adam([w_z, w_y, opt_z], lr=5e-3, weight_decay=0.0)\n        else:\n            opt_y.requires_grad = True\n            optimizer = torch.optim.Adam([w_z, w_y,opt_y,opt_z], lr=5e-3, weight_decay=0.0)\n\n        for i in tqdm(range(opt_iters)):\n            #print(w_z.shape, w_y.shape)\n            optimizer.zero_grad()\n            \n            if not latent_noise:\n                s_z = torch.softmax(w_z, dim=0)\n                s_y = torch.softmax(w_y, dim=0)\n                #print(s_z)\n            \n                cur_z = s_z * opt_z\n                cur_y = s_y * opt_y\n                cur_z = cur_z.sum(dim=0, keepdim=True)\n                cur_y = cur_y.sum(dim=0, keepdim=True)\n\n                image_tensors = self.G(cur_z, cur_y)\n            else:\n                s_z = torch.softmax(w_z, dim=0)\n                s_y = torch.softmax(w_y, dim=0)\n            \n                cur_z = s_z * opt_z\n                cur_y = s_y * opt_y\n                cur_z = cur_z.sum(dim=0, keepdim=True)\n                cur_y = cur_y.sum(dim=0, keepdim=True)\n                cur_z_aug = cur_z + torch.randn(cur_z.shape).to(cur_z.device) * LATENT_NOISE\n                cur_y_aug = cur_y + torch.randn(cur_y.shape).to(cur_y.device) * LATENT_NOISE\n                \n                image_tensors = self.G(cur_z_aug, cur_y_aug)\n            \n            loss = 0.0\n            for j in 
range(image_tensors.shape[0]):\n                if augment:\n                    loss = loss + AugmentLoss(image_tensors[j:(j+1)], self.clip_model, text_tok, replicate=50, interp_mode=self.interp_mode)\n                else:\n                    loss = loss + NaiveSemanticLoss(image_tensors[j:(j+1)], self.clip_model, text_tok) \n\n            loss.backward()\n            optimizer.step()\n\n            opt_z.data = torch.clamp(opt_z.data.detach().clone(), min=-Z_THRES, max=Z_THRES)\n\n        z_init_ans = cur_z.detach().clone()\n        y_init_ans = cur_y.detach().clone()\n\n        #save_image(self.G(z_init_ans, y_init_ans), 'samples/opt_%s.png'%text, 1)\n        return self.G(z_init_ans, y_init_ans), z_init_ans, y_init_ans    \n\n    def measureAugCLIP(self, z, y, text, augment=False, num_samples=20):\n        text_tok = clip.tokenize([text]).to(self.device)\n        avg_loss = 0.0\n        for itr in range(num_samples):\n            image_tensors = self.G(z, y)\n\n            for j in range(image_tensors.shape[0]):\n                if augment:\n                    loss = AugmentLoss(image_tensors[j:(j+1)], self.clip_model, text_tok, replicate=50, interp_mode=self.interp_mode, policy=TEST_POLICY)\n                else:\n                    loss = NaiveSemanticLoss(image_tensors[j:(j+1)], self.clip_model, text_tok) \n            avg_loss += loss.item()\n\n        avg_loss /= num_samples\n        return avg_loss * (-1.)\n\n"
  }
]