);
//------------------------------------------------------------------------
================================================
FILE: dnnlib/tflib/ops/upfirdn_2d.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Custom TensorFlow ops for efficient resampling of 2D images."""
import os
import numpy as np
import tensorflow as tf
from .. import custom_ops
def _get_plugin():
    """Return the plugin that `custom_ops.get_plugin()` provides for the `.cu` file
    sharing this module's path (same directory and stem, `.cu` extension)."""
    module_stem, _ext = os.path.splitext(__file__)
    cuda_source = module_stem + '.cu'
    return custom_ops.get_plugin(cuda_source)
#----------------------------------------------------------------------------
def upfirdn_2d(x, k, upx=1, upy=1, downx=1, downy=1, padx0=0, padx1=0, pady0=0, pady1=0, impl='cuda'):
    r"""Upsample, pad, FIR-filter, and downsample a batch of 2D images.

    The input is a batch of 2D images of the shape `[majorDim, inH, inW, minorDim]`.
    Each image, batched across `majorDim` and `minorDim`, goes through the
    following steps (listed in the order used by the reference implementation):

    1. Upsampling: zeros are inserted after each pixel (`upx - 1` along X,
       `upy - 1` along Y).

    2. Padding: zeros are added on each side of the upsampled image
       (`padx0`, `padx1`, `pady0`, `pady1`). A negative amount crops the
       image on that side instead.

    3. Filtering: the image is convolved with the 2D FIR filter `k`. Only output
       pixels whose footprint lies completely inside the padded image are
       produced, so the image shrinks by `firW - 1` / `firH - 1` pixels.

    4. Downsampling: only every `downx`-th / `downy`-th pixel is kept.

    The sequence closely resembles scipy.signal.upfirdn(). The fused op is
    considerably more efficient than performing the same calculation using
    standard TensorFlow ops. It supports gradients of arbitrary order.

    Args:
        x:      Input tensor of the shape `[majorDim, inH, inW, minorDim]`.
        k:      2D FIR filter of the shape `[firH, firW]`.
        upx:    Integer upsampling factor along the X-axis (default: 1).
        upy:    Integer upsampling factor along the Y-axis (default: 1).
        downx:  Integer downsampling factor along the X-axis (default: 1).
        downy:  Integer downsampling factor along the Y-axis (default: 1).
        padx0:  Number of pixels to pad on the left side (default: 0).
        padx1:  Number of pixels to pad on the right side (default: 0).
        pady0:  Number of pixels to pad on the top side (default: 0).
        pady1:  Number of pixels to pad on the bottom side (default: 0).
        impl:   Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).

    Returns:
        Tensor of the shape `[majorDim, outH, outW, minorDim]`, and same datatype as `x`,
        where `outW = (inW * upx + padx0 + padx1 - firW) // downx + 1` and
        `outH` is computed analogously along the Y-axis.

    Raises:
        KeyError: If `impl` is not one of the supported implementation names.
    """
    implementations = dict(ref=_upfirdn_2d_ref, cuda=_upfirdn_2d_cuda)
    backend = implementations[impl]
    return backend(
        x=x, k=k,
        upx=upx, upy=upy,
        downx=downx, downy=downy,
        padx0=padx0, padx1=padx1,
        pady0=pady0, pady1=pady1)
#----------------------------------------------------------------------------
def _upfirdn_2d_ref(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1):
    """Reference (slow) version of `upfirdn_2d()` built only from standard TensorFlow ops.

    The height and width of `x` must be statically known; the last dimension may
    be dynamic. All factors and paddings must be Python ints.
    """
    x = tf.convert_to_tensor(x)
    k = np.asarray(k, dtype=np.float32)
    assert x.shape.rank == 4
    in_h, in_w = x.shape[1].value, x.shape[2].value
    channels = _shape(x, 3)
    fir_h, fir_w = k.shape
    assert in_w >= 1 and in_h >= 1
    assert fir_w >= 1 and fir_h >= 1
    assert isinstance(upx, int) and isinstance(upy, int)
    assert isinstance(downx, int) and isinstance(downy, int)
    assert isinstance(padx0, int) and isinstance(padx1, int)
    assert isinstance(pady0, int) and isinstance(pady1, int)

    # Sizes after upsampling and after padding/cropping.
    up_h, up_w = in_h * upy, in_w * upx
    padded_h = up_h + pady0 + pady1
    padded_w = up_w + padx0 + padx1

    # Step 1: insert zeros after every pixel by padding two singleton axes.
    x = tf.reshape(x, [-1, in_h, 1, in_w, 1, channels])
    zero_insert = [[0, 0], [0, 0], [0, upy - 1], [0, 0], [0, upx - 1], [0, 0]]
    x = tf.pad(x, zero_insert)
    x = tf.reshape(x, [-1, up_h, up_w, channels])

    # Step 2: positive amounts pad with zeros, negative amounts crop.
    grow = [[0, 0], [max(pady0, 0), max(pady1, 0)], [max(padx0, 0), max(padx1, 0)], [0, 0]]
    x = tf.pad(x, grow)
    crop_top, crop_bottom = max(-pady0, 0), max(-pady1, 0)
    crop_left, crop_right = max(-padx0, 0), max(-padx1, 0)
    x = x[:, crop_top : x.shape[1].value - crop_bottom, crop_left : x.shape[2].value - crop_right, :]

    # Step 3: fold the channels into the batch axis and filter each plane separately.
    # The kernel is flipped along both axes before being handed to tf.nn.conv2d().
    x = tf.transpose(x, [0, 3, 1, 2])
    x = tf.reshape(x, [-1, 1, padded_h, padded_w])
    flipped = k[::-1, ::-1, np.newaxis, np.newaxis]
    w = tf.constant(flipped, dtype=x.dtype)
    x = tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='VALID', data_format='NCHW')
    x = tf.reshape(x, [-1, channels, padded_h - fir_h + 1, padded_w - fir_w + 1])
    x = tf.transpose(x, [0, 2, 3, 1])

    # Step 4: keep every downy-th row and every downx-th column.
    return x[:, ::downy, ::downx, :]
#----------------------------------------------------------------------------
def _upfirdn_2d_cuda(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1):
    """Fast CUDA implementation of `upfirdn_2d()` using custom ops.

    The forward pass calls the plugin's `up_fir_dn2d` op. The gradient is
    expressed with the same op (flipped kernel, up/down factors swapped), and
    the gradient of that gradient is the forward function again.

    Args:
        x: Input of the shape `[majorDim, inH, inW, minorDim]`; `inH` and `inW`
            must be statically known because they are used in Python arithmetic.
        k: 2D FIR filter of the shape `[firH, firW]`.
        upx, upy, downx, downy: Integer resampling factors.
        padx0, padx1, pady0, pady1: Integer padding amounts.

    Returns:
        Tensor with static shape `[majorDim, outH, outW, minorDim]`.
    """
    x = tf.convert_to_tensor(x)
    k = np.asarray(k, dtype=np.float32)
    majorDim, inH, inW, minorDim = x.shape.as_list()
    kernelH, kernelW = k.shape
    assert inW >= 1 and inH >= 1
    assert kernelW >= 1 and kernelH >= 1
    assert isinstance(upx, int) and isinstance(upy, int)
    assert isinstance(downx, int) and isinstance(downy, int)
    assert isinstance(padx0, int) and isinstance(padx1, int)
    assert isinstance(pady0, int) and isinstance(pady1, int)

    # Output size: upsampled-and-padded size, shrunk by the filter, then decimated.
    outW = (inW * upx + padx0 + padx1 - kernelW) // downx + 1
    outH = (inH * upy + pady0 + pady1 - kernelH) // downy + 1
    assert outW >= 1 and outH >= 1

    # Filter constants: `kc` for the forward pass, `gkc` (flipped along both axes) for the gradient.
    kc = tf.constant(k, dtype=x.dtype)
    gkc = tf.constant(k[::-1, ::-1], dtype=x.dtype)
    # Padding used by the gradient op so that its output has the input's spatial size
    # (see the set_shape() call in grad()).
    gpadx0 = kernelW - padx0 - 1
    gpady0 = kernelH - pady0 - 1
    gpadx1 = inW * upx - outW * downx + padx0 - upx + 1
    gpady1 = inH * upy - outH * downy + pady0 - upy + 1

    @tf.custom_gradient
    def func(x):
        # Forward pass through the custom op.
        y = _get_plugin().up_fir_dn2d(x=x, k=kc, upx=upx, upy=upy, downx=downx, downy=downy, padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1)
        y.set_shape([majorDim, outH, outW, minorDim])
        @tf.custom_gradient
        def grad(dy):
            # Backward pass: same op with up/down factors swapped and the flipped kernel.
            dx = _get_plugin().up_fir_dn2d(x=dy, k=gkc, upx=downx, upy=downy, downx=upx, downy=upy, padx0=gpadx0, padx1=gpadx1, pady0=gpady0, pady1=gpady1)
            dx.set_shape([majorDim, inH, inW, minorDim])
            # Returning `func` as the gradient of `grad` makes higher-order gradients
            # alternate between the two ops.
            return dx, func
        return y, grad
    return func(x)
#----------------------------------------------------------------------------
def filter_2d(x, k, gain=1, data_format='NCHW', impl='cuda'):
    r"""Apply an FIR filter to a batch of 2D images without changing their size.

    The input has the shape `[N, C, H, W]` or `[N, H, W, C]`. The filter is
    normalized to unit sum and then multiplied by `gain`, so a constant image is
    scaled by `gain`. Pixels outside the image are treated as zero; the total
    padding is `firN - 1`, which keeps the spatial size unchanged.

    Args:
        x:            Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
        k:            FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
        gain:         Scaling factor for signal magnitude (default: 1.0).
        data_format:  `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
        impl:         Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).

    Returns:
        Tensor of the same shape and datatype as `x`.
    """
    kernel = _setup_kernel(k) * gain
    total_pad = kernel.shape[0] - 1
    # For an odd total, the extra pixel goes on the leading side.
    pad_before = (total_pad + 1) // 2
    pad_after = total_pad // 2
    return _simple_upfirdn_2d(x, kernel, pad0=pad_before, pad1=pad_after, data_format=data_format, impl=impl)
#----------------------------------------------------------------------------
def upsample_2d(x, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'):
    r"""Enlarge a batch of 2D images by an integer factor using an FIR filter.

    The input has the shape `[N, C, H, W]` or `[N, H, W, C]`. The filter is
    normalized to unit sum and scaled by `gain * factor**2`, so that a constant
    image is scaled by `gain` after the zero-insertion upsampling. Pixels outside
    the image are treated as zero, and the padding is derived from the filter size
    and `factor` such that the output is exactly `factor` times larger.

    Args:
        x:            Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
        k:            FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
                      The default is `[1] * factor`, which corresponds to nearest-neighbor
                      upsampling.
        factor:       Integer upsampling factor (default: 2).
        gain:         Scaling factor for signal magnitude (default: 1.0).
        data_format:  `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
        impl:         Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).

    Returns:
        Tensor of the shape `[N, C, H * factor, W * factor]` or
        `[N, H * factor, W * factor, C]`, and same datatype as `x`.
    """
    assert isinstance(factor, int) and factor >= 1
    taps = [1] * factor if k is None else k
    kernel = _setup_kernel(taps) * (gain * (factor ** 2))
    excess = kernel.shape[0] - factor
    pad_before = (excess + 1) // 2 + factor - 1
    pad_after = excess // 2
    return _simple_upfirdn_2d(x, kernel, up=factor, pad0=pad_before, pad1=pad_after, data_format=data_format, impl=impl)
#----------------------------------------------------------------------------
def downsample_2d(x, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'):
    r"""Shrink a batch of 2D images by an integer factor using an FIR filter.

    The input has the shape `[N, C, H, W]` or `[N, H, W, C]`. The filter is
    normalized to unit sum and scaled by `gain`, so a constant image is scaled by
    `gain`. Pixels outside the image are treated as zero, and the padding is
    derived from the filter size and `factor` such that the output size is the
    input size integer-divided by `factor`.

    Args:
        x:            Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
        k:            FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
                      The default is `[1] * factor`, which corresponds to average pooling.
        factor:       Integer downsampling factor (default: 2).
        gain:         Scaling factor for signal magnitude (default: 1.0).
        data_format:  `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
        impl:         Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).

    Returns:
        Tensor of the shape `[N, C, H // factor, W // factor]` or
        `[N, H // factor, W // factor, C]`, and same datatype as `x`.
    """
    assert isinstance(factor, int) and factor >= 1
    taps = [1] * factor if k is None else k
    kernel = _setup_kernel(taps) * gain
    excess = kernel.shape[0] - factor
    pad_before = (excess + 1) // 2
    pad_after = excess // 2
    return _simple_upfirdn_2d(x, kernel, down=factor, pad0=pad_before, pad1=pad_after, data_format=data_format, impl=impl)
#----------------------------------------------------------------------------
def upsample_conv_2d(x, w, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'):
    r"""Fused `upsample_2d()` followed by `tf.nn.conv2d()`.

    The upsampling and the convolution are realized by one strided
    `tf.nn.conv2d_transpose()` followed by a single FIR filtering pass.
    Padding is performed only once at the beginning, not between the operations.
    The fused op is considerably more efficient than performing the same calculation
    using standard TensorFlow ops. It supports gradients of arbitrary order.

    Args:
        x:            Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
        w:            Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`.
                      `filterH` must equal `filterW`. Grouped convolution can be performed
                      by `inChannels = C // numGroups`, where `C` is the channel dimension
                      of `x` (axis 1 for `'NCHW'`, axis 3 for `'NHWC'`).
        k:            FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
                      The default is `[1] * factor`, which corresponds to nearest-neighbor
                      upsampling.
        factor:       Integer upsampling factor (default: 2).
        gain:         Scaling factor for signal magnitude (default: 1.0).
        data_format:  `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
        impl:         Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).

    Returns:
        Tensor of the shape `[N, C, H * factor, W * factor]` or
        `[N, H * factor, W * factor, C]`, and same datatype as `x`.
    """
    assert isinstance(factor, int) and factor >= 1

    # Check weight shape.
    w = tf.convert_to_tensor(w)
    assert w.shape.rank == 4
    convH = w.shape[0].value
    convW = w.shape[1].value
    inC = _shape(w, 2)
    outC = _shape(w, 3)
    assert convW == convH

    # Setup filter kernel.
    if k is None:
        k = [1] * factor
    k = _setup_kernel(k) * (gain * (factor ** 2))
    p = (k.shape[0] - factor) - (convW - 1)

    # Determine data dimensions.
    if data_format == 'NCHW':
        stride = [1, 1, factor, factor]
        output_shape = [_shape(x, 0), outC, (_shape(x, 2) - 1) * factor + convH, (_shape(x, 3) - 1) * factor + convW]
        num_groups = _shape(x, 1) // inC
    else:
        stride = [1, factor, factor, 1]
        output_shape = [_shape(x, 0), (_shape(x, 1) - 1) * factor + convH, (_shape(x, 2) - 1) * factor + convW, outC]
        num_groups = _shape(x, 3) // inC

    # Transpose weights: flip spatially and swap the per-group in/out channel axes,
    # as required by tf.nn.conv2d_transpose().
    w = tf.reshape(w, [convH, convW, inC, num_groups, -1])
    w = tf.transpose(w[::-1, ::-1], [0, 1, 4, 3, 2])
    w = tf.reshape(w, [convH, convW, -1, num_groups * inC])

    # Execute.
    x = tf.nn.conv2d_transpose(x, w, output_shape=output_shape, strides=stride, padding='VALID', data_format=data_format)

    # The transposed convolution produces (H - 1) * factor + convH samples per axis, i.e.
    # it lacks the `factor - 1` zeros that upsample_2d() would insert after the last pixel.
    # Adding them back on the trailing side makes the result exactly H * factor for every
    # factor (the former hard-coded `+1` was only correct for factor == 2).
    return _simple_upfirdn_2d(x, k, pad0=(p+1)//2+factor-1, pad1=p//2+factor-1, data_format=data_format, impl=impl)
#----------------------------------------------------------------------------
def conv_downsample_2d(x, w, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'):
    r"""Fused `tf.nn.conv2d()` followed by `downsample_2d()`.

    The input is first FIR-filtered (including all padding) and then passed
    through a single strided `tf.nn.conv2d()` that performs both the convolution
    and the decimation. Padding is performed only once at the beginning, not
    between the operations. The fused op is considerably more efficient than
    performing the same calculation using standard TensorFlow ops. It supports
    gradients of arbitrary order.

    Args:
        x:            Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`.
        w:            Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`
                      with a fully static shape and `filterH == filterW`. It is passed to
                      `tf.nn.conv2d()` unchanged.
                      NOTE(review): grouped convolution (`inChannels` smaller than the
                      channel count of `x`) therefore depends on `tf.nn.conv2d()` -- confirm.
        k:            FIR filter of the shape `[firH, firW]` or `[firN]` (separable).
                      The default is `[1] * factor`, which corresponds to average pooling.
        factor:       Integer downsampling factor (default: 2).
        gain:         Scaling factor for signal magnitude (default: 1.0).
        data_format:  `'NCHW'` or `'NHWC'` (default: `'NCHW'`).
        impl:         Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).

    Returns:
        Tensor of the shape `[N, C, H // factor, W // factor]` or
        `[N, H // factor, W // factor, C]`, and same datatype as `x`.
    """
    assert isinstance(factor, int) and factor >= 1
    w = tf.convert_to_tensor(w)
    conv_h, conv_w, _in_channels, _out_channels = w.shape.as_list()
    assert conv_w == conv_h

    taps = [1] * factor if k is None else k
    kernel = _setup_kernel(taps) * gain

    # Total padding covers both the FIR filter and the footprint of the convolution.
    total_pad = (kernel.shape[0] - factor) + (conv_w - 1)
    strides = [1, 1, factor, factor] if data_format == 'NCHW' else [1, factor, factor, 1]

    filtered = _simple_upfirdn_2d(
        x, kernel,
        pad0=(total_pad + 1) // 2, pad1=total_pad // 2,
        data_format=data_format, impl=impl)
    return tf.nn.conv2d(filtered, w, strides=strides, padding='VALID', data_format=data_format)
#----------------------------------------------------------------------------
# Internal helper funcs.
def _shape(tf_expr, dim_idx):
    """Return the size of dimension `dim_idx` of `tf_expr`.

    The statically known size is returned as-is when it is available; otherwise
    the result is the corresponding element of the dynamic `tf.shape()`.
    """
    static_shape = tf_expr.shape
    if static_shape.rank is None:
        return tf.shape(tf_expr)[dim_idx]
    static_dim = static_shape[dim_idx].value
    if static_dim is None:
        return tf.shape(tf_expr)[dim_idx]
    return static_dim
def _setup_kernel(k):
    """Convert a filter specification into a normalized, square 2D float32 kernel.

    Args:
        k: 1D sequence of separable filter taps (expanded with an outer product)
            or a 2D array of filter coefficients.

    Returns:
        A new float32 array of the shape `[firN, firN]` whose elements sum to 1.
        The argument itself is never modified.

    Raises:
        AssertionError: If the resulting kernel is not 2D or not square.
    """
    # np.array() always copies. np.asarray() would hand back the caller's own array
    # whenever it already is a float32 ndarray, and the in-place division below
    # would then silently rescale the caller's data.
    k = np.array(k, dtype=np.float32)
    if k.ndim == 1:
        k = np.outer(k, k)
    k /= np.sum(k)
    assert k.ndim == 2
    assert k.shape[0] == k.shape[1]
    return k
def _simple_upfirdn_2d(x, k, up=1, down=1, pad0=0, pad1=0, data_format='NCHW', impl='cuda'):
    """Call `upfirdn_2d()` with identical factors and paddings along both axes.

    For `'NCHW'` data the batch and channel axes are merged into the leading
    axis (with a trailing axis of size 1) before the call and split again
    afterwards; `'NHWC'` data is passed through unchanged.
    """
    assert data_format in ['NCHW', 'NHWC']
    assert x.shape.rank == 4
    channels_first = (data_format == 'NCHW')

    images = x
    if channels_first:
        images = tf.reshape(x, [-1, _shape(x, 2), _shape(x, 3), 1])

    images = upfirdn_2d(
        images, k,
        upx=up, upy=up,
        downx=down, downy=down,
        padx0=pad0, padx1=pad1,
        pady0=pad0, pady1=pad1,
        impl=impl)

    if not channels_first:
        return images
    return tf.reshape(images, [-1, _shape(x, 1), _shape(images, 1), _shape(images, 2)])
#----------------------------------------------------------------------------
================================================
FILE: dnnlib/tflib/optimizer.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Helper wrapper for a Tensorflow optimizer."""
import platform
import numpy as np
import tensorflow as tf
from collections import OrderedDict
from typing import List, Union
from . import autosummary
from . import tfutil
from .. import util
from .tfutil import TfExpression, TfExpressionEx
# Module-level state used by Optimizer._broadcast_fallback().
# Set to True once the slow-fallback warning has been printed, so that it is shown only once.
_collective_ops_warning_printed = False
# Group key passed to collective_ops.all_reduce().
# NOTE(review): the specific numbers below look arbitrary -- confirm nothing else relies on them.
_collective_ops_group_key = 831766147
# Instance key passed to collective_ops.all_reduce(); incremented after each fallback broadcast.
_collective_ops_instance_key = 436340067
class Optimizer:
    """A Wrapper for tf.train.Optimizer.

    Automatically takes care of:
    - Gradient averaging for multi-GPU training.
    - Gradient accumulation for arbitrarily large minibatches.
    - Dynamic loss scaling and typecasts for FP16 training.
    - Ignoring corrupted gradients that contain NaNs/Infs.
    - Reporting statistics.
    - Well-chosen default settings.

    Typical call order, as enforced by the assertions below: call
    `register_gradients()` one or more times, then `apply_updates()` exactly once.
    """

    def __init__(self,
        name: str = "Train", # Name string that will appear in TensorFlow graph.
        tf_optimizer: str = "tf.train.AdamOptimizer", # Underlying optimizer class (name resolved via util.get_obj_by_name).
        learning_rate: TfExpressionEx = 0.001, # Learning rate. Can vary over time.
        minibatch_multiplier: TfExpressionEx = None, # Treat N consecutive minibatches as one by accumulating gradients.
        share: "Optimizer" = None, # Share internal state with a previously created optimizer?
        use_loss_scaling: bool = False, # Enable dynamic loss scaling for robust mixed-precision training?
        loss_scaling_init: float = 64.0, # Log2 of initial loss scaling factor.
        loss_scaling_inc: float = 0.0005, # Log2 of per-minibatch loss scaling increment when there is no overflow.
        loss_scaling_dec: float = 1.0, # Log2 of per-minibatch loss scaling decrement when there is an overflow.
        report_mem_usage: bool = False, # Report fine-grained memory usage statistics in TensorBoard?
        **kwargs):
        """Store the configuration; TensorFlow objects are created lazily per device.

        Any extra keyword arguments are forwarded to the constructor of the
        underlying optimizer class.
        """

        # Public fields.
        self.name = name
        self.learning_rate = learning_rate
        self.minibatch_multiplier = minibatch_multiplier
        self.id = self.name.replace("/", ".")
        self.scope = tf.get_default_graph().unique_name(self.id)
        self.optimizer_class = util.get_obj_by_name(tf_optimizer)
        self.optimizer_kwargs = dict(kwargs)
        self.use_loss_scaling = use_loss_scaling
        self.loss_scaling_init = loss_scaling_init
        self.loss_scaling_inc = loss_scaling_inc
        self.loss_scaling_dec = loss_scaling_dec

        # Private fields.
        self._updates_applied = False
        self._devices = OrderedDict() # device_name => EasyDict()
        self._shared_optimizers = OrderedDict() # device_name => optimizer_class
        self._gradient_shapes = None # [shape, ...]
        self._report_mem_usage = report_mem_usage

        # Validate arguments.
        assert callable(self.optimizer_class)

        # Share internal state if requested.
        # Sharing requires the same optimizer class, the same learning rate object
        # and equal optimizer keyword arguments.
        if share is not None:
            assert isinstance(share, Optimizer)
            assert self.optimizer_class is share.optimizer_class
            assert self.learning_rate is share.learning_rate
            assert self.optimizer_kwargs == share.optimizer_kwargs
            self._shared_optimizers = share._shared_optimizers # pylint: disable=protected-access

    def _get_device(self, device_name: str):
        """Get internal state for the given TensorFlow device.

        The state is created on first use and cached in `self._devices`.
        """
        tfutil.assert_tf_initialized()
        if device_name in self._devices:
            return self._devices[device_name]

        # Initialize fields.
        device = util.EasyDict()
        device.name = device_name
        device.optimizer = None # Underlying optimizer: optimizer_class
        device.loss_scaling_var = None # Log2 of loss scaling: tf.Variable
        device.grad_raw = OrderedDict() # Raw gradients: var => [grad, ...]
        device.grad_clean = OrderedDict() # Clean gradients: var => grad
        device.grad_acc_vars = OrderedDict() # Accumulation sums: var => tf.Variable
        device.grad_acc_count = None # Accumulation counter: tf.Variable
        device.grad_acc = OrderedDict() # Accumulated gradients: var => grad

        # Setup TensorFlow objects.
        with tfutil.absolute_name_scope(self.scope + "/Devices"), tf.device(device_name), tf.control_dependencies(None):
            # Reuse an optimizer instance that already exists for this device (possibly
            # created by the Optimizer this one shares state with); otherwise create one.
            if device_name not in self._shared_optimizers:
                optimizer_name = self.scope.replace("/", "_") + "_opt%d" % len(self._shared_optimizers)
                self._shared_optimizers[device_name] = self.optimizer_class(name=optimizer_name, learning_rate=self.learning_rate, **self.optimizer_kwargs)
            device.optimizer = self._shared_optimizers[device_name]
            if self.use_loss_scaling:
                device.loss_scaling_var = tf.Variable(np.float32(self.loss_scaling_init), trainable=False, name="loss_scaling_var")

        # Register device.
        self._devices[device_name] = device
        return device

    def register_gradients(self, loss: TfExpression, trainable_vars: Union[List, dict]) -> None:
        """Register the gradients of the given loss function with respect to the given variables.
        Intended to be called once per GPU.

        The device is taken from `loss.device`, and every variable must live on that
        same device. `trainable_vars` may be a list or a dict (its values are used).
        Every call must pass variables with the same shapes, in the same order, as
        the first call. Must not be called after `apply_updates()`.
        """
        tfutil.assert_tf_initialized()
        assert not self._updates_applied
        device = self._get_device(loss.device)

        # Validate trainables.
        if isinstance(trainable_vars, dict):
            trainable_vars = list(trainable_vars.values()) # allow passing in Network.trainables as vars
        assert isinstance(trainable_vars, list) and len(trainable_vars) >= 1
        assert all(tfutil.is_tf_expression(expr) for expr in trainable_vars + [loss])
        assert all(var.device == device.name for var in trainable_vars)

        # Validate shapes.
        if self._gradient_shapes is None:
            self._gradient_shapes = [var.shape.as_list() for var in trainable_vars]
        assert len(trainable_vars) == len(self._gradient_shapes)
        assert all(var.shape.as_list() == var_shape for var, var_shape in zip(trainable_vars, self._gradient_shapes))

        # Report memory usage if requested.
        # The flag is cleared first, so the summary is attempted only once per Optimizer.
        deps = []
        if self._report_mem_usage:
            self._report_mem_usage = False
            try:
                with tf.name_scope(self.id + '_mem'), tf.device(device.name), tf.control_dependencies([loss]):
                    deps.append(autosummary.autosummary(self.id + "/mem_usage_gb", tf.contrib.memory_stats.BytesInUse() / 2**30))
            except tf.errors.NotFoundError:
                # Best effort: memory reporting is skipped when this error is raised.
                pass

        # Compute gradients.
        with tf.name_scope(self.id + "_grad"), tf.device(device.name), tf.control_dependencies(deps):
            loss = self.apply_loss_scaling(tf.cast(loss, tf.float32))
            gate = tf.train.Optimizer.GATE_NONE # disable gating to reduce memory usage
            grad_list = device.optimizer.compute_gradients(loss=loss, var_list=trainable_vars, gate_gradients=gate)

        # Register gradients.
        # A variable may collect several gradients if this method is called repeatedly
        # for the same device; they are summed in apply_updates().
        for grad, var in grad_list:
            if var not in device.grad_raw:
                device.grad_raw[var] = []
            device.grad_raw[var].append(grad)

    def apply_updates(self, allow_no_op: bool = False) -> tf.Operation:
        """Construct training op to update the registered variables based on their gradients.

        May be called only once. If `allow_no_op` is True and no gradients have
        been registered, a plain `tf.no_op` named 'TrainingOp' is returned.
        """
        tfutil.assert_tf_initialized()
        assert not self._updates_applied
        self._updates_applied = True
        all_ops = []

        # Check for no-op.
        if allow_no_op and len(self._devices) == 0:
            with tfutil.absolute_name_scope(self.scope):
                return tf.no_op(name='TrainingOp')

        # Clean up gradients.
        for device_idx, device in enumerate(self._devices.values()):
            with tfutil.absolute_name_scope(self.scope + "/Clean%d" % device_idx), tf.device(device.name):
                for var, grad in device.grad_raw.items():

                    # Filter out disconnected gradients and convert to float32.
                    grad = [g for g in grad if g is not None]
                    grad = [tf.cast(g, tf.float32) for g in grad]

                    # Sum within the device.
                    if len(grad) == 0:
                        grad = tf.zeros(var.shape) # No gradients => zero.
                    elif len(grad) == 1:
                        grad = grad[0] # Single gradient => use as is.
                    else:
                        grad = tf.add_n(grad) # Multiple gradients => sum.

                    # Scale as needed.
                    # Divides by the number of registered gradients (including disconnected
                    # ones), the number of devices, and the minibatch multiplier, and
                    # undoes the loss scaling.
                    scale = 1.0 / len(device.grad_raw[var]) / len(self._devices)
                    scale = tf.constant(scale, dtype=tf.float32, name="scale")
                    if self.minibatch_multiplier is not None:
                        scale /= tf.cast(self.minibatch_multiplier, tf.float32)
                    scale = self.undo_loss_scaling(scale)
                    device.grad_clean[var] = grad * scale

        # Sum gradients across devices.
        if len(self._devices) > 1:
            with tfutil.absolute_name_scope(self.scope + "/Broadcast"), tf.device(None):
                if platform.system() == "Windows": # Windows => NCCL ops are not available.
                    self._broadcast_fallback()
                elif tf.VERSION.startswith("1.15."): # TF 1.15 => NCCL ops are broken: https://github.com/tensorflow/tensorflow/issues/41539
                    self._broadcast_fallback()
                else: # Otherwise => NCCL ops are safe to use.
                    self._broadcast_nccl()

        # Apply updates separately on each device.
        for device_idx, device in enumerate(self._devices.values()):
            with tfutil.absolute_name_scope(self.scope + "/Apply%d" % device_idx), tf.device(device.name):
                # pylint: disable=cell-var-from-loop
                # The lambdas below capture loop variables, but each one is consumed by
                # tf.cond() within the same iteration.

                # Accumulate gradients over time.
                if self.minibatch_multiplier is None:
                    acc_ok = tf.constant(True, name='acc_ok')
                    device.grad_acc = OrderedDict(device.grad_clean)
                else:
                    # Create variables.
                    with tf.control_dependencies(None):
                        for var in device.grad_clean.keys():
                            device.grad_acc_vars[var] = tf.Variable(tf.zeros(var.shape), trainable=False, name="grad_acc_var")
                        device.grad_acc_count = tf.Variable(tf.zeros([]), trainable=False, name="grad_acc_count")

                    # Track counter.
                    # acc_ok becomes True on the step that completes a full accumulation
                    # cycle; the counter is then reset, otherwise it is incremented.
                    count_cur = device.grad_acc_count + 1.0
                    count_inc_op = lambda: tf.assign(device.grad_acc_count, count_cur)
                    count_reset_op = lambda: tf.assign(device.grad_acc_count, tf.zeros([]))
                    acc_ok = (count_cur >= tf.cast(self.minibatch_multiplier, tf.float32))
                    all_ops.append(tf.cond(acc_ok, count_reset_op, count_inc_op))

                    # Track gradients.
                    for var, grad in device.grad_clean.items():
                        acc_var = device.grad_acc_vars[var]
                        acc_cur = acc_var + grad
                        device.grad_acc[var] = acc_cur
                        # The control dependency makes the running sum get computed before
                        # the accumulator variable is overwritten.
                        with tf.control_dependencies([acc_cur]):
                            acc_inc_op = lambda: tf.assign(acc_var, acc_cur)
                            acc_reset_op = lambda: tf.assign(acc_var, tf.zeros(var.shape))
                            all_ops.append(tf.cond(acc_ok, acc_reset_op, acc_inc_op))

                # No overflow => apply gradients.
                # all_ok requires a completed accumulation cycle and finite values in every gradient.
                all_ok = tf.reduce_all(tf.stack([acc_ok] + [tf.reduce_all(tf.is_finite(g)) for g in device.grad_acc.values()]))
                apply_op = lambda: device.optimizer.apply_gradients([(tf.cast(grad, var.dtype), var) for var, grad in device.grad_acc.items()])
                all_ops.append(tf.cond(all_ok, apply_op, tf.no_op))

                # Adjust loss scaling.
                # Only on steps where acc_ok holds: increase when all_ok, decrease otherwise.
                if self.use_loss_scaling:
                    ls_inc_op = lambda: tf.assign_add(device.loss_scaling_var, self.loss_scaling_inc)
                    ls_dec_op = lambda: tf.assign_sub(device.loss_scaling_var, self.loss_scaling_dec)
                    ls_update_op = lambda: tf.group(tf.cond(all_ok, ls_inc_op, ls_dec_op))
                    all_ops.append(tf.cond(acc_ok, ls_update_op, tf.no_op))

                # Last device => report statistics.
                if device_idx == len(self._devices) - 1:
                    all_ops.append(autosummary.autosummary(self.id + "/learning_rate", tf.convert_to_tensor(self.learning_rate)))
                    all_ops.append(autosummary.autosummary(self.id + "/overflow_frequency", tf.where(all_ok, 0, 1), condition=acc_ok))
                    if self.use_loss_scaling:
                        all_ops.append(autosummary.autosummary(self.id + "/loss_scaling_log2", device.loss_scaling_var))

        # Initialize variables.
        self.reset_optimizer_state()
        if self.use_loss_scaling:
            tfutil.init_uninitialized_vars([device.loss_scaling_var for device in self._devices.values()])
        if self.minibatch_multiplier is not None:
            tfutil.run([var.initializer for device in self._devices.values() for var in list(device.grad_acc_vars.values()) + [device.grad_acc_count]])

        # Group everything into a single op.
        with tfutil.absolute_name_scope(self.scope):
            return tf.group(*all_ops, name="TrainingOp")

    def reset_optimizer_state(self) -> None:
        """Reset internal state of the underlying optimizer.

        Runs the initializer of every variable reported by each device's optimizer.
        """
        tfutil.assert_tf_initialized()
        tfutil.run([var.initializer for device in self._devices.values() for var in device.optimizer.variables()])

    def get_loss_scaling_var(self, device: str) -> Union[tf.Variable, None]:
        """Get or create variable representing log2 of the current dynamic loss scaling factor.

        Returns None when loss scaling is disabled.
        """
        return self._get_device(device).loss_scaling_var

    def apply_loss_scaling(self, value: TfExpression) -> TfExpression:
        """Apply dynamic loss scaling for the given expression.

        Multiplies `value` by 2 ** loss_scaling_var of the device `value` lives on;
        returns `value` unchanged when loss scaling is disabled.
        """
        assert tfutil.is_tf_expression(value)
        if not self.use_loss_scaling:
            return value
        return value * tfutil.exp2(self.get_loss_scaling_var(value.device))

    def undo_loss_scaling(self, value: TfExpression) -> TfExpression:
        """Undo the effect of dynamic loss scaling for the given expression.

        Multiplies `value` by 2 ** -loss_scaling_var of the device `value` lives on;
        returns `value` unchanged when loss scaling is disabled.
        """
        assert tfutil.is_tf_expression(value)
        if not self.use_loss_scaling:
            return value
        return value * tfutil.exp2(-self.get_loss_scaling_var(value.device)) # pylint: disable=invalid-unary-operand-type

    def _broadcast_nccl(self):
        """Sum gradients across devices using NCCL ops (fast path)."""
        from tensorflow.python.ops import nccl_ops # pylint: disable=no-name-in-module
        # Walk the devices' variables in lockstep; this relies on every device holding
        # its variables in the same order.
        for all_vars in zip(*[device.grad_clean.keys() for device in self._devices.values()]):
            if any(x.shape.num_elements() > 0 for x in all_vars):
                all_grads = [device.grad_clean[var] for device, var in zip(self._devices.values(), all_vars)]
                all_grads = nccl_ops.all_sum(all_grads)
                for device, var, grad in zip(self._devices.values(), all_vars, all_grads):
                    device.grad_clean[var] = grad

    def _broadcast_fallback(self):
        """Sum gradients across devices using TensorFlow collective ops (slow fallback path)."""
        from tensorflow.python.ops import collective_ops # pylint: disable=no-name-in-module
        global _collective_ops_warning_printed, _collective_ops_group_key, _collective_ops_instance_key
        # Nothing to do when every gradient is empty.
        if all(x.shape.num_elements() == 0 for device in self._devices.values() for x in device.grad_clean.values()):
            return
        if not _collective_ops_warning_printed:
            print("------------------------------------------------------------------------")
            print("WARNING: Using slow fallback implementation for inter-GPU communication.")
            print("Please use TensorFlow 1.14 on Linux for optimal training performance.")
            print("------------------------------------------------------------------------")
            _collective_ops_warning_printed = True
        for device in self._devices.values():
            with tf.device(device.name):
                # Flatten and concatenate all gradients so that one all-reduce per device suffices.
                combo = [tf.reshape(x, [x.shape.num_elements()]) for x in device.grad_clean.values()]
                combo = tf.concat(combo, axis=0)
                combo = collective_ops.all_reduce(combo, merge_op='Add', final_op='Id',
                    group_size=len(self._devices), group_key=_collective_ops_group_key,
                    instance_key=_collective_ops_instance_key)
                # Slice the reduced vector back into tensors of the original shapes.
                cur_ofs = 0
                for var, grad_old in device.grad_clean.items():
                    grad_new = tf.reshape(combo[cur_ofs : cur_ofs + grad_old.shape.num_elements()], grad_old.shape)
                    cur_ofs += grad_old.shape.num_elements()
                    device.grad_clean[var] = grad_new
        # All devices used the same instance key above; the next broadcast gets a new one.
        _collective_ops_instance_key += 1
class SimpleAdam:
    """Simplified version of tf.train.AdamOptimizer that behaves identically when used with dnnlib.tflib.Optimizer."""

    def __init__(self, name="Adam", learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
        """Store the hyperparameters; state variables are created by `apply_gradients()`."""
        self.name = name
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.all_state_vars = []

    def variables(self):
        """Return every state variable created so far by `apply_gradients()`."""
        return self.all_state_vars

    def compute_gradients(self, loss, var_list, gate_gradients=tf.train.Optimizer.GATE_NONE):
        """Return a list of `(gradient, variable)` pairs for `loss`; only GATE_NONE is supported."""
        assert gate_gradients == tf.train.Optimizer.GATE_NONE
        gradients = tf.gradients(loss, var_list)
        return list(zip(gradients, var_list))

    def apply_gradients(self, grads_and_vars):
        """Build and return a grouped op that performs one Adam step for the given pairs."""
        with tf.name_scope(self.name):
            new_state = []
            ops = []

            # Running powers of beta1 / beta2, used for the startup bias correction
            # of the learning rate.
            with tf.control_dependencies(None):
                beta1_power = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False)
                beta2_power = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False)
                new_state.extend([beta1_power, beta2_power])
            beta1_power_next = beta1_power * self.beta1
            beta2_power_next = beta2_power * self.beta2
            ops.extend([tf.assign(beta1_power, beta1_power_next), tf.assign(beta2_power, beta2_power_next)])
            corrected_lr = self.learning_rate * tf.sqrt(1 - beta2_power_next) / (1 - beta1_power_next)

            # Per-variable first and second moment estimates and the resulting update.
            for gradient, variable in grads_and_vars:
                with tf.control_dependencies(None):
                    moment1 = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(variable), trainable=False)
                    moment2 = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(variable), trainable=False)
                    new_state.extend([moment1, moment2])
                moment1_next = self.beta1 * moment1 + (1 - self.beta1) * gradient
                moment2_next = self.beta2 * moment2 + (1 - self.beta2) * tf.square(gradient)
                step = corrected_lr * moment1_next / (tf.sqrt(moment2_next) + self.epsilon)
                ops.extend([tf.assign(moment1, moment1_next), tf.assign(moment2, moment2_next), tf.assign_sub(variable, step)])

            # Remember the new state variables and bundle all updates into one op.
            self.all_state_vars.extend(new_state)
            return tf.group(*ops)
================================================
FILE: dnnlib/tflib/tfutil.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Miscellaneous helper utils for Tensorflow."""
import os
import numpy as np
import tensorflow as tf
# Silence deprecation warnings from TensorFlow 1.13 onwards
import logging
logging.getLogger('tensorflow').setLevel(logging.ERROR)
import tensorflow.contrib # requires TensorFlow 1.x!
tf.contrib = tensorflow.contrib
from typing import Any, Iterable, List, Union
# Union of the TensorFlow object types that this module treats as graph expressions.
TfExpression = Union[tf.Tensor, tf.Variable, tf.Operation]
"""A type that represents a valid Tensorflow expression."""
# TfExpression extended with plain Python numbers and NumPy arrays.
TfExpressionEx = Union[TfExpression, int, float, np.ndarray]
"""A type that can be converted to a valid Tensorflow expression."""
def run(*args, **kwargs) -> Any:
    """Evaluate ops in the default TensorFlow session.

    All positional and keyword arguments are forwarded unchanged to the
    `run()` method of the default session, and its result is returned.
    A default session must already exist (see `assert_tf_initialized()`).
    """
    assert_tf_initialized()
    session = tf.get_default_session()
    return session.run(*args, **kwargs)
def is_tf_expression(x: Any) -> bool:
    """Return True when `x` is a `tf.Tensor`, `tf.Variable`, or `tf.Operation`."""
    expression_types = (tf.Tensor, tf.Variable, tf.Operation)
    return isinstance(x, expression_types)
def shape_to_list(shape: Iterable[tf.Dimension]) -> List[Union[int, None]]:
    """Collect the `.value` of every dimension in `shape` into a list.

    Retained for backwards compatibility -- use TensorShape.as_list() in new code.
    """
    values = []
    for dim in shape:
        values.append(dim.value)
    return values
def flatten(x: TfExpressionEx) -> TfExpression:
    """Reshape `x` to a single dimension inside the "Flatten" name scope."""
    with tf.name_scope("Flatten"):
        flat = tf.reshape(x, [-1])
    return flat
def log2(x: TfExpressionEx) -> TfExpression:
    """Base-2 logarithm, computed as the natural log scaled by 1/ln(2)."""
    with tf.name_scope("Log2"):
        natural_log = tf.log(x)
        return natural_log * np.float32(1.0 / np.log(2.0))
def exp2(x: TfExpressionEx) -> TfExpression:
    """Base-2 exponential, computed as exp(x * ln(2))."""
    with tf.name_scope("Exp2"):
        scaled = x * np.float32(np.log(2.0))
        return tf.exp(scaled)
def lerp(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpressionEx:
    """Linearly interpolate from `a` (at t=0) to `b` (at t=1); `t` is not clamped."""
    with tf.name_scope("Lerp"):
        delta = b - a
        return a + delta * t
def lerp_clip(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpression:
    """Linearly interpolate from `a` to `b` with `t` clamped to [0, 1]."""
    with tf.name_scope("LerpClip"):
        delta = b - a
        weight = tf.clip_by_value(t, 0.0, 1.0)
        return a + delta * weight
def absolute_name_scope(scope: str) -> tf.name_scope:
    """Return a name scope for `scope` that ignores any surrounding scopes.

    The scope name is passed to `tf.name_scope` with a trailing "/" appended.
    """
    absolute_scope = scope + "/"
    return tf.name_scope(absolute_scope)
def absolute_variable_scope(scope: str, **kwargs) -> tf.variable_scope:
    """Return a variable scope for `scope` that ignores any surrounding scopes.

    Extra keyword arguments are forwarded to `tf.VariableScope`. The scope is
    entered with `auxiliary_name_scope=False`.
    """
    explicit_scope = tf.VariableScope(name=scope, **kwargs)
    return tf.variable_scope(explicit_scope, auxiliary_name_scope=False)
def _sanitize_tf_config(config_dict: dict = None) -> dict:
    """Build the effective TensorFlow config: defaults overlaid with user overrides.

    Keys are dotted paths. The "rnd." and "env." prefixes are interpreted by
    `init_tf()`; all other keys are applied to the session config proto by
    `create_session()`.

    Args:
        config_dict: Optional overrides; entries replace or extend the defaults.

    Returns:
        A new dict holding the merged configuration.
    """
    # Defaults.
    cfg = {
        "rnd.np_random_seed": None,  # Random seed for NumPy. None = keep as is.
        "rnd.tf_random_seed": "auto",  # Random seed for TensorFlow. 'auto' = derive from NumPy random state. None = keep as is.
        "env.TF_CPP_MIN_LOG_LEVEL": "1",  # 0 = Print all available debug info from TensorFlow. 1 = Print warnings and errors, but disable debug info.
        "graph_options.place_pruned_graph": True,  # False = Check that all ops are available on the designated device. True = Skip the check for ops that are not used.
        "gpu_options.allow_growth": True,  # False = Allocate all GPU memory at the beginning. True = Allocate only as much GPU memory as needed.
    }

    # An environment variable that is already set wins over our default for it.
    for key in list(cfg):  # iterate over a copy because entries are deleted
        section, *remainder = key.split(".")
        if section != "env":
            continue
        assert len(remainder) == 1
        if remainder[0] in os.environ:
            del cfg[key]

    # User overrides.
    if config_dict is not None:
        cfg.update(config_dict)
    return cfg
def init_tf(config_dict: dict = None) -> None:
    """Initialize TensorFlow session using good default settings.

    Does nothing when a default session already exists. Otherwise seeds the
    NumPy and TensorFlow random generators as configured, exports the "env.*"
    config entries to `os.environ`, and creates a session that is installed
    as the default one.

    Args:
        config_dict: Optional overrides for the defaults from `_sanitize_tf_config()`.
    """
    # Skip if already initialized.
    if tf.get_default_session() is not None:
        return

    cfg = _sanitize_tf_config(config_dict)

    # Seed NumPy first: the "auto" TensorFlow seed below is drawn from NumPy.
    numpy_seed = cfg["rnd.np_random_seed"]
    if numpy_seed is not None:
        np.random.seed(numpy_seed)

    tensorflow_seed = cfg["rnd.tf_random_seed"]
    if tensorflow_seed == "auto":
        tensorflow_seed = np.random.randint(1 << 31)
    if tensorflow_seed is not None:
        tf.set_random_seed(tensorflow_seed)

    # Export "env.NAME" entries as environment variables.
    for key, value in cfg.items():
        fields = key.split(".")
        if fields[0] != "env":
            continue
        assert len(fields) == 2
        os.environ[fields[1]] = str(value)

    # Create default TensorFlow session.
    create_session(cfg, force_as_default=True)
def assert_tf_initialized():
    """Raise RuntimeError unless a default TensorFlow session exists."""
    default_session = tf.get_default_session()
    if default_session is None:
        raise RuntimeError("No default TensorFlow session found. Please call dnnlib.tflib.init_tf().")
def create_session(config_dict: dict = None, force_as_default: bool = False) -> tf.Session:
    """Create tf.Session based on config dict.

    Args:
        config_dict: Optional overrides for the defaults from `_sanitize_tf_config()`.
            Keys outside the "rnd"/"env" sections are dotted attribute paths
            into the `tf.ConfigProto`.
        force_as_default: When True, enter the session as the default session
            without requiring a matching exit (nesting checks are disabled).

    Returns:
        The newly created session.
    """
    # Setup TensorFlow config proto.
    cfg = _sanitize_tf_config(config_dict)
    config_proto = tf.ConfigProto()
    for key, value in cfg.items():
        fields = key.split(".")
        if fields[0] in ["rnd", "env"]:
            continue  # handled by init_tf(), not part of the proto
        # Walk down to the parent message, then set the leaf field on it.
        target = config_proto
        for attr_name in fields[:-1]:
            target = getattr(target, attr_name)
        setattr(target, fields[-1], value)

    # Create session.
    session = tf.Session(config=config_proto)
    if force_as_default:
        # pylint: disable=protected-access
        default_context = session.as_default()
        default_context.enforce_nesting = False
        session._default_session = default_context
        default_context.__enter__()
    return session
def init_uninitialized_vars(target_vars: List[tf.Variable] = None) -> None:
    """Initialize all tf.Variables that have not already been initialized.

    Equivalent to the following, but more efficient and does not bloat the tf graph:
    tf.variables_initializer(tf.report_uninitialized_variables()).run()

    Args:
        target_vars: Variables to consider; defaults to `tf.global_variables()`.
    """
    assert_tf_initialized()
    if target_vars is None:
        target_vars = tf.global_variables()

    graph = tf.get_default_graph()
    candidates = []  # variables that got a fresh "is initialized" probe
    probe_ops = []   # the matching probe ops, in the same order

    with tf.control_dependencies(None):  # ignore surrounding control_dependencies
        for var in target_vars:
            assert is_tf_expression(var)
            probe_name = var.name.replace(":0", "/IsVariableInitialized:0")
            try:
                graph.get_tensor_by_name(probe_name)
            except KeyError:
                # Op does not exist => variable may be uninitialized.
                candidates.append(var)
                with absolute_name_scope(var.name.split(":")[0]):
                    probe_ops.append(tf.is_variable_initialized(var))

    probe_results = run(probe_ops)
    pending = [var for var, is_inited in zip(candidates, probe_results) if not is_inited]
    run([var.initializer for var in pending])
def set_vars(var_to_value_dict: dict) -> None:
    """Set the values of given tf.Variables.

    Equivalent to the following, but more efficient and does not bloat the tf graph:
    tflib.run([tf.assign(var, value) for var, value in var_to_value_dict.items()])

    Args:
        var_to_value_dict: Mapping from variable to the value to assign to it.
    """
    assert_tf_initialized()
    graph = tf.get_default_graph()
    setters = []
    feeds = {}
    for var, value in var_to_value_dict.items():
        assert is_tf_expression(var)
        setter_name = var.name.replace(":0", "/setter:0")
        try:
            setter = graph.get_tensor_by_name(setter_name)  # look for existing op
        except KeyError:
            # Create the setter once per variable, under the variable's own
            # name scope and ignoring surrounding control_dependencies.
            with absolute_name_scope(var.name.split(":")[0]), tf.control_dependencies(None):
                new_value = tf.placeholder(var.dtype, var.shape, "new_value")
                setter = tf.assign(var, new_value, name="setter")
        setters.append(setter)
        # The assign op's second input is the placeholder that receives the value.
        feeds[setter.op.inputs[1]] = value
    run(setters, feeds)
def create_var_with_large_initial_value(initial_value: np.ndarray, *args, **kwargs):
    """Create tf.Variable with large initial value without bloating the tf graph.

    The variable is created from a zeros tensor of matching shape and dtype,
    and the actual data is assigned afterwards through `set_vars()`. Extra
    positional and keyword arguments are forwarded to `tf.Variable`.
    """
    assert_tf_initialized()
    assert isinstance(initial_value, np.ndarray)
    zero_init = tf.zeros(initial_value.shape, initial_value.dtype)
    variable = tf.Variable(zero_init, *args, **kwargs)
    set_vars({variable: initial_value})
    return variable
def convert_images_from_uint8(images, drange=[-1,1], nhwc_to_nchw=False):
    """Convert a minibatch of images from uint8 to float32 with configurable dynamic range.

    Input value 0 maps to `drange[0]` and 255 maps to `drange[1]`. When
    `nhwc_to_nchw` is set, the axes are permuted with `[0, 3, 1, 2]` before
    scaling. Can be used as an input transformation for Network.run().
    """
    converted = tf.cast(images, tf.float32)
    if nhwc_to_nchw:
        converted = tf.transpose(converted, [0, 3, 1, 2])
    scale = (drange[1] - drange[0]) / 255
    return converted * scale + drange[0]
def convert_images_to_uint8(images, drange=[-1,1], nchw_to_nhwc=False, shrink=1):
    """Convert a minibatch of images from float32 to uint8 with configurable dynamic range.

    `drange[0]` maps to 0 and `drange[1]` maps to 255; 0.5 is added before the
    saturating cast. When `shrink > 1`, the images are first average-pooled by
    that factor using the NCHW data format. When `nchw_to_nhwc` is set, the
    axes are permuted with `[0, 2, 3, 1]`. Can be used as an output
    transformation for Network.run().
    """
    converted = tf.cast(images, tf.float32)
    if shrink > 1:
        window = [1, 1, shrink, shrink]
        converted = tf.nn.avg_pool(converted, ksize=window, strides=window, padding="VALID", data_format="NCHW")
    if nchw_to_nhwc:
        converted = tf.transpose(converted, [0, 2, 3, 1])
    scale = 255 / (drange[1] - drange[0])
    offset = 0.5 - drange[0] * scale
    converted = converted * scale + offset
    return tf.saturate_cast(converted, tf.uint8)
================================================
FILE: dnnlib/util.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Miscellaneous utility classes and functions."""
import ctypes
import fnmatch
import importlib
import inspect
import numpy as np
import os
import shutil
import sys
import types
import io
import pickle
import re
import requests
import html
import hashlib
import glob
import uuid
from distutils.util import strtobool
from typing import Any, List, Tuple, Union
# Util classes
# ------------------------------------------------------------------------------------------
class EasyDict(dict):
    """Dict subclass whose items can also be read, written and deleted as attributes."""

    def __getattr__(self, name: str) -> Any:
        # Only invoked when normal attribute lookup fails, so real dict
        # methods keep working; a missing key surfaces as AttributeError.
        try:
            value = self[name]
        except KeyError:
            raise AttributeError(name)
        return value

    def __setattr__(self, name: str, value: Any) -> None:
        # Attribute assignment stores an item instead of an instance attribute.
        self[name] = value

    def __delattr__(self, name: str) -> None:
        # Attribute deletion removes the item of the same name.
        del self[name]
class Logger(object):
    """Redirect stderr to stdout, optionally print stdout to a file, and optionally force flushing on both stdout and the file.

    Constructing a Logger replaces `sys.stdout` and `sys.stderr` with the
    instance; `close()` (or leaving the `with` block) restores them.
    """

    def __init__(self, file_name: str = None, file_mode: str = "w", should_flush: bool = True):
        """Install the logger.

        Args:
            file_name: Optional path of a file that receives a copy of all output.
            file_mode: Mode used to open `file_name`.
            should_flush: Flush stdout and the file after every write.
        """
        self.file = None

        if file_name is not None:
            self.file = open(file_name, file_mode)

        self.should_flush = should_flush
        # Remember the streams being replaced so close() can restore them.
        self.stdout = sys.stdout
        self.stderr = sys.stderr

        sys.stdout = self
        sys.stderr = self

    def __enter__(self) -> "Logger":
        return self

    def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
        self.close()

    def write(self, text: str) -> None:
        """Write text to stdout (and a file) and optionally flush."""
        if len(text) == 0:  # workaround for a bug in VSCode debugger: sys.stdout.write(''); sys.stdout.flush() => crash
            return

        if self.file is not None:
            self.file.write(text)

        self.stdout.write(text)

        if self.should_flush:
            self.flush()

    def flush(self) -> None:
        """Flush written text to both stdout and a file, if open."""
        if self.file is not None:
            self.file.flush()

        self.stdout.flush()

    def close(self) -> None:
        """Flush, close possible files, and remove stdout/stderr mirroring."""
        self.flush()

        # if using multiple loggers, prevent closing in wrong order
        if sys.stdout is self:
            sys.stdout = self.stdout
        if sys.stderr is self:
            sys.stderr = self.stderr

        if self.file is not None:
            self.file.close()
            # Forget the closed handle: a later write()/flush()/close() -- e.g.
            # forwarded by another Logger that still wraps this one -- must not
            # touch a closed file.
            self.file = None
# Small util functions
# ------------------------------------------------------------------------------------------
def format_time(seconds: Union[int, float]) -> str:
    """Render a duration as a short human-readable string.

    The input is rounded to whole seconds with `np.rint`. The output uses the
    coarsest fitting layout: "Ns", "Nm SSs", "Nh MMm SSs" or "Nd HHh MMm"
    (seconds are dropped once the duration reaches a day).
    """
    s = int(np.rint(seconds))
    if s < 60:
        return "{0}s".format(s)

    total_minutes, secs = divmod(s, 60)
    if total_minutes < 60:
        return "{0}m {1:02}s".format(total_minutes, secs)

    total_hours, minutes = divmod(total_minutes, 60)
    if total_hours < 24:
        return "{0}h {1:02}m {2:02}s".format(total_hours, minutes, secs)

    days, hours = divmod(total_hours, 24)
    return "{0}d {1:02}h {2:02}m".format(days, hours, minutes)
def ask_yes_no(question: str) -> bool:
    """Ask the user the question until the user inputs a valid answer.

    Accepted answers (case-insensitive, surrounding whitespace ignored) are
    y/yes/t/true/on/1 for True and n/no/f/false/off/0 for False; anything else
    repeats the prompt.

    Args:
        question: Text printed before the " [y/n]" suffix.

    Returns:
        True for an affirmative answer, False for a negative one.
    """
    # Same vocabulary as distutils.util.strtobool, parsed locally so the result
    # is a real bool and the prompt does not rely on the deprecated distutils.
    affirmative = ("y", "yes", "t", "true", "on", "1")
    negative = ("n", "no", "f", "false", "off", "0")
    while True:
        print("{0} [y/n]".format(question))
        answer = input().strip().lower()
        if answer in affirmative:
            return True
        if answer in negative:
            return False
def tuple_product(t: Tuple) -> Any:
    """Multiply all elements of `t` together; an empty input yields 1."""
    product = 1
    for factor in t:
        product *= factor
    return product
# Maps a NumPy type name to the ctypes type of the same size.
_str_to_ctype = {
    # unsigned integers
    "uint8": ctypes.c_ubyte,
    "uint16": ctypes.c_uint16,
    "uint32": ctypes.c_uint32,
    "uint64": ctypes.c_uint64,
    # signed integers
    "int8": ctypes.c_byte,
    "int16": ctypes.c_int16,
    "int32": ctypes.c_int32,
    "int64": ctypes.c_int64,
    # floating point
    "float32": ctypes.c_float,
    "float64": ctypes.c_double,
}


def get_dtype_and_ctype(type_obj: Any) -> Tuple[np.dtype, Any]:
    """Return the NumPy dtype and the ctypes type matching a type name.

    Args:
        type_obj: A type name string, or an object exposing the name through
            `__name__` or `name` (checked in that order).

    Returns:
        `(numpy_dtype, ctypes_type)`; both have the same size in bytes.

    Raises:
        RuntimeError: No type name can be derived from `type_obj`.
    """
    if isinstance(type_obj, str):
        type_str = type_obj
    else:
        for attr in ("__name__", "name"):
            if hasattr(type_obj, attr):
                type_str = getattr(type_obj, attr)
                break
        else:
            raise RuntimeError("Cannot infer type name from input")

    assert type_str in _str_to_ctype

    numpy_type = np.dtype(type_str)
    c_type = _str_to_ctype[type_str]
    assert numpy_type.itemsize == ctypes.sizeof(c_type)
    return numpy_type, c_type
def is_pickleable(obj: Any) -> bool:
    """Check whether `obj` can be pickled by attempting to pickle it.

    The object is serialized into a throw-away in-memory buffer.

    Returns:
        True if `pickle.dump` succeeds, False if it raises any ordinary exception.
    """
    try:
        with io.BytesIO() as stream:
            pickle.dump(obj, stream)
        return True
    except Exception:
        # Pickling can fail with many exception types (PicklingError, TypeError,
        # AttributeError, ...). KeyboardInterrupt/SystemExit are deliberately
        # not caught so the process can still be interrupted.
        return False
# Functionality to import modules/objects by name, and call functions by name
# ------------------------------------------------------------------------------------------
def get_module_from_obj_name(obj_name: str) -> Tuple[types.ModuleType, str]:
    """Searches for the underlying module behind the name to some python object.

    The dotted name is split at every possible position, longest module part
    first, until a module that imports and contains the remaining attribute
    path is found. A leading "np." or "tf." is expanded to "numpy." or
    "tensorflow." respectively.

    Returns:
        The module and the object name (original name with module part removed).

    Raises:
        ImportError: No split of the name could be imported, or importing one
            of the candidate modules failed for another reason.
        AttributeError: A module was importable but lacks the requested attribute.
    """
    # allow convenience shorthands, substitute them by full names
    # (the dot is escaped so that only a literal "np."/"tf." prefix matches;
    # names such as "tfutil.run" must not be rewritten)
    obj_name = re.sub(r"^np\.", "numpy.", obj_name)
    obj_name = re.sub(r"^tf\.", "tensorflow.", obj_name)

    # list alternatives for (module_name, local_obj_name)
    parts = obj_name.split(".")
    name_pairs = [(".".join(parts[:i]), ".".join(parts[i:])) for i in range(len(parts), 0, -1)]

    # try each alternative in turn
    for module_name, local_obj_name in name_pairs:
        try:
            module = importlib.import_module(module_name)  # may raise ImportError
            get_obj_from_module(module, local_obj_name)  # may raise AttributeError
            return module, local_obj_name
        except Exception:
            # Best effort: any failure just means this split is not the right
            # one. The passes below diagnose the failure. KeyboardInterrupt and
            # SystemExit are not swallowed.
            continue

    # maybe some of the modules themselves contain errors?
    for module_name, _local_obj_name in name_pairs:
        try:
            importlib.import_module(module_name)  # may raise ImportError
        except ImportError as err:
            # A plain "module not found" for this candidate is expected; any
            # other import failure is the real problem and is re-raised.
            if not str(err).startswith("No module named '" + module_name + "'"):
                raise

    # maybe the requested attribute is missing?
    for module_name, local_obj_name in name_pairs:
        try:
            module = importlib.import_module(module_name)  # may raise ImportError
            get_obj_from_module(module, local_obj_name)  # may raise AttributeError
        except ImportError:
            pass

    # we are out of luck, but we have no idea why
    raise ImportError(obj_name)


def get_obj_from_module(module: types.ModuleType, obj_name: str) -> Any:
    """Traverses the object name and returns the last (rightmost) python object.

    An empty `obj_name` returns the module itself. A missing attribute along
    the path raises AttributeError.
    """
    if obj_name == '':
        return module
    obj = module
    for part in obj_name.split("."):
        obj = getattr(obj, part)
    return obj
def get_obj_by_name(name: str) -> Any:
    """Resolve a dotted name such as "package.module.attr" to the python object it names."""
    owner_module, local_name = get_module_from_obj_name(name)
    resolved = get_obj_from_module(owner_module, local_name)
    return resolved
def call_func_by_name(*args, func_name: str = None, **kwargs) -> Any:
    """Look up a callable by its dotted name and call it.

    Args:
        *args: Positional arguments forwarded to the callable.
        func_name: Dotted name of the callable; keyword-only and required.
        **kwargs: Keyword arguments forwarded to the callable.

    Returns:
        Whatever the callable returns.
    """
    assert func_name is not None
    target = get_obj_by_name(func_name)
    assert callable(target)
    return target(*args, **kwargs)
def get_module_dir_by_obj_name(obj_name: str) -> str:
    """Return the directory of the source file of the module that provides `obj_name`."""
    owner_module, _ = get_module_from_obj_name(obj_name)
    module_file = inspect.getfile(owner_module)
    return os.path.dirname(module_file)
def is_top_level_function(obj: Any) -> bool:
    """Tell whether `obj` is a callable whose name is bound at module scope.

    The check looks up `obj.__name__` in the namespace of the module named by
    `obj.__module__`. Non-callables yield False.
    """
    if not callable(obj):
        return False
    module_namespace = sys.modules[obj.__module__].__dict__
    return obj.__name__ in module_namespace
def get_top_level_function_name(obj: Any) -> str:
    """Return "<module>.<name>" for a top-level function.

    Asserts that `obj` passes `is_top_level_function()`.
    """
    assert is_top_level_function(obj)
    qualified_parts = (obj.__module__, obj.__name__)
    return qualified_parts[0] + "." + qualified_parts[1]
# File system helpers
# ------------------------------------------------------------------------------------------
def list_dir_recursively_with_ignore(dir_path: str, ignores: List[str] = None, add_base_to_relative: bool = False) -> List[Tuple[str, str]]:
    """List all files below a directory, skipping ignored file and directory names.

    Args:
        dir_path: Existing directory to walk.
        ignores: `fnmatch` patterns tested against bare file and directory
            names; matching directories are not descended into.
        add_base_to_relative: Prefix every relative path with the base name of
            `dir_path`.

    Returns:
        A list of `(absolute_path, relative_path)` tuples, where the relative
        path is relative to `dir_path`.
    """
    assert os.path.isdir(dir_path)
    base_name = os.path.basename(os.path.normpath(dir_path))
    patterns = [] if ignores is None else ignores

    def is_ignored(name):
        return any(fnmatch.fnmatch(name, pattern) for pattern in patterns)

    result = []
    for root, dirs, files in os.walk(dir_path, topdown=True):
        # dirs need to be edited in-place so that os.walk skips the ignored ones
        dirs[:] = [d for d in dirs if not is_ignored(d)]

        for file_name in files:
            if is_ignored(file_name):
                continue
            absolute_path = os.path.join(root, file_name)
            relative_path = os.path.relpath(absolute_path, dir_path)
            if add_base_to_relative:
                relative_path = os.path.join(base_name, relative_path)
            result.append((absolute_path, relative_path))
    return result
def copy_files_and_create_dirs(files: List[Tuple[str, str]]) -> None:
    """Takes in a list of tuples of (src, dst) paths and copies files.

    Will create all necessary directories. A destination without a directory
    component (a bare file name) is copied into the current working directory.
    """
    for entry in files:
        src_path, dst_path = entry[0], entry[1]
        target_dir_name = os.path.dirname(dst_path)

        # An empty dirname means "current directory": there is nothing to
        # create, and os.makedirs('') would raise. exist_ok avoids the race
        # between checking for the directory and creating it.
        if target_dir_name:
            # will create all intermediate-level directories
            os.makedirs(target_dir_name, exist_ok=True)

        shutil.copyfile(src_path, dst_path)
# URL helpers
# ------------------------------------------------------------------------------------------
def is_url(obj: Any, allow_file_urls: bool = False) -> bool:
    """Determine whether the given object is a valid URL string.

    A string qualifies when it contains "://" and, after parsing, has a scheme
    and a network location that contains a dot. With `allow_file_urls`, any
    string starting with "file:///" qualifies as well.
    """
    if not isinstance(obj, str) or "://" not in obj:
        return False
    if allow_file_urls and obj.startswith('file:///'):
        return True
    try:
        res = requests.compat.urlparse(obj)
        if not res.scheme or not res.netloc or "." not in res.netloc:
            return False
        # Re-check after resolving against the site root.
        res = requests.compat.urlparse(requests.compat.urljoin(obj, "/"))
        if not res.scheme or not res.netloc or "." not in res.netloc:
            return False
    except Exception:
        # A string that cannot be parsed is simply not a URL.
        # KeyboardInterrupt/SystemExit are not swallowed.
        return False
    return True


def _file_url_to_path(url: str) -> str:
    """Translate a "file:///..." URL into a local filesystem path."""
    # Keep the slash that starts the path: "file:///home/x" -> "/home/x".
    local_path = url[len('file://'):]

    # Windows drive URLs ("file:///C:/x") would give "/C:/x"; drop the slash.
    if re.match(r'^/[a-zA-Z]:', local_path):
        return local_path[1:]

    # Earlier versions stripped the leading slash and therefore resolved the
    # path relative to the working directory. Keep honoring that reading when
    # only the relative file exists, so existing setups continue to work.
    legacy_path = local_path[1:]
    if not os.path.exists(local_path) and os.path.exists(legacy_path):
        return legacy_path
    return local_path


def open_url(url: str, cache_dir: str = None, num_attempts: int = 10, verbose: bool = True) -> Any:
    """Download the given URL and return a binary-mode file object to access the data.

    Args:
        url: An http(s) URL accepted by `is_url()`, or a "file:///" URL.
        cache_dir: Optional directory used to look up and store downloads,
            keyed by the MD5 of the URL.
        num_attempts: Maximum number of download attempts (at least 1).
        verbose: Print progress to stdout.

    Returns:
        An open binary file (file URLs and cache hits) or an `io.BytesIO`
        holding the downloaded data.

    Raises:
        The exception of the last failed attempt once all attempts are used up.
    """
    assert is_url(url, allow_file_urls=True)
    assert num_attempts >= 1

    # Handle file URLs.
    if url.startswith('file:///'):
        return open(_file_url_to_path(url), "rb")

    # Lookup from cache.
    url_md5 = hashlib.md5(url.encode("utf-8")).hexdigest()
    if cache_dir is not None:
        cache_files = glob.glob(os.path.join(cache_dir, url_md5 + "_*"))
        if len(cache_files) == 1:
            return open(cache_files[0], "rb")

    # Download.
    url_name = None
    url_data = None
    with requests.Session() as session:
        if verbose:
            print("Downloading %s ..." % url, end="", flush=True)
        for attempts_left in reversed(range(num_attempts)):
            try:
                with session.get(url) as res:
                    res.raise_for_status()
                    if len(res.content) == 0:
                        raise IOError("No data received")

                    # Small responses may be a Google Drive interstitial page
                    # rather than the requested payload.
                    if len(res.content) < 8192:
                        content_str = res.content.decode("utf-8")
                        if "download_warning" in res.headers.get("Set-Cookie", ""):
                            links = [html.unescape(link) for link in content_str.split('"') if "export=download" in link]
                            if len(links) == 1:
                                # Follow the confirmation link on the next attempt.
                                url = requests.compat.urljoin(url, links[0])
                                raise IOError("Google Drive virus checker nag")
                        if "Google Drive - Quota exceeded" in content_str:
                            raise IOError("Google Drive download quota exceeded -- please try again later")

                    match = re.search(r'filename="([^"]*)"', res.headers.get("Content-Disposition", ""))
                    url_name = match[1] if match else url
                    url_data = res.content
                    if verbose:
                        print(" done")
                    break
            except Exception:
                # Retry on any ordinary failure. KeyboardInterrupt/SystemExit
                # are not caught here, so Ctrl-C aborts the download instead of
                # being counted as a failed attempt.
                if not attempts_left:
                    if verbose:
                        print(" failed")
                    raise
                if verbose:
                    print(".", end="", flush=True)

    # Save to cache.
    if cache_dir is not None:
        safe_name = re.sub(r"[^0-9a-zA-Z-._]", "_", url_name)
        cache_file = os.path.join(cache_dir, url_md5 + "_" + safe_name)
        temp_file = os.path.join(cache_dir, "tmp_" + uuid.uuid4().hex + "_" + url_md5 + "_" + safe_name)
        os.makedirs(cache_dir, exist_ok=True)
        # Write to a uniquely named temp file first, then move it into place.
        with open(temp_file, "wb") as f:
            f.write(url_data)
        os.replace(temp_file, cache_file)  # atomic

    # Return data as file object.
    return io.BytesIO(url_data)
================================================
FILE: docs/license.html
================================================
Nvidia Source Code License-NC
Nvidia Source Code License-NC
1. Definitions
“Licensor” means any person or entity that distributes its Work.
“Software” means the original work of authorship made available under
this License.
“Work” means the Software and any additions to or derivative works of
the Software that are made available under this License.
“Nvidia Processors” means any central processing unit (CPU), graphics
processing unit (GPU), field-programmable gate array (FPGA),
application-specific integrated circuit (ASIC) or any combination
thereof designed, made, sold, or provided by Nvidia or its affiliates.
The terms “reproduce,” “reproduction,” “derivative works,” and
“distribution” have the meaning as provided under U.S. copyright law;
provided, however, that for the purposes of this License, derivative
works shall not include works that remain separable from, or merely
link (or bind by name) to the interfaces of, the Work.
Works, including the Software, are “made available” under this License
by including in or with the Work either (a) a copyright notice
referencing the applicability of this License to the Work, or (b) a
copy of this License.
2. License Grants
2.1 Copyright Grant. Subject to the terms and conditions of this
License, each Licensor grants to you a perpetual, worldwide,
non-exclusive, royalty-free, copyright license to reproduce,
prepare derivative works of, publicly display, publicly perform,
sublicense and distribute its Work and any resulting derivative
works in any form.
3. Limitations
3.1 Redistribution. You may reproduce or distribute the Work only
if (a) you do so under this License, (b) you include a complete
copy of this License with your distribution, and (c) you retain
without modification any copyright, patent, trademark, or
attribution notices that are present in the Work.
3.2 Derivative Works. You may specify that additional or different
terms apply to the use, reproduction, and distribution of your
derivative works of the Work (“Your Terms”) only if (a) Your Terms
provide that the use limitation in Section 3.3 applies to your
derivative works, and (b) you identify the specific derivative
works that are subject to Your Terms. Notwithstanding Your Terms,
this License (including the redistribution requirements in Section
3.1) will continue to apply to the Work itself.
3.3 Use Limitation. The Work and any derivative works thereof only
may be used or intended for use non-commercially. The Work or
derivative works thereof may be used or intended for use by Nvidia
or its affiliates commercially or non-commercially. As used herein,
“non-commercially” means for research or evaluation purposes only.
3.4 Patent Claims. If you bring or threaten to bring a patent claim
against any Licensor (including any claim, cross-claim or
counterclaim in a lawsuit) to enforce any patents that you allege
are infringed by any Work, then your rights under this License from
such Licensor (including the grants in Sections 2.1 and 2.2) will
terminate immediately.
3.5 Trademarks. This License does not grant any rights to use any
Licensor’s or its affiliates’ names, logos, or trademarks, except
as necessary to reproduce the notices described in this License.
3.6 Termination. If you violate any term of this License, then your
rights under this License (including the grants in Sections 2.1 and
2.2) will terminate immediately.
4. Disclaimer of Warranty.
THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
THIS LICENSE.
5. Limitation of Liability.
EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
THE POSSIBILITY OF SUCH DAMAGES.
================================================
FILE: docs/versions.html
================================================
StyleGAN versions
StyleGAN3 (2021)
StyleGAN2-ADA (2020)
StyleGAN2 (2019)
StyleGAN (2018)
Progressive GAN (2017)
================================================
FILE: metrics/__init__.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
# empty
================================================
FILE: metrics/frechet_inception_distance.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Frechet Inception Distance (FID)."""
import os
import numpy as np
import scipy
import tensorflow as tf
import dnnlib.tflib as tflib
from metrics import metric_base
from training import misc
#----------------------------------------------------------------------------
class FID(metric_base.MetricBase):
    """Frechet Inception Distance between real images and generator output."""

    def __init__(self, num_images, minibatch_per_gpu, **kwargs):
        """Store the sample count and per-GPU batch size; other kwargs go to the base class."""
        super().__init__(**kwargs)
        self.num_images = num_images
        self.minibatch_per_gpu = minibatch_per_gpu

    def _evaluate(self, Gs, Gs_kwargs, num_gpus):
        """Compute the metric for generator `Gs` and report it via `_report_result()`."""
        minibatch_size = num_gpus * self.minibatch_per_gpu
        inception = misc.load_pkl('https://nvlabs-fi-cdn.nvidia.com/stylegan/networks/metrics/inception_v3_features.pkl')
        num_features = inception.output_shape[1]
        # One row of features per image; reused for reals first, then fakes.
        activations = np.empty([self.num_images, num_features], dtype=np.float32)

        # Calculate statistics for reals (or load them from the cache file).
        cache_file = self._get_cache_file_for_reals(num_images=self.num_images)
        os.makedirs(os.path.dirname(cache_file), exist_ok=True)
        if os.path.isfile(cache_file):
            mu_real, sigma_real = misc.load_pkl(cache_file)
        else:
            for batch_idx, real_images in enumerate(self._iterate_reals(minibatch_size=minibatch_size)):
                begin = batch_idx * minibatch_size
                end = min(begin + minibatch_size, self.num_images)
                # The final batch may be larger than what is still needed.
                activations[begin:end] = inception.run(real_images[:end-begin], num_gpus=num_gpus, assume_frozen=True)
                if end == self.num_images:
                    break
            mu_real = np.mean(activations, axis=0)
            sigma_real = np.cov(activations, rowvar=False)
            misc.save_pkl((mu_real, sigma_real), cache_file)

        # Construct TensorFlow graph: one generator + feature-network clone per GPU.
        result_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                Gs_clone = Gs.clone()
                inception_clone = inception.clone()
                latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
                labels = self._get_random_labels_tf(self.minibatch_per_gpu)
                fake_images = Gs_clone.get_output_for(latents, labels, **Gs_kwargs)
                fake_images = tflib.convert_images_to_uint8(fake_images)
                result_expr.append(inception_clone.get_output_for(fake_images))

        # Calculate statistics for fakes.
        for begin in range(0, self.num_images, minibatch_size):
            self._report_progress(begin, self.num_images)
            end = min(begin + minibatch_size, self.num_images)
            batch_features = np.concatenate(tflib.run(result_expr), axis=0)
            activations[begin:end] = batch_features[:end-begin]
        mu_fake = np.mean(activations, axis=0)
        sigma_fake = np.cov(activations, rowvar=False)

        # Calculate FID: |mu_fake - mu_real|^2 + trace(sigma_fake + sigma_real - 2 * sqrt(sigma_fake . sigma_real)).
        mean_term = np.square(mu_fake - mu_real).sum()
        cov_sqrt, _ = scipy.linalg.sqrtm(np.dot(sigma_fake, sigma_real), disp=False) # pylint: disable=no-member
        dist = mean_term + np.trace(sigma_fake + sigma_real - 2*cov_sqrt)
        # Only the real part of the (possibly complex) result is reported.
        self._report_result(np.real(dist))
#----------------------------------------------------------------------------
================================================
FILE: metrics/inception_score.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Inception Score (IS)."""
import numpy as np
import tensorflow as tf
import dnnlib.tflib as tflib
from metrics import metric_base
from training import misc
#----------------------------------------------------------------------------
class IS(metric_base.MetricBase):
    """Inception Score of generator output, reported as mean and std over splits."""

    def __init__(self, num_images, num_splits, minibatch_per_gpu, **kwargs):
        """Store sample count, number of splits and per-GPU batch size; other kwargs go to the base class."""
        super().__init__(**kwargs)
        self.num_images = num_images
        self.num_splits = num_splits
        self.minibatch_per_gpu = minibatch_per_gpu

    def _evaluate(self, Gs, Gs_kwargs, num_gpus):
        """Compute the metric for generator `Gs` and report it via `_report_result()`."""
        minibatch_size = num_gpus * self.minibatch_per_gpu
        inception = misc.load_pkl('https://nvlabs-fi-cdn.nvidia.com/stylegan/networks/metrics/inception_v3_softmax.pkl')
        num_outputs = inception.output_shape[1]
        activations = np.empty([self.num_images, num_outputs], dtype=np.float32)

        # Construct TensorFlow graph: one generator + classifier clone per GPU.
        result_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                Gs_clone = Gs.clone()
                inception_clone = inception.clone()
                latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
                labels = self._get_random_labels_tf(self.minibatch_per_gpu)
                fake_images = Gs_clone.get_output_for(latents, labels, **Gs_kwargs)
                fake_images = tflib.convert_images_to_uint8(fake_images)
                result_expr.append(inception_clone.get_output_for(fake_images))

        # Calculate activations for fakes.
        for begin in range(0, self.num_images, minibatch_size):
            self._report_progress(begin, self.num_images)
            end = min(begin + minibatch_size, self.num_images)
            batch_outputs = np.concatenate(tflib.run(result_expr), axis=0)
            activations[begin:end] = batch_outputs[:end-begin]

        # Calculate IS per split: exp of the mean over rows of sum(p * (log p - log mean(p))).
        scores = []
        for split_idx in range(self.num_splits):
            first = split_idx * self.num_images // self.num_splits
            last = (split_idx + 1) * self.num_images // self.num_splits
            part = activations[first:last]
            marginal = np.expand_dims(np.mean(part, 0), 0)
            kl = part * (np.log(part) - np.log(marginal))
            kl = np.mean(np.sum(kl, 1))
            scores.append(np.exp(kl))
        self._report_result(np.mean(scores), suffix='_mean')
        self._report_result(np.std(scores), suffix='_std')
#----------------------------------------------------------------------------
================================================
FILE: metrics/linear_separability.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Linear Separability (LS)."""
from collections import defaultdict
import numpy as np
import sklearn.svm
import tensorflow as tf
import dnnlib.tflib as tflib
from metrics import metric_base
from training import misc
#----------------------------------------------------------------------------
# URLs of the pre-trained celebahq attribute classifiers. The position of a
# URL in this list is the attribute index (it is also the two-digit number
# embedded in the file name), so the order of the names below must not change.
classifier_urls = [
    'https://nvlabs-fi-cdn.nvidia.com/stylegan/networks/metrics/celebahq-classifier-%02d-%s.pkl' % (attrib_idx, attrib_name)
    for attrib_idx, attrib_name in enumerate([
        'male',
        'smiling',
        'attractive',
        'wavy-hair',
        'young',
        '5-o-clock-shadow',
        'arched-eyebrows',
        'bags-under-eyes',
        'bald',
        'bangs',
        'big-lips',
        'big-nose',
        'black-hair',
        'blond-hair',
        'blurry',
        'brown-hair',
        'bushy-eyebrows',
        'chubby',
        'double-chin',
        'eyeglasses',
        'goatee',
        'gray-hair',
        'heavy-makeup',
        'high-cheekbones',
        'mouth-slightly-open',
        'mustache',
        'narrow-eyes',
        'no-beard',
        'oval-face',
        'pale-skin',
        'pointy-nose',
        'receding-hairline',
        'rosy-cheeks',
        'sideburns',
        'straight-hair',
        'wearing-earrings',
        'wearing-hat',
        'wearing-lipstick',
        'wearing-necklace',
        'wearing-necktie',
    ])
]
#----------------------------------------------------------------------------
def prob_normalize(p):
    """Rescale a 2D table so that all of its entries sum to one.

    Args:
        p: Array-like with exactly two dimensions.

    Returns:
        A float32 NumPy array with the shape of `p`, divided by its total sum.
    """
    table = np.asarray(p).astype(np.float32)
    assert table.ndim == 2
    total = np.sum(table)
    return table / total
def mutual_information(p):
    """Return the mutual information I(X;Y), in bits, of a 2D joint table.

    The table is first normalized with `prob_normalize`. Axis 0 indexes X and
    axis 1 indexes Y. Cells whose probability is not strictly positive
    contribute nothing to the sum.
    """
    joint = prob_normalize(p)
    marginal_x = np.sum(joint, axis=1)
    marginal_y = np.sum(joint, axis=0)
    total_bits = 0.0
    for row_idx, p_x in enumerate(marginal_x):
        for col_idx, p_y in enumerate(marginal_y):
            p_xy = joint[row_idx][col_idx]
            if not p_xy > 0.0:
                continue
            total_bits += p_xy * np.log2(p_xy / (p_x * p_y))  # log2 => result is in bits
    return total_bits
def entropy(p):
    """Return the Shannon entropy, in bits, of a 2D table.

    The table is first normalized with `prob_normalize`; cells whose
    probability is not strictly positive are skipped.
    """
    joint = prob_normalize(p)
    total_bits = 0.0
    for row in joint:
        for p_xy in row:
            if p_xy > 0.0:
                total_bits -= p_xy * np.log2(p_xy)
    return total_bits
def conditional_entropy(p):
    """Return H(Y|X) in bits, where X is axis 0 and Y is axis 1 of `p`.

    In other words: how many additional bits are needed to know where we are
    on axis 1 once we know where we are on axis 0. Computed as H(Y) - I(X;Y).
    """
    joint = prob_normalize(p)
    # Summing out axis 0 leaves the marginal of Y as a 1xN table; its entropy is H(Y).
    marginal_y = np.sum(joint, axis=0, keepdims=True)
    h_y_given_x = entropy(marginal_y) - mutual_information(joint)
    # Floating-point round-off can push the difference just below zero; clamp it.
    return max(0.0, h_y_given_x)
#----------------------------------------------------------------------------
class LS(metric_base.MetricBase):
    """Linear Separability metric.

    Generates images, labels them with pre-trained binary attribute
    classifiers, keeps the most confidently classified samples, and fits a
    linear SVM per attribute on the latents ('latents') and on the mapped
    latents ('dlatents'). The reported score for each space is 2 raised to the
    sum of the per-attribute conditional entropies H(classifier label | SVM
    prediction).

    Args:
        num_samples: Number of generated samples ranked per attribute.
        num_keep: Number of most-confident samples kept for the SVM fit
            (must not exceed `num_samples`).
        attrib_indices: Indices into `classifier_urls` to evaluate.
        minibatch_per_gpu: Samples generated per GPU per graph execution.
        **kwargs: Forwarded to `metric_base.MetricBase`.
    """

    def __init__(self, num_samples, num_keep, attrib_indices, minibatch_per_gpu, **kwargs):
        assert num_keep <= num_samples
        super().__init__(**kwargs)
        self.num_samples = num_samples
        self.num_keep = num_keep
        self.attrib_indices = attrib_indices
        self.minibatch_per_gpu = minibatch_per_gpu

    def _evaluate(self, Gs, Gs_kwargs, num_gpus):
        """Build the sampling graph, draw samples, and report the `_z` and `_w` scores."""
        minibatch_size = num_gpus * self.minibatch_per_gpu

        # Construct TensorFlow graph for each GPU.
        result_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                Gs_clone = Gs.clone()

                # Generate images.
                latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
                labels = self._get_random_labels_tf(self.minibatch_per_gpu)
                dlatents = Gs_clone.components.mapping.get_output_for(latents, labels, **Gs_kwargs)
                images = Gs_clone.get_output_for(latents, None, **Gs_kwargs)

                # Downsample to 256x256. The attribute classifiers were built for 256x256.
                if images.shape[2] > 256:
                    factor = images.shape[2] // 256
                    images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor])
                    images = tf.reduce_mean(images, axis=[3, 5])

                # Run classifier for each attribute.
                result_dict = dict(latents=latents, dlatents=dlatents[:,-1])
                for attrib_idx in self.attrib_indices:
                    classifier = misc.load_pkl(classifier_urls[attrib_idx])
                    logits = classifier.get_output_for(images, None)
                    # Two-way softmax over (logit, -logit) gives a probability per class.
                    predictions = tf.nn.softmax(tf.concat([logits, -logits], axis=1))
                    result_dict[attrib_idx] = predictions
                result_expr.append(result_dict)

        # Sampling loop.
        results = []
        for begin in range(0, self.num_samples, minibatch_size):
            self._report_progress(begin, self.num_samples)
            results += tflib.run(result_expr)
        results = {key: np.concatenate([value[key] for value in results], axis=0) for key in results[0].keys()}

        # Calculate conditional entropy for each attribute.
        conditional_entropies = defaultdict(list)
        for attrib_idx in self.attrib_indices:
            # Prune the least confident samples. The confidence of a sample is the
            # larger of its two class probabilities; it is computed once for all
            # samples instead of once per sort-key call.
            confidence = np.max(results[attrib_idx][:self.num_samples], axis=1)
            pruned_indices = sorted(range(self.num_samples), key=lambda i: -confidence[i])
            pruned_indices = pruned_indices[:self.num_keep]

            # Fit SVM to the remaining samples.
            svm_targets = np.argmax(results[attrib_idx][pruned_indices], axis=1)
            for space in ['latents', 'dlatents']:
                svm_inputs = results[space][pruned_indices]
                try:
                    svm = sklearn.svm.LinearSVC()
                    svm.fit(svm_inputs, svm_targets)
                    svm_outputs = svm.predict(svm_inputs)
                except Exception:
                    # Best-effort fallback when the fit is not possible (presumably
                    # degenerate targets, e.g. a single class). `Exception` rather
                    # than a bare `except` so Ctrl-C / SystemExit still propagate.
                    svm_outputs = svm_targets # assume perfect prediction

                # Calculate conditional entropy.
                # p[row][col] = fraction of samples with SVM output `row` and classifier label `col`.
                p = [[np.mean((svm_outputs == row) & (svm_targets == col)) for col in (0, 1)] for row in (0, 1)]
                conditional_entropies[space].append(conditional_entropy(p))

        # Calculate separability scores.
        scores = {key: 2**np.sum(values) for key, values in conditional_entropies.items()}
        self._report_result(scores['latents'], suffix='_z')
        self._report_result(scores['dlatents'], suffix='_w')
#----------------------------------------------------------------------------
================================================
FILE: metrics/metric_base.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Common definitions for GAN metrics."""
import os
import time
import hashlib
import numpy as np
import tensorflow as tf
import dnnlib
import dnnlib.tflib as tflib
from training import misc
from training import dataset
#----------------------------------------------------------------------------
# Base class for metrics.
class MetricBase:
    """Common machinery for GAN metrics.

    Subclasses override `_evaluate()` and publish values through
    `_report_result()`. `run()` loads the network pickle, opens a fresh
    TensorFlow graph/session, times the evaluation and optionally logs the
    formatted result line.
    """

    def __init__(self, name):
        self.name = name
        self._dataset_obj = None
        self._progress_lo = None
        self._progress_hi = None
        self._progress_max = None
        self._progress_sec = None
        self._progress_time = None
        self._reset()

    def close(self):
        """Release the dataset (if one was opened) and clear per-run state."""
        self._reset()

    def _reset(self, network_pkl=None, run_dir=None, data_dir=None, dataset_args=None, mirror_augment=None):
        """Close any open dataset and re-initialize the per-run state.

        When `dataset_args` or `mirror_augment` is missing and `run_dir` is
        given, both are taken from the configuration of that previous run.
        """
        if self._dataset_obj is not None:
            self._dataset_obj.close()

        self._network_pkl = network_pkl
        self._data_dir = data_dir
        self._dataset_args = dataset_args
        self._dataset_obj = None
        self._mirror_augment = mirror_augment
        self._eval_time = 0
        self._results = []

        if (dataset_args is None or mirror_augment is None) and run_dir is not None:
            run_config = misc.parse_config_for_previous_run(run_dir)
            self._dataset_args = dict(run_config['dataset'])
            self._dataset_args['shuffle_mb'] = 0
            self._mirror_augment = run_config['train'].get('mirror_augment', False)

    def configure_progress_reports(self, plo, phi, pmax, psec=15):
        """Enable progress reporting.

        Progress of a run is mapped linearly onto [`plo`, `phi`] out of
        `pmax`; reports are issued at most once every `psec` seconds.
        """
        self._progress_lo = plo
        self._progress_hi = phi
        self._progress_max = pmax
        self._progress_sec = psec

    def run(self, network_pkl, run_dir=None, data_dir=None, dataset_args=None, mirror_augment=None, num_gpus=1, tf_config=None, log_results=True, Gs_kwargs=None):
        """Evaluate the metric for the networks stored in `network_pkl`.

        Args:
            network_pkl: Pickle containing a `(G, D, Gs)` triple; only `Gs` is evaluated.
            run_dir: Optional run directory; used to recover dataset settings and
                as the location of the `metric-<name>.txt` log file.
            data_dir: Forwarded to the dataset loader.
            dataset_args: Dataset loader arguments; if omitted, taken from `run_dir`.
            mirror_augment: Whether reals are mirror-augmented; if omitted, taken from `run_dir`.
            num_gpus: Number of GPUs passed to `_evaluate()`.
            tf_config: Passed to `tflib.create_session()`.
            log_results: Print the result line (and append it to the log file when
                `run_dir` is given).
            Gs_kwargs: Extra keyword arguments for the generator. Defaults to
                `dict(is_validation=True)`; a fresh dict is created per call so
                that the default is never shared between calls.
        """
        if Gs_kwargs is None:
            Gs_kwargs = dict(is_validation=True)

        self._reset(network_pkl=network_pkl, run_dir=run_dir, data_dir=data_dir, dataset_args=dataset_args, mirror_augment=mirror_augment)
        time_begin = time.time()
        with tf.Graph().as_default(), tflib.create_session(tf_config).as_default(): # pylint: disable=not-context-manager
            self._report_progress(0, 1)
            _G, _D, Gs = misc.load_pkl(self._network_pkl)
            self._evaluate(Gs, Gs_kwargs=Gs_kwargs, num_gpus=num_gpus)
            self._report_progress(1, 1)

        self._eval_time = time.time() - time_begin # pylint: disable=attribute-defined-outside-init

        if log_results:
            if run_dir is not None:
                log_file = os.path.join(run_dir, 'metric-%s.txt' % self.name)
                with dnnlib.util.Logger(log_file, 'a'):
                    print(self.get_result_str().strip())
            else:
                print(self.get_result_str().strip())

    def get_result_str(self):
        """Format the network name, evaluation time and all reported results as one line."""
        network_name = os.path.splitext(os.path.basename(self._network_pkl))[0]
        if len(network_name) > 29:
            # Keep the tail of long names so the column stays 29 characters wide.
            network_name = '...' + network_name[-26:]
        result_str = '%-30s' % network_name
        result_str += ' time %-12s' % dnnlib.util.format_time(self._eval_time)
        for res in self._results:
            result_str += ' ' + self.name + res.suffix + ' '
            result_str += res.fmt % res.value
        return result_str

    def update_autosummaries(self):
        """Publish every reported result as an autosummary named 'Metrics/<name><suffix>'."""
        for res in self._results:
            tflib.autosummary.autosummary('Metrics/' + self.name + res.suffix, res.value)

    def _evaluate(self, Gs, Gs_kwargs, num_gpus):
        """Compute the metric; must call `_report_result()` for each value."""
        raise NotImplementedError # to be overridden by subclasses

    def _report_result(self, value, suffix='', fmt='%-10.4f'):
        """Record one result value together with its name suffix and print format."""
        self._results += [dnnlib.EasyDict(value=value, suffix=suffix, fmt=fmt)]

    def _report_progress(self, pcur, pmax, status_str=''):
        """Forward progress `pcur / pmax` to the run context, rate-limited.

        Does nothing unless `configure_progress_reports()` has been called.
        """
        if self._progress_lo is None or self._progress_hi is None or self._progress_max is None:
            return
        t = time.time()
        if self._progress_sec is not None and self._progress_time is not None and t < self._progress_time + self._progress_sec:
            return
        self._progress_time = t
        val = self._progress_lo + (pcur / pmax) * (self._progress_hi - self._progress_lo)
        dnnlib.RunContext.get().update(status_str, int(val), self._progress_max)

    def _get_cache_file_for_reals(self, extension='pkl', **kwargs):
        """Return the cache path for statistics computed over the real images.

        The file name combines an MD5 over the metric name, mirror setting,
        dataset arguments and `kwargs` with the metric and dataset names.
        """
        all_args = dnnlib.EasyDict(metric_name=self.name, mirror_augment=self._mirror_augment)
        all_args.update(self._dataset_args)
        all_args.update(kwargs)
        md5 = hashlib.md5(repr(sorted(all_args.items())).encode('utf-8'))
        dataset_name = self._dataset_args.get('tfrecord_dir', None) or self._dataset_args.get('h5_file', None)
        dataset_name = os.path.splitext(os.path.basename(dataset_name))[0]
        return os.path.join('.stylegan2-cache', '%s-%s-%s.%s' % (md5.hexdigest(), self.name, dataset_name, extension))

    def _get_dataset_obj(self):
        """Return the dataset object, loading it on first use."""
        if self._dataset_obj is None:
            self._dataset_obj = dataset.load_dataset(data_dir=self._data_dir, **self._dataset_args)
        return self._dataset_obj

    def _iterate_reals(self, minibatch_size):
        """Yield minibatches of real images indefinitely (mirror-augmented if enabled)."""
        dataset_obj = self._get_dataset_obj()
        while True:
            images, _labels = dataset_obj.get_minibatch_np(minibatch_size)
            if self._mirror_augment:
                images = misc.apply_mirror_augment(images)
            yield images

    def _iterate_fakes(self, Gs, minibatch_size, num_gpus):
        """Yield minibatches of generated images indefinitely, converted to uint8 NHWC."""
        while True:
            latents = np.random.randn(minibatch_size, *Gs.input_shape[1:])
            fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)
            images = Gs.run(latents, None, output_transform=fmt, is_validation=True, num_gpus=num_gpus, assume_frozen=True)
            yield images

    def _get_random_labels_tf(self, minibatch_size):
        """Return the dataset's random-label expression for `minibatch_size` samples."""
        return self._get_dataset_obj().get_random_labels_tf(minibatch_size)
#----------------------------------------------------------------------------
# Group of multiple metrics.
class MetricGroup:
    """A collection of metrics that are run and reported together."""

    def __init__(self, metric_kwarg_list):
        # Each entry is a kwargs dict handed to `call_func_by_name` to build one metric.
        built = [dnnlib.util.call_func_by_name(**metric_kwargs) for metric_kwargs in metric_kwarg_list]
        self.metrics = built

    def run(self, *args, **kwargs):
        """Run every metric in order with the same arguments."""
        for member in self.metrics:
            member.run(*args, **kwargs)

    def get_result_str(self):
        """Return the result strings of all metrics joined by single spaces."""
        parts = [member.get_result_str() for member in self.metrics]
        return ' '.join(parts)

    def update_autosummaries(self):
        """Ask every metric to publish its autosummaries."""
        for member in self.metrics:
            member.update_autosummaries()
#----------------------------------------------------------------------------
# Dummy metric for debugging purposes.
class DummyMetric(MetricBase):
    """Placeholder metric that ignores its inputs and always reports 0.0."""

    def _evaluate(self, Gs, Gs_kwargs, num_gpus):
        del Gs, Gs_kwargs, num_gpus  # intentionally unused
        self._report_result(0.0)
#----------------------------------------------------------------------------
================================================
FILE: metrics/metric_defaults.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Default metric definitions."""
from dnnlib import EasyDict
#----------------------------------------------------------------------------
# Built-in metric configurations, keyed by metric name. Every entry carries
# its own `name`, the dotted `func_name` of the metric class, and the settings
# stored alongside it.
metric_defaults = EasyDict({args.name: args for args in (
    EasyDict(
        name='fid50k',
        func_name='metrics.frechet_inception_distance.FID',
        num_images=50000,
        minibatch_per_gpu=8,
    ),
    EasyDict(
        name='is50k',
        func_name='metrics.inception_score.IS',
        num_images=50000,
        num_splits=10,
        minibatch_per_gpu=8,
    ),
    EasyDict(
        name='ppl_zfull',
        func_name='metrics.perceptual_path_length.PPL',
        num_samples=50000,
        epsilon=1e-4,
        space='z',
        sampling='full',
        crop=True,
        minibatch_per_gpu=4,
        Gs_overrides=dict(dtype='float32', mapping_dtype='float32'),
    ),
    EasyDict(
        name='ppl_wfull',
        func_name='metrics.perceptual_path_length.PPL',
        num_samples=50000,
        epsilon=1e-4,
        space='w',
        sampling='full',
        crop=True,
        minibatch_per_gpu=4,
        Gs_overrides=dict(dtype='float32', mapping_dtype='float32'),
    ),
    EasyDict(
        name='ppl_zend',
        func_name='metrics.perceptual_path_length.PPL',
        num_samples=50000,
        epsilon=1e-4,
        space='z',
        sampling='end',
        crop=True,
        minibatch_per_gpu=4,
        Gs_overrides=dict(dtype='float32', mapping_dtype='float32'),
    ),
    EasyDict(
        name='ppl_wend',
        func_name='metrics.perceptual_path_length.PPL',
        num_samples=50000,
        epsilon=1e-4,
        space='w',
        sampling='end',
        crop=True,
        minibatch_per_gpu=4,
        Gs_overrides=dict(dtype='float32', mapping_dtype='float32'),
    ),
    EasyDict(
        name='ppl2_wend',
        func_name='metrics.perceptual_path_length.PPL',
        num_samples=50000,
        epsilon=1e-4,
        space='w',
        sampling='end',
        crop=False,
        minibatch_per_gpu=4,
        Gs_overrides=dict(dtype='float32', mapping_dtype='float32'),
    ),
    EasyDict(
        name='ls',
        func_name='metrics.linear_separability.LS',
        num_samples=200000,
        num_keep=100000,
        attrib_indices=range(40),
        minibatch_per_gpu=4,
    ),
    EasyDict(
        name='pr50k3',
        func_name='metrics.precision_recall.PR',
        num_images=50000,
        nhood_size=3,
        minibatch_per_gpu=8,
        row_batch_size=10000,
        col_batch_size=10000,
    ),
)})
#----------------------------------------------------------------------------
================================================
FILE: metrics/perceptual_path_length.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Perceptual Path Length (PPL)."""
import numpy as np
import tensorflow as tf
import dnnlib.tflib as tflib
from metrics import metric_base
from training import misc
#----------------------------------------------------------------------------
# Normalize batch of vectors.
def normalize(v):
    """Divide each vector along the last axis by its Euclidean length."""
    squared_length = tf.reduce_sum(tf.square(v), axis=-1, keepdims=True)
    return v / tf.sqrt(squared_length)
# Spherical interpolation of a batch of vectors.
def slerp(a, b, t):
    """Spherically interpolate between two batches of vectors.

    Both inputs are normalized along the last axis first; the result is
    normalized as well. `t` scales the angle between `a` and `b`.
    """
    unit_a = normalize(a)
    unit_b = normalize(b)
    cos_angle = tf.reduce_sum(unit_a * unit_b, axis=-1, keepdims=True)
    angle_at_t = t * tf.math.acos(cos_angle)
    # Unit vector orthogonal to `unit_a` in the plane spanned by the two inputs.
    ortho = normalize(unit_b - cos_angle * unit_a)
    blended = unit_a * tf.math.cos(angle_at_t) + ortho * tf.math.sin(angle_at_t)
    return normalize(blended)
#----------------------------------------------------------------------------
class PPL(metric_base.MetricBase):
    """Perceptual Path Length metric.

    For random pairs of latents, two images are synthesized at interpolation
    positions `t` and `t + epsilon`; their perceptual distance, scaled by
    `1 / epsilon**2`, is averaged after discarding values outside the 1st-99th
    percentile range.

    Args:
        num_samples: Lower bound on the number of distances to collect.
        epsilon: Step between the two interpolation positions.
        space: 'z' (slerp between latents) or 'w' (lerp between mapped latents).
        sampling: 'full' draws `t` uniformly from [0, 1); 'end' fixes `t` at 0.
        crop: Whether to crop a sub-region of the image before measuring.
        minibatch_per_gpu: Number of pairs evaluated per GPU per graph execution.
        Gs_overrides: Entries that override `Gs_kwargs` for this metric.
        **kwargs: Forwarded to `metric_base.MetricBase`.
    """

    def __init__(self, num_samples, epsilon, space, sampling, crop, minibatch_per_gpu, Gs_overrides, **kwargs):
        assert space in ['z', 'w']
        assert sampling in ['full', 'end']
        super().__init__(**kwargs)
        self.num_samples = num_samples
        self.epsilon = epsilon
        self.space = space
        self.sampling = sampling
        self.crop = crop
        self.minibatch_per_gpu = minibatch_per_gpu
        self.Gs_overrides = Gs_overrides

    def _evaluate(self, Gs, Gs_kwargs, num_gpus):
        """Build the per-GPU distance graph, sample it, and report the filtered mean."""
        # Work on a copy so the caller's kwargs are not modified by the overrides.
        Gs_kwargs = dict(Gs_kwargs)
        Gs_kwargs.update(self.Gs_overrides)
        pairs_per_gpu = self.minibatch_per_gpu
        minibatch_size = num_gpus * pairs_per_gpu
        lerp_t_max = 1.0 if self.sampling == 'full' else 0.0

        # Construct TensorFlow graph.
        distance_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                Gs_clone = Gs.clone()
                noise_vars = [
                    var
                    for name, var in Gs_clone.components.synthesis.vars.items()
                    if name.startswith('noise')
                ]

                # Generate random latents and interpolation t-values.
                # Rows 2k and 2k+1 of `lat_t01` form the k-th pair of endpoints.
                lat_t01 = tf.random_normal([pairs_per_gpu * 2] + Gs_clone.input_shape[1:])
                lerp_t = tf.random_uniform([pairs_per_gpu], 0.0, lerp_t_max)
                random_labels = self._get_random_labels_tf(pairs_per_gpu)
                # Repeat each label so that both members of a pair share it.
                labels = tf.reshape(tf.tile(random_labels, [1, 2]), [pairs_per_gpu * 2, -1])

                # Interpolate in W or Z.
                if self.space == 'w':
                    dlat_t01 = Gs_clone.components.mapping.get_output_for(lat_t01, labels, **Gs_kwargs)
                    dlat_t01 = tf.cast(dlat_t01, tf.float32)
                    dlat_t0 = dlat_t01[0::2]
                    dlat_t1 = dlat_t01[1::2]
                    dlat_e0 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis])
                    dlat_e1 = tflib.lerp(dlat_t0, dlat_t1, lerp_t[:, np.newaxis, np.newaxis] + self.epsilon)
                    # Interleave so that rows 2k / 2k+1 are positions t / t+epsilon of pair k.
                    dlat_e01 = tf.reshape(tf.stack([dlat_e0, dlat_e1], axis=1), dlat_t01.shape)
                else: # space == 'z'
                    lat_t0 = lat_t01[0::2]
                    lat_t1 = lat_t01[1::2]
                    lat_e0 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis])
                    lat_e1 = slerp(lat_t0, lat_t1, lerp_t[:, np.newaxis] + self.epsilon)
                    lat_e01 = tf.reshape(tf.stack([lat_e0, lat_e1], axis=1), lat_t01.shape)
                    dlat_e01 = Gs_clone.components.mapping.get_output_for(lat_e01, labels, **Gs_kwargs)

                # Synthesize images.
                noise_initializers = [var.initializer for var in noise_vars]
                with tf.control_dependencies(noise_initializers): # use same noise inputs for the entire minibatch
                    images = Gs_clone.components.synthesis.get_output_for(dlat_e01, randomize_noise=False, **Gs_kwargs)
                    images = tf.cast(images, tf.float32)

                # Crop only the face region.
                if self.crop:
                    eighth = int(images.shape[2] // 8)
                    images = images[:, :, eighth*3 : eighth*7, eighth*2 : eighth*6]

                # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
                factor = images.shape[2] // 256
                if factor > 1:
                    images = tf.reshape(images, [-1, images.shape[1], images.shape[2] // factor, factor, images.shape[3] // factor, factor])
                    images = tf.reduce_mean(images, axis=[3,5])

                # Scale dynamic range from [-1,1] to [0,255] for VGG.
                images = (images + 1) * (255 / 2)

                # Evaluate perceptual distance.
                img_e0 = images[0::2]
                img_e1 = images[1::2]
                distance_measure = misc.load_pkl('https://nvlabs-fi-cdn.nvidia.com/stylegan/networks/metrics/vgg16_zhang_perceptual.pkl')
                distance_expr.append(distance_measure.get_output_for(img_e0, img_e1) * (1 / self.epsilon**2))

        # Sampling loop.
        collected = []
        for begin in range(0, self.num_samples, minibatch_size):
            self._report_progress(begin, self.num_samples)
            collected.extend(tflib.run(distance_expr))
        all_distances = np.concatenate(collected, axis=0)

        # Reject outliers.
        lo = np.percentile(all_distances, 1, interpolation='lower')
        hi = np.percentile(all_distances, 99, interpolation='higher')
        keep_mask = np.logical_and(lo <= all_distances, all_distances <= hi)
        filtered_distances = all_distances[keep_mask]
        self._report_result(np.mean(filtered_distances))
#----------------------------------------------------------------------------
================================================
FILE: metrics/precision_recall.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Precision/Recall (PR)."""
import os
import numpy as np
import tensorflow as tf
import dnnlib
import dnnlib.tflib as tflib
from metrics import metric_base
from training import misc
#----------------------------------------------------------------------------
def batch_pairwise_distances(U, V):
    """Compute pairwise squared Euclidean distances between rows of U and rows of V.

    Uses the expansion |u|^2 - 2*u.v + |v|^2 and clamps the result at zero.
    The output has one row per row of U and one column per row of V.
    """
    with tf.variable_scope('pairwise_dist_block'):
        # Squared norms of each row in U and V.
        sq_norm_u = tf.reduce_sum(tf.square(U), 1)
        sq_norm_v = tf.reduce_sum(tf.square(V), 1)

        # Squared norms of U as a column vector and of V as a row vector, so they broadcast.
        sq_norm_u = tf.reshape(sq_norm_u, [-1, 1])
        sq_norm_v = tf.reshape(sq_norm_v, [1, -1])

        # Pairwise squared Euclidean distances; rounding can make them slightly negative.
        unclamped = sq_norm_u - 2*tf.matmul(U, V, transpose_a=False, transpose_b=True) + sq_norm_v
        D = tf.maximum(unclamped, 0.0)
    return D
#----------------------------------------------------------------------------
class DistanceBlock():
    """TensorFlow graph that evaluates pairwise distances between two feature batches.

    The second batch is split evenly across `num_gpus` devices; each device
    computes distances from the whole first batch to its share, and the
    partial results are concatenated along the column axis.
    """

    def __init__(self, num_features, num_gpus):
        self.num_features = num_features
        self.num_gpus = num_gpus

        # Initialize TF graph to calculate pairwise distances.
        with tf.device('/cpu:0'):
            feature_shape = [None, self.num_features]
            self._features_batch1 = tf.placeholder(tf.float16, shape=feature_shape)
            self._features_batch2 = tf.placeholder(tf.float16, shape=feature_shape)
            shards = tf.split(self._features_batch2, self.num_gpus, axis=0)
            per_gpu_distances = []
            for gpu_idx in range(self.num_gpus):
                with tf.device('/gpu:%d' % gpu_idx):
                    shard_distances = batch_pairwise_distances(self._features_batch1, shards[gpu_idx])
                    per_gpu_distances.append(shard_distances)
            self._distance_block = tf.concat(per_gpu_distances, axis=1)

    def pairwise_distances(self, U, V):
        """Evaluate pairwise distances between two batches of feature vectors."""
        feed = {self._features_batch1: U, self._features_batch2: V}
        return self._distance_block.eval(feed_dict=feed)
#----------------------------------------------------------------------------
class ManifoldEstimator():
    """Estimates the manifold of a set of feature vectors.

    Every reference vector gets one radius per entry of `nhood_sizes`: the
    distance to its k-th nearest neighbor, as returned by
    `distance_block.pairwise_distances`. A new vector counts as lying on the
    manifold if it falls within the radius of at least one reference vector.
    """

    def __init__(self, distance_block, features, row_batch_size, col_batch_size, nhood_sizes, clamp_to_percentile=None):
        """Compute the k-th nearest-neighbor radii of `features`.

        Args:
            distance_block: Object exposing `pairwise_distances(U, V)`.
            features: 2D array of reference feature vectors, one per row.
            row_batch_size: Number of rows processed per outer step.
            col_batch_size: Number of reference columns per distance call.
            nhood_sizes: Neighborhood sizes k to evaluate.
            clamp_to_percentile: If given, radii above this percentile
                (per neighborhood size) are set to zero.
        """
        num_images = features.shape[0]
        self.nhood_sizes = nhood_sizes
        self.num_nhoods = len(nhood_sizes)
        self.row_batch_size = row_batch_size
        self.col_batch_size = col_batch_size
        self._ref_features = features
        self._distance_block = distance_block

        # Estimate manifold of features by calculating distances to kth nearest neighbor of each sample.
        self.D = np.zeros([num_images, self.num_nhoods], dtype=np.float16)
        scratch = np.zeros([row_batch_size, num_images], dtype=np.float16)
        # Partition positions 0..max(k), so each requested k ends up at its sorted position.
        kth_positions = np.arange(max(self.nhood_sizes) + 1, dtype=np.int32)

        for row_lo in range(0, num_images, row_batch_size):
            row_hi = min(row_lo + row_batch_size, num_images)
            num_rows = row_hi - row_lo
            row_batch = features[row_lo:row_hi]

            # Compute distances between batches, one column slab at a time.
            for col_lo in range(0, num_images, col_batch_size):
                col_hi = min(col_lo + col_batch_size, num_images)
                col_batch = features[col_lo:col_hi]
                scratch[0:num_rows, col_lo:col_hi] = self._distance_block.pairwise_distances(row_batch, col_batch)

            # Find the kth nearest neighbor of every row in the current batch.
            partitioned = np.partition(scratch[0:num_rows, :], kth_positions, axis=1)
            self.D[row_lo:row_hi, :] = partitioned[:, self.nhood_sizes]

        if clamp_to_percentile is not None:
            max_distances = np.percentile(self.D, clamp_to_percentile, axis=0)
            self.D[self.D > max_distances] = 0

    def evaluate(self, eval_features, return_realism=False, return_neighbors=False):
        """Evaluate if new feature vectors are in the estimated manifold.

        Returns:
            `batch_predictions` (int32, one row per evaluated vector and one
            column per neighborhood size; 1 means inside the manifold). When
            `return_realism` and/or `return_neighbors` is set, a tuple that
            additionally holds the realism scores and/or the index of the
            nearest reference vector, in that order.
        """
        num_eval_images = eval_features.shape[0]
        num_ref_images = self.D.shape[0]
        scratch = np.zeros([self.row_batch_size, num_ref_images], dtype=np.float16)
        batch_predictions = np.zeros([num_eval_images, self.num_nhoods], dtype=np.int32)
        realism_score = np.zeros([num_eval_images,], dtype=np.float32)
        nearest_indices = np.zeros([num_eval_images,], dtype=np.int32)

        for row_lo in range(0, num_eval_images, self.row_batch_size):
            row_hi = min(row_lo + self.row_batch_size, num_eval_images)
            num_rows = row_hi - row_lo
            feature_batch = eval_features[row_lo:row_hi]

            for col_lo in range(0, num_ref_images, self.col_batch_size):
                col_hi = min(col_lo + self.col_batch_size, num_ref_images)
                ref_batch = self._ref_features[col_lo:col_hi]
                scratch[0:num_rows, col_lo:col_hi] = self._distance_block.pairwise_distances(feature_batch, ref_batch)

            # From the minibatch of new feature vectors, determine if they are in the estimated manifold.
            # If a feature vector is inside a hypersphere of some reference sample, then the new sample lies on the estimated manifold.
            # The radii of the hyperspheres are determined from distances of neighborhood size k.
            distances = scratch[0:num_rows, :]
            samples_in_manifold = distances[:, :, None] <= self.D
            batch_predictions[row_lo:row_hi] = np.any(samples_in_manifold, axis=1).astype(np.int32)

            # Realism: radius of the nearest reference sample divided by the distance to it.
            nearest_indices[row_lo:row_hi] = np.argmin(distances, axis=1)
            nearest_radius = self.D[nearest_indices[row_lo:row_hi], 0]
            realism_score[row_lo:row_hi] = nearest_radius / np.min(distances, axis=1)

        outputs = [batch_predictions]
        if return_realism:
            outputs.append(realism_score)
        if return_neighbors:
            outputs.append(nearest_indices)
        if len(outputs) == 1:
            return batch_predictions
        return tuple(outputs)
#----------------------------------------------------------------------------
def knn_precision_recall_features(ref_features, eval_features, feature_net, nhood_sizes,
                                  row_batch_size, col_batch_size, num_gpus):
    """Calculate k-NN precision and recall for two sets of feature vectors.

    Returns an `EasyDict` holding the inputs, both manifold estimators, the
    per-sample predictions (`precision`, `recall`), the realism scores and
    nearest neighbors of the evaluated features, and the per-neighborhood
    means `knn_precision` and `knn_recall`.
    """
    state = dnnlib.EasyDict()
    num_features = feature_net.output_shape[1]
    state.ref_features = ref_features
    state.eval_features = eval_features

    # Initialize DistanceBlock and ManifoldEstimators.
    distance_block = DistanceBlock(num_features, num_gpus)
    estimator_args = (row_batch_size, col_batch_size, nhood_sizes)
    state.ref_manifold = ManifoldEstimator(distance_block, state.ref_features, *estimator_args)
    state.eval_manifold = ManifoldEstimator(distance_block, state.eval_features, *estimator_args)

    # Precision: How many points from eval_features are in ref_features manifold.
    precision_outputs = state.ref_manifold.evaluate(state.eval_features, return_realism=True, return_neighbors=True)
    state.precision, state.realism_scores, state.nearest_neighbors = precision_outputs
    state.knn_precision = state.precision.mean(axis=0)

    # Recall: How many points from ref_features are in eval_features manifold.
    state.recall = state.eval_manifold.evaluate(state.ref_features)
    state.knn_recall = state.recall.mean(axis=0)
    return state
#----------------------------------------------------------------------------
class PR(metric_base.MetricBase):
    """k-NN Precision/Recall metric.

    Extracts feature vectors for real and generated images with a pre-trained
    feature network and reports `_precision` and `_recall` computed by
    `knn_precision_recall_features`.

    Args:
        num_images: Number of real and of generated images to use.
        nhood_size: Neighborhood size k for the manifold estimate.
        minibatch_per_gpu: Images processed per GPU per graph execution.
        row_batch_size: Row batch size for the pairwise-distance computation.
        col_batch_size: Column batch size for the pairwise-distance computation.
        **kwargs: Forwarded to `metric_base.MetricBase`.
    """

    def __init__(self, num_images, nhood_size, minibatch_per_gpu, row_batch_size, col_batch_size, **kwargs):
        super().__init__(**kwargs)
        self.num_images = num_images
        self.nhood_size = nhood_size
        self.minibatch_per_gpu = minibatch_per_gpu
        self.row_batch_size = row_batch_size
        self.col_batch_size = col_batch_size

    def _evaluate(self, Gs, Gs_kwargs, num_gpus):
        """Compute (or load cached) real features, generate fake features, report P/R."""
        minibatch_size = num_gpus * self.minibatch_per_gpu
        feature_net = misc.load_pkl('https://nvlabs-fi-cdn.nvidia.com/stylegan/networks/metrics/vgg16.pkl')

        # Calculate features for reals.
        cache_file = self._get_cache_file_for_reals(num_images=self.num_images)
        os.makedirs(os.path.dirname(cache_file), exist_ok=True)
        if os.path.isfile(cache_file):
            ref_features = misc.load_pkl(cache_file)
        else:
            ref_features = np.empty([self.num_images, feature_net.output_shape[1]], dtype=np.float32)
            # `_iterate_reals` never terminates on its own; stop once the buffer is full.
            for idx, images in enumerate(self._iterate_reals(minibatch_size=minibatch_size)):
                begin = idx * minibatch_size
                end = min(begin + minibatch_size, self.num_images)
                ref_features[begin:end] = feature_net.run(images[:end-begin], num_gpus=num_gpus, assume_frozen=True)
                if end == self.num_images:
                    break
            misc.save_pkl(ref_features, cache_file)

        # Construct TensorFlow graph.
        result_expr = []
        for gpu_idx in range(num_gpus):
            with tf.device('/gpu:%d' % gpu_idx):
                Gs_clone = Gs.clone()
                feature_net_clone = feature_net.clone()
                latents = tf.random_normal([self.minibatch_per_gpu] + Gs_clone.input_shape[1:])
                labels = self._get_random_labels_tf(self.minibatch_per_gpu)
                images = Gs_clone.get_output_for(latents, labels, **Gs_kwargs)
                images = tflib.convert_images_to_uint8(images)
                result_expr.append(feature_net_clone.get_output_for(images))

        # Calculate features for fakes.
        eval_features = np.empty([self.num_images, feature_net.output_shape[1]], dtype=np.float32)
        for begin in range(0, self.num_images, minibatch_size):
            self._report_progress(begin, self.num_images)
            end = min(begin + minibatch_size, self.num_images)
            # The last minibatch may overshoot `num_images`; keep only what fits.
            eval_features[begin:end] = np.concatenate(tflib.run(result_expr), axis=0)[:end-begin]

        # Calculate precision and recall.
        # NOTE: col_batch_size now honors the configured value; it previously
        # received row_batch_size, so the `col_batch_size` setting had no effect.
        state = knn_precision_recall_features(ref_features=ref_features, eval_features=eval_features, feature_net=feature_net,
            nhood_sizes=[self.nhood_size], row_batch_size=self.row_batch_size, col_batch_size=self.col_batch_size, num_gpus=num_gpus)
        self._report_result(state.knn_precision[0], suffix='_precision')
        self._report_result(state.knn_recall[0], suffix='_recall')
#----------------------------------------------------------------------------
================================================
FILE: pretrained_networks.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""List of pre-trained StyleGAN2 networks located on Google Drive."""
import pickle
import dnnlib
import dnnlib.tflib as tflib
#----------------------------------------------------------------------------
# StyleGAN2 Google Drive root: https://drive.google.com/open?id=1QHc-yF5C3DChRwSdZKcx1w6K8JvSxQi7
# Every alias 'gdrive:<relative path>' resolves to '<CDN root><relative path>',
# so the table is generated from the list of relative paths instead of being
# spelled out entry by entry. Key order matches the hand-written table.
_cdn_root = 'https://nvlabs-fi-cdn.nvidia.com/stylegan2/'

# (dataset, config letters) pairs for the main networks.
_main_networks = (
    ('car', 'abcdef'),
    ('cat', 'af'),
    ('church', 'af'),
    ('ffhq', 'abcdef'),
    ('horse', 'af'),
)

# Datasets and generator/discriminator architecture names for the table2 networks.
_table2_datasets = ('car', 'ffhq')
_table2_archs = ('orig', 'resnet', 'skip')

_relative_paths = [
    'networks/stylegan2-%s-config-%s.pkl' % (_ds, _cfg)
    for _ds, _cfgs in _main_networks
    for _cfg in _cfgs
]
_relative_paths += [
    'networks/table2/stylegan2-%s-config-e-G%s-D%s.pkl' % (_ds, _g, _d)
    for _ds in _table2_datasets
    for _g in _table2_archs
    for _d in _table2_archs
]

gdrive_urls = {'gdrive:' + _rel: _cdn_root + _rel for _rel in _relative_paths}
#----------------------------------------------------------------------------
def get_path_or_url(path_or_gdrive_path):
    """Resolve a `gdrive:` alias to its download URL.

    Returns the entry of `gdrive_urls` for `path_or_gdrive_path` when one
    exists; any other value is returned unchanged.
    """
    if path_or_gdrive_path in gdrive_urls:
        return gdrive_urls[path_or_gdrive_path]
    return path_or_gdrive_path
#----------------------------------------------------------------------------
# Cache of already loaded network triples, keyed by resolved path or URL.
_cached_networks = dict()

def load_networks(path_or_gdrive_path):
    """Load the `(G, D, Gs)` triple stored in a network pickle.

    Args:
        path_or_gdrive_path: Local filename, URL, or a `gdrive:` alias that
            `get_path_or_url()` can resolve.

    Returns:
        The tuple `(G, D, Gs)` unpickled from the file. Results are memoized in
        `_cached_networks`, so repeated calls with the same resolved path
        return the same objects without reloading.
    """
    path_or_url = get_path_or_url(path_or_gdrive_path)
    if path_or_url in _cached_networks:
        return _cached_networks[path_or_url]

    if dnnlib.util.is_url(path_or_url):
        stream = dnnlib.util.open_url(path_or_url, cache_dir='.stylegan2-cache')
    else:
        stream = open(path_or_url, 'rb')

    # Enter the context manager before initializing TensorFlow so that the
    # stream is closed even when initialization raises.
    with stream:
        tflib.init_tf()
        # NOTE: unpickling executes code embedded in the file; only load
        # network pickles obtained from a trusted source.
        G, D, Gs = pickle.load(stream, encoding='latin1')

    _cached_networks[path_or_url] = G, D, Gs
    return G, D, Gs
#----------------------------------------------------------------------------
================================================
FILE: projector.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
import numpy as np
import tensorflow as tf
import dnnlib
import dnnlib.tflib as tflib
from training import misc
#----------------------------------------------------------------------------
class Projector:
    """Finds a dlatent and noise inputs whose synthesized image matches a target.

    Usage: call `set_network()` once to build the TensorFlow graph, then either
    `run()` for a complete projection, or `start()` followed by repeated
    `step()` calls to drive the optimization manually.
    """

    def __init__(self):
        """Set default hyperparameters; graph state stays `None` until `set_network()`."""
        # Hyperparameters (may be changed before calling set_network()/start()).
        self.num_steps = 1000                 # Number of optimization steps per projection.
        self.dlatent_avg_samples = 10000      # Latent samples used to estimate dlatent mean/std.
        self.initial_learning_rate = 0.1      # Peak learning rate before ramps are applied.
        self.initial_noise_factor = 0.05      # Initial dlatent noise, relative to the dlatent std.
        self.lr_rampdown_length = 0.25        # Fraction of the run over which the rate ramps down.
        self.lr_rampup_length = 0.05          # Fraction of the run over which the rate ramps up.
        self.noise_ramp_length = 0.75         # Fraction of the run over which dlatent noise fades to zero.
        self.regularize_noise_weight = 1e5    # Multiplier of the noise regularization loss.
        self.verbose = False                  # Print progress messages via _info().
        self.clone_net = True                 # Work on a clone of the network passed to set_network().

        # Internal state, populated by set_network() and start().
        self._Gs = None
        self._minibatch_size = None
        self._dlatent_avg = None
        self._dlatent_std = None
        self._noise_vars = None
        self._noise_init_op = None
        self._noise_normalize_op = None
        self._dlatents_var = None
        self._noise_in = None
        self._dlatents_expr = None
        self._images_expr = None
        self._target_images_var = None
        self._lpips = None
        self._dist = None
        self._loss = None
        self._reg_sizes = None
        self._lrate_in = None
        self._opt = None
        self._opt_step = None
        self._cur_step = None

    def _info(self, *args):
        """Print a message prefixed with 'Projector:' when `verbose` is set."""
        if self.verbose:
            print('Projector:', *args)

    def set_network(self, Gs, minibatch_size=1):
        """Attach a generator and build the projection graph.

        Args:
            Gs: Generator network to project into. Passing `None` only records
                the arguments and builds nothing.
            minibatch_size: Must be 1.
        """
        assert minibatch_size == 1
        self._Gs = Gs
        self._minibatch_size = minibatch_size
        if self._Gs is None:
            return
        if self.clone_net:
            self._Gs = self._Gs.clone()

        # Find dlatent stats.
        self._info('Finding W midpoint and stddev using %d samples...' % self.dlatent_avg_samples)
        latent_samples = np.random.RandomState(123).randn(self.dlatent_avg_samples, *self._Gs.input_shapes[0][1:])
        dlatent_samples = self._Gs.components.mapping.run(latent_samples, None)[:, :1, :] # [N, 1, 512]
        self._dlatent_avg = np.mean(dlatent_samples, axis=0, keepdims=True) # [1, 1, 512]
        self._dlatent_std = (np.sum((dlatent_samples - self._dlatent_avg) ** 2) / self.dlatent_avg_samples) ** 0.5
        self._info('std = %g' % self._dlatent_std)

        # Find noise inputs: collect variables noise0, noise1, ... until a name is missing.
        self._info('Setting up noise inputs...')
        self._noise_vars = []
        noise_init_ops = []
        noise_normalize_ops = []
        while True:
            n = 'G_synthesis/noise%d' % len(self._noise_vars)
            if n not in self._Gs.vars:
                break
            v = self._Gs.vars[n]
            self._noise_vars.append(v)
            noise_init_ops.append(tf.assign(v, tf.random_normal(tf.shape(v), dtype=tf.float32)))
            noise_mean = tf.reduce_mean(v)
            noise_std = tf.reduce_mean((v - noise_mean)**2)**0.5
            noise_normalize_ops.append(tf.assign(v, (v - noise_mean) / noise_std))
            self._info(n, v)
        self._noise_init_op = tf.group(*noise_init_ops)
        self._noise_normalize_op = tf.group(*noise_normalize_ops)

        # Image output graph.
        self._info('Building image output graph...')
        self._dlatents_var = tf.Variable(tf.zeros([self._minibatch_size] + list(self._dlatent_avg.shape[1:])), name='dlatents_var')
        self._noise_in = tf.placeholder(tf.float32, [], name='noise_in')
        dlatents_noise = tf.random.normal(shape=self._dlatents_var.shape) * self._noise_in
        self._dlatents_expr = tf.tile(self._dlatents_var + dlatents_noise, [1, self._Gs.components.synthesis.input_shape[1], 1])
        self._images_expr = self._Gs.components.synthesis.get_output_for(self._dlatents_expr, randomize_noise=False)

        # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images.
        proc_images_expr = (self._images_expr + 1) * (255 / 2)
        sh = proc_images_expr.shape.as_list()
        if sh[2] > 256:
            factor = sh[2] // 256
            # Average over factor x factor pixel blocks. The width axis uses sh[3],
            # matching the target downsampling done in start().
            proc_images_expr = tf.reduce_mean(tf.reshape(proc_images_expr, [-1, sh[1], sh[2] // factor, factor, sh[3] // factor, factor]), axis=[3,5])

        # Loss graph.
        self._info('Building loss graph...')
        self._target_images_var = tf.Variable(tf.zeros(proc_images_expr.shape), name='target_images_var')
        if self._lpips is None:
            self._lpips = misc.load_pkl('https://nvlabs-fi-cdn.nvidia.com/stylegan/networks/metrics/vgg16_zhang_perceptual.pkl')
        self._dist = self._lpips.get_output_for(proc_images_expr, self._target_images_var)
        self._loss = tf.reduce_sum(self._dist)

        # Noise regularization graph: penalize correlation between each noise map
        # and its one-pixel shifts, repeated on successively halved resolutions.
        self._info('Building noise regularization graph...')
        reg_loss = 0.0
        for v in self._noise_vars:
            sz = v.shape[2]
            while True:
                reg_loss += tf.reduce_mean(v * tf.roll(v, shift=1, axis=3))**2 + tf.reduce_mean(v * tf.roll(v, shift=1, axis=2))**2
                if sz <= 8:
                    break # Small enough already
                v = tf.reshape(v, [1, 1, sz//2, 2, sz//2, 2]) # Downscale
                v = tf.reduce_mean(v, axis=[3, 5])
                sz = sz // 2
        self._loss += reg_loss * self.regularize_noise_weight

        # Optimizer.
        self._info('Setting up optimizer...')
        self._lrate_in = tf.placeholder(tf.float32, [], name='lrate_in')
        self._opt = dnnlib.tflib.Optimizer(learning_rate=self._lrate_in)
        self._opt.register_gradients(self._loss, [self._dlatents_var] + self._noise_vars)
        self._opt_step = self._opt.apply_updates()

    def run(self, target_images):
        """Project `target_images` to completion.

        Returns:
            A `dnnlib.EasyDict` with fields `dlatents`, `noises` and `images`
            holding the results of the corresponding getters.
        """
        # Run to completion.
        self.start(target_images)
        while self._cur_step < self.num_steps:
            self.step()

        # Collect results.
        pres = dnnlib.EasyDict()
        pres.dlatents = self.get_dlatents()
        pres.noises = self.get_noises()
        pres.images = self.get_images()
        return pres

    def start(self, target_images):
        """Begin a new projection towards `target_images`.

        The targets are rescaled with `(x + 1) * (255 / 2)`, downsampled by block
        averaging when larger than the target variable, and uploaded. The dlatent
        is reset to the average dlatent, the noise variables are re-randomized,
        the optimizer state is reset and the step counter is set to zero.

        Requires a prior `set_network()` call with a non-`None` network, and a
        leading dimension of `target_images` equal to the minibatch size.
        """
        assert self._Gs is not None

        # Prepare target images.
        self._info('Preparing target images...')
        target_images = np.asarray(target_images, dtype='float32')
        target_images = (target_images + 1) * (255 / 2)
        sh = target_images.shape
        assert sh[0] == self._minibatch_size
        if sh[2] > self._target_images_var.shape[2]:
            factor = sh[2] // self._target_images_var.shape[2]
            target_images = np.reshape(target_images, [-1, sh[1], sh[2] // factor, factor, sh[3] // factor, factor]).mean((3, 5))

        # Initialize optimization state.
        self._info('Initializing optimization state...')
        tflib.set_vars({self._target_images_var: target_images, self._dlatents_var: np.tile(self._dlatent_avg, [self._minibatch_size, 1, 1])})
        tflib.run(self._noise_init_op)
        self._opt.reset_optimizer_state()
        self._cur_step = 0

    def step(self):
        """Run one optimization step; does nothing once `num_steps` is reached.

        The dlatent noise strength decays quadratically to zero over the first
        `noise_ramp_length` fraction of the run. The learning rate ramps up
        linearly over the first `lr_rampup_length` fraction and follows a
        cosine ramp-down over the last `lr_rampdown_length` fraction.
        """
        assert self._cur_step is not None
        if self._cur_step >= self.num_steps:
            return
        if self._cur_step == 0:
            self._info('Running...')

        # Hyperparameters.
        t = self._cur_step / self.num_steps
        noise_strength = self._dlatent_std * self.initial_noise_factor * max(0.0, 1.0 - t / self.noise_ramp_length) ** 2
        lr_ramp = min(1.0, (1.0 - t) / self.lr_rampdown_length)
        lr_ramp = 0.5 - 0.5 * np.cos(lr_ramp * np.pi)
        lr_ramp = lr_ramp * min(1.0, t / self.lr_rampup_length)
        learning_rate = self.initial_learning_rate * lr_ramp

        # Train.
        feed_dict = {self._noise_in: noise_strength, self._lrate_in: learning_rate}
        _, dist_value, loss_value = tflib.run([self._opt_step, self._dist, self._loss], feed_dict)
        # Re-normalize every noise variable to zero mean and unit variance.
        tflib.run(self._noise_normalize_op)

        # Print status.
        self._cur_step += 1
        if self._cur_step == self.num_steps or self._cur_step % 10 == 0:
            self._info('%-8d%-12g%-12g' % (self._cur_step, dist_value, loss_value))
        if self._cur_step == self.num_steps:
            self._info('Done.')

    def get_cur_step(self):
        """Return the number of steps taken so far (`None` before `start()`)."""
        return self._cur_step

    def get_dlatents(self):
        """Evaluate the tiled dlatents with the dlatent noise strength set to zero."""
        return tflib.run(self._dlatents_expr, {self._noise_in: 0})

    def get_noises(self):
        """Evaluate the current values of all noise variables."""
        return tflib.run(self._noise_vars)

    def get_images(self):
        """Evaluate the synthesized images with the dlatent noise strength set to zero."""
        return tflib.run(self._images_expr, {self._noise_in: 0})
#----------------------------------------------------------------------------
================================================
FILE: run_generator.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
import argparse
import numpy as np
import PIL.Image
import dnnlib
import dnnlib.tflib as tflib
import re
import sys
import pretrained_networks
#----------------------------------------------------------------------------
def generate_images(network_pkl, seeds, truncation_psi):
    """Generate one image per seed and save it as `seedNNNN.png` in the run directory.

    Args:
        network_pkl: Network pickle path, URL or `gdrive:` alias.
        seeds: Random seeds; each seed determines both the latent vector and the
            values assigned to the synthesis noise variables.
        truncation_psi: Truncation value forwarded to the generator, or `None`
            to leave it unspecified.
    """
    print('Loading networks from "%s"...' % network_pkl)
    _G, _D, Gs = pretrained_networks.load_networks(network_pkl)
    noise_vars = []
    for name, var in Gs.components.synthesis.vars.items():
        if name.startswith('noise'):
            noise_vars.append(var)

    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.output_transform = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)
    Gs_kwargs.randomize_noise = False
    if truncation_psi is not None:
        Gs_kwargs.truncation_psi = truncation_psi

    for seed_idx, seed in enumerate(seeds):
        print('Generating image for seed %d (%d/%d) ...' % (seed, seed_idx, len(seeds)))
        rnd = np.random.RandomState(seed)
        # Draw the latent first and the noise maps afterwards, so that both come
        # from the same seeded stream in a fixed order.
        z = rnd.randn(1, *Gs.input_shape[1:]) # [minibatch, component]
        noise_values = {}
        for var in noise_vars:
            noise_values[var] = rnd.randn(*var.shape.as_list()) # [height, width]
        tflib.set_vars(noise_values)
        images = Gs.run(z, None, **Gs_kwargs) # [minibatch, height, width, channel]
        first_image = PIL.Image.fromarray(images[0], 'RGB')
        first_image.save(dnnlib.make_run_dir_path('seed%04d.png' % seed))
#----------------------------------------------------------------------------
def style_mixing_example(network_pkl, row_seeds, col_seeds, truncation_psi, col_styles, minibatch_size=4):
    """Render a style-mixing grid and save every image plus the assembled `grid.png`.

    For each (row seed, column seed) pair, the dlatent of the row seed is taken
    and its layers listed in `col_styles` are replaced by those of the column
    seed. The first row and first column of the grid show the unmixed images.

    Args:
        network_pkl: Network pickle path, URL or `gdrive:` alias.
        row_seeds: List of seeds for the grid rows.
        col_seeds: List of seeds for the grid columns.
        truncation_psi: Scale applied to the offset of each dlatent from `dlatent_avg`.
        col_styles: Layer indices copied from the column seed's dlatent.
        minibatch_size: Minibatch size forwarded to the synthesis network.
    """
    print('Loading networks from "%s"...' % network_pkl)
    _G, _D, Gs = pretrained_networks.load_networks(network_pkl)
    w_avg = Gs.get_var('dlatent_avg') # [component]

    Gs_syn_kwargs = dnnlib.EasyDict()
    Gs_syn_kwargs.output_transform = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)
    Gs_syn_kwargs.randomize_noise = False
    Gs_syn_kwargs.minibatch_size = minibatch_size

    print('Generating W vectors...')
    all_seeds = list(set(row_seeds + col_seeds))
    latents = []
    for seed in all_seeds:
        latents.append(np.random.RandomState(seed).randn(*Gs.input_shape[1:]))
    all_z = np.stack(latents) # [minibatch, component]
    all_w = Gs.components.mapping.run(all_z, None) # [minibatch, layer, component]
    all_w = w_avg + (all_w - w_avg) * truncation_psi # [minibatch, layer, component]
    w_dict = dict(zip(all_seeds, list(all_w))) # seed -> [layer, component]

    print('Generating images...')
    all_images = Gs.components.synthesis.run(all_w, **Gs_syn_kwargs) # [minibatch, height, width, channel]
    image_dict = {}
    for seed, image in zip(all_seeds, list(all_images)):
        image_dict[(seed, seed)] = image

    print('Generating style-mixed images...')
    for row_seed in row_seeds:
        row_w = w_dict[row_seed]
        for col_seed in col_seeds:
            mixed_w = row_w.copy()
            mixed_w[col_styles] = w_dict[col_seed][col_styles]
            mixed_images = Gs.components.synthesis.run(mixed_w[np.newaxis], **Gs_syn_kwargs)
            image_dict[(row_seed, col_seed)] = mixed_images[0]

    print('Saving images...')
    for (row_seed, col_seed), image in image_dict.items():
        filename = '%d-%d.png' % (row_seed, col_seed)
        PIL.Image.fromarray(image, 'RGB').save(dnnlib.make_run_dir_path(filename))

    print('Saving image grid...')
    _N, _C, H, W = Gs.output_shape
    grid_size = (W * (len(col_seeds) + 1), H * (len(row_seeds) + 1))
    canvas = PIL.Image.new('RGB', grid_size, 'black')
    for row_idx, row_seed in enumerate([None] + row_seeds):
        for col_idx, col_seed in enumerate([None] + col_seeds):
            if row_seed is None and col_seed is None:
                continue # Top-left corner stays black.
            if row_seed is None:
                key = (col_seed, col_seed) # Header row: unmixed column image.
            elif col_seed is None:
                key = (row_seed, row_seed) # Header column: unmixed row image.
            else:
                key = (row_seed, col_seed)
            canvas.paste(PIL.Image.fromarray(image_dict[key], 'RGB'), (W * col_idx, H * row_idx))
    canvas.save(dnnlib.make_run_dir_path('grid.png'))
#----------------------------------------------------------------------------
def _parse_num_range(s):
    """Parse `'a,b,c'` (comma-separated numbers) or `'a-c'` (inclusive range) into a list of ints."""
    match = re.match(r'^(\d+)-(\d+)$', s)
    if match is None:
        return list(map(int, s.split(',')))
    first, last = (int(group) for group in match.groups())
    return list(range(first, last + 1))
#----------------------------------------------------------------------------
# Example command lines; main() passes this text to argparse as the help
# epilog, where '%(prog)s' is replaced by the program name.
_examples = '''examples:
# Generate ffhq uncurated images (matches paper Figure 12)
python %(prog)s generate-images --network=gdrive:networks/stylegan2-ffhq-config-f.pkl --seeds=6600-6625 --truncation-psi=0.5
# Generate ffhq curated images (matches paper Figure 11)
python %(prog)s generate-images --network=gdrive:networks/stylegan2-ffhq-config-f.pkl --seeds=66,230,389,1518 --truncation-psi=1.0
# Generate uncurated car images (matches paper Figure 12)
python %(prog)s generate-images --network=gdrive:networks/stylegan2-car-config-f.pkl --seeds=6000-6025 --truncation-psi=0.5
# Generate style mixing example (matches style mixing video clip)
python %(prog)s style-mixing-example --network=gdrive:networks/stylegan2-ffhq-config-f.pkl --row-seeds=85,100,75,458,1500 --col-seeds=55,821,1789,293 --truncation-psi=1.0
'''
#----------------------------------------------------------------------------
def main():
    """Parse the command line and submit the selected sub-command as a local dnnlib run."""
    parser = argparse.ArgumentParser(
        description='''StyleGAN2 generator.
Run 'python %(prog)s --help' for subcommand help.''',
        epilog=_examples,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    subparsers = parser.add_subparsers(help='Sub-commands', dest='command')

    # Sub-command: generate-images.
    images_cmd = subparsers.add_parser('generate-images', help='Generate images')
    images_cmd.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True)
    images_cmd.add_argument('--seeds', type=_parse_num_range, help='List of random seeds', required=True)
    images_cmd.add_argument('--truncation-psi', type=float, help='Truncation psi (default: %(default)s)', default=0.5)
    images_cmd.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR')

    # Sub-command: style-mixing-example.
    mixing_cmd = subparsers.add_parser('style-mixing-example', help='Generate style mixing video')
    mixing_cmd.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True)
    mixing_cmd.add_argument('--row-seeds', type=_parse_num_range, help='Random seeds to use for image rows', required=True)
    mixing_cmd.add_argument('--col-seeds', type=_parse_num_range, help='Random seeds to use for image columns', required=True)
    mixing_cmd.add_argument('--col-styles', type=_parse_num_range, help='Style layer range (default: %(default)s)', default='0-6')
    mixing_cmd.add_argument('--truncation-psi', type=float, help='Truncation psi (default: %(default)s)', default=0.5)
    mixing_cmd.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR')

    # Whatever remains in `options` after the pops below is forwarded to the run function.
    options = vars(parser.parse_args())
    subcmd = options.pop('command')
    if subcmd is None:
        print('Error: missing subcommand.  Re-run with --help for usage.')
        sys.exit(1)

    sc = dnnlib.SubmitConfig()
    sc.num_gpus = 1
    sc.submit_target = dnnlib.SubmitTarget.LOCAL
    sc.local.do_not_copy_source_files = True
    sc.run_dir_root = options.pop('result_dir')
    sc.run_desc = subcmd

    # Sub-command name -> name of the function to run.
    entry_points = {
        'generate-images': 'run_generator.generate_images',
        'style-mixing-example': 'run_generator.style_mixing_example'
    }
    dnnlib.submit_run(sc, entry_points[subcmd], **options)
#----------------------------------------------------------------------------
if __name__ == "__main__":
    main()
#----------------------------------------------------------------------------
================================================
FILE: run_metrics.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
import argparse
import os
import sys
import dnnlib
import dnnlib.tflib as tflib
import pretrained_networks
from metrics import metric_base
from metrics.metric_defaults import metric_defaults
#----------------------------------------------------------------------------
def run(network_pkl, metrics, dataset, data_dir, mirror_augment):
    """Evaluate the named metrics for one network pickle.

    Args:
        network_pkl: Network pickle path, URL or `gdrive:` alias.
        metrics: Iterable of metric names; each must be a key of `metric_defaults`.
        dataset: Passed to the metrics as `tfrecord_dir` of the dataset arguments.
        data_dir: Dataset root directory.
        mirror_augment: Forwarded unchanged to the metric group.
    """
    print('Evaluating metrics "%s" for "%s"...' % (','.join(metrics), network_pkl))
    tflib.init_tf()
    network_pkl = pretrained_networks.get_path_or_url(network_pkl)
    dataset_args = dnnlib.EasyDict(tfrecord_dir=dataset, shuffle_mb=0)
    num_gpus = dnnlib.submit_config.num_gpus

    metric_specs = []
    for metric_name in metrics:
        metric_specs.append(metric_defaults[metric_name])
    metric_group = metric_base.MetricGroup(metric_specs)
    metric_group.run(
        network_pkl,
        data_dir=data_dir,
        dataset_args=dataset_args,
        mirror_augment=mirror_augment,
        num_gpus=num_gpus)
#----------------------------------------------------------------------------
def _str_to_bool(v):
    """Convert a command-line value to a bool.

    Booleans are returned as-is. Strings are compared case-insensitively
    against the accepted spellings of true and false.

    Raises:
        argparse.ArgumentTypeError: If the string is not a recognized spelling.
    """
    if isinstance(v, bool):
        return v
    lowered = v.lower()
    if lowered in ('yes', 'true', 't', 'y', '1'):
        return True
    if lowered in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')
#----------------------------------------------------------------------------
# Help epilog passed to argparse by main(): one example command line followed
# by the sorted, comma-separated keys of metric_defaults as the valid metrics.
_examples = '''examples:
python %(prog)s --data-dir=~/datasets --network=gdrive:networks/stylegan2-ffhq-config-f.pkl --metrics=fid50k,ppl_wend --dataset=ffhq --mirror-augment=true
valid metrics:
''' + ', '.join(sorted([x for x in metric_defaults.keys()])) + '''
'''
def main():
    """Parse the command line and submit `run_metrics.run` as a local dnnlib run.

    Exits with status 1 when the dataset root directory does not exist.
    """
    parser = argparse.ArgumentParser(
        description='Run StyleGAN2 metrics.',
        epilog=_examples,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    add = parser.add_argument
    add('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR')
    add('--network', help='Network pickle filename', dest='network_pkl', required=True)
    add('--metrics', help='Metrics to compute (default: %(default)s)', default='fid50k', type=lambda x: x.split(','))
    add('--dataset', help='Training dataset', required=True)
    add('--data-dir', help='Dataset root directory', required=True)
    add('--mirror-augment', help='Mirror augment (default: %(default)s)', default=False, type=_str_to_bool, metavar='BOOL')
    add('--num-gpus', help='Number of GPUs to use', type=int, default=1, metavar='N')
    args = parser.parse_args()

    if not os.path.exists(args.data_dir):
        print('Error: dataset root directory does not exist.')
        sys.exit(1)

    # Options consumed by the submit config are popped; the rest go to run().
    run_kwargs = vars(args)
    sc = dnnlib.SubmitConfig()
    sc.num_gpus = run_kwargs.pop('num_gpus')
    sc.submit_target = dnnlib.SubmitTarget.LOCAL
    sc.local.do_not_copy_source_files = True
    sc.run_dir_root = run_kwargs.pop('result_dir')
    sc.run_desc = 'run-metrics'
    dnnlib.submit_run(sc, 'run_metrics.run', **run_kwargs)
#----------------------------------------------------------------------------
if __name__ == "__main__":
    main()
#----------------------------------------------------------------------------
================================================
FILE: run_projector.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
import argparse
import numpy as np
import dnnlib
import dnnlib.tflib as tflib
import re
import sys
import projector
import pretrained_networks
from training import dataset
from training import misc
#----------------------------------------------------------------------------
def project_image(proj, targets, png_prefix, num_snapshots):
    """Project `targets` with `proj`, saving the target and intermediate snapshots as PNGs.

    Args:
        proj: Projector that already has a network attached.
        targets: Target images, saved as `<png_prefix>target.png` with drange [-1, 1].
        png_prefix: Filename prefix for all saved images.
        num_snapshots: Number of evenly spaced snapshot steps, counted back from
            the final step.
    """
    offsets = np.linspace(0, proj.num_steps, num_snapshots, endpoint=False, dtype=int)
    snapshot_steps = set(proj.num_steps - offsets)
    misc.save_image_grid(targets, png_prefix + 'target.png', drange=[-1,1])

    proj.start(targets)
    while proj.get_cur_step() < proj.num_steps:
        print('\r%d / %d ... ' % (proj.get_cur_step(), proj.num_steps), end='', flush=True)
        proj.step()
        step_now = proj.get_cur_step()
        if step_now in snapshot_steps:
            misc.save_image_grid(proj.get_images(), png_prefix + 'step%04d.png' % step_now, drange=[-1,1])
    # Blank out the progress line.
    print('\r%-30s\r' % '', end='', flush=True)
#----------------------------------------------------------------------------
def project_generated_images(network_pkl, seeds, num_snapshots, truncation_psi):
    """Generate one image per seed with the network and project it back into latent space.

    Args:
        network_pkl: Network pickle path, URL or `gdrive:` alias.
        seeds: Random seeds; each determines the latent vector and noise values.
        num_snapshots: Number of intermediate snapshots saved per projection.
        truncation_psi: Truncation value used when generating the target images.
    """
    print('Loading networks from "%s"...' % network_pkl)
    _G, _D, Gs = pretrained_networks.load_networks(network_pkl)
    proj = projector.Projector()
    proj.set_network(Gs)
    noise_vars = []
    for name, var in Gs.components.synthesis.vars.items():
        if name.startswith('noise'):
            noise_vars.append(var)

    Gs_kwargs = dnnlib.EasyDict()
    Gs_kwargs.randomize_noise = False
    Gs_kwargs.truncation_psi = truncation_psi

    for seed_idx, seed in enumerate(seeds):
        print('Projecting seed %d (%d/%d) ...' % (seed, seed_idx, len(seeds)))
        rnd = np.random.RandomState(seed)
        # Latent first, noise maps second: both come from the same seeded stream.
        z = rnd.randn(1, *Gs.input_shape[1:])
        noise_values = {}
        for var in noise_vars:
            noise_values[var] = rnd.randn(*var.shape.as_list())
        tflib.set_vars(noise_values)
        images = Gs.run(z, None, **Gs_kwargs)
        png_prefix = dnnlib.make_run_dir_path('seed%04d-' % seed)
        project_image(proj, targets=images, png_prefix=png_prefix, num_snapshots=num_snapshots)
#----------------------------------------------------------------------------
def project_real_images(network_pkl, dataset_name, data_dir, num_images, num_snapshots):
    """Project the first `num_images` images of a dataset into the network's latent space.

    Args:
        network_pkl: Network pickle path, URL or `gdrive:` alias.
        dataset_name: Passed to the dataset loader as `tfrecord_dir`.
        data_dir: Dataset root directory.
        num_images: Number of images to read and project, one at a time.
        num_snapshots: Number of intermediate snapshots saved per projection.

    The dataset shape must equal the generator's output shape without the
    leading (minibatch) dimension.
    """
    print('Loading networks from "%s"...' % network_pkl)
    _G, _D, Gs = pretrained_networks.load_networks(network_pkl)
    proj = projector.Projector()
    proj.set_network(Gs)

    print('Loading images from "%s"...' % dataset_name)
    load_kwargs = dict(data_dir=data_dir, tfrecord_dir=dataset_name, max_label_size=0, repeat=False, shuffle_mb=0)
    dataset_obj = dataset.load_dataset(**load_kwargs)
    assert dataset_obj.shape == Gs.output_shape[1:]

    for image_idx in range(num_images):
        print('Projecting image %d/%d ...' % (image_idx, num_images))
        images, _labels = dataset_obj.get_minibatch_np(1)
        # Map pixel values from [0, 255] to the [-1, 1] range expected by project_image().
        images = misc.adjust_dynamic_range(images, [0, 255], [-1, 1])
        png_prefix = dnnlib.make_run_dir_path('image%04d-' % image_idx)
        project_image(proj, targets=images, png_prefix=png_prefix, num_snapshots=num_snapshots)
#----------------------------------------------------------------------------
def _parse_num_range(s):
    """Parse `'a,b,c'` (comma-separated numbers) or `'a-c'` (inclusive range) into a list of ints."""
    match = re.match(r'^(\d+)-(\d+)$', s)
    if match is None:
        return list(map(int, s.split(',')))
    first, last = (int(group) for group in match.groups())
    return list(range(first, last + 1))
#----------------------------------------------------------------------------
# Example command lines; main() passes this text to argparse as the help
# epilog, where '%(prog)s' is replaced by the program name.
_examples = '''examples:
# Project generated images
python %(prog)s project-generated-images --network=gdrive:networks/stylegan2-car-config-f.pkl --seeds=0,1,5
# Project real images
python %(prog)s project-real-images --network=gdrive:networks/stylegan2-car-config-f.pkl --dataset=car --data-dir=~/datasets
'''
#----------------------------------------------------------------------------
def main():
    """Parse the command line and submit the selected sub-command as a local dnnlib run."""
    parser = argparse.ArgumentParser(
        description='''StyleGAN2 projector.
Run 'python %(prog)s --help' for subcommand help.''',
        epilog=_examples,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    subparsers = parser.add_subparsers(help='Sub-commands', dest='command')

    # Sub-command: project-generated-images.
    generated_cmd = subparsers.add_parser('project-generated-images', help='Project generated images')
    generated_cmd.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True)
    generated_cmd.add_argument('--seeds', type=_parse_num_range, help='List of random seeds', default=range(3))
    generated_cmd.add_argument('--num-snapshots', type=int, help='Number of snapshots (default: %(default)s)', default=5)
    generated_cmd.add_argument('--truncation-psi', type=float, help='Truncation psi (default: %(default)s)', default=1.0)
    generated_cmd.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR')

    # Sub-command: project-real-images.
    real_cmd = subparsers.add_parser('project-real-images', help='Project real images')
    real_cmd.add_argument('--network', help='Network pickle filename', dest='network_pkl', required=True)
    real_cmd.add_argument('--data-dir', help='Dataset root directory', required=True)
    real_cmd.add_argument('--dataset', help='Training dataset', dest='dataset_name', required=True)
    real_cmd.add_argument('--num-snapshots', type=int, help='Number of snapshots (default: %(default)s)', default=5)
    real_cmd.add_argument('--num-images', type=int, help='Number of images to project (default: %(default)s)', default=3)
    real_cmd.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR')

    args = parser.parse_args()
    subcmd = args.command
    if subcmd is None:
        print('Error: missing subcommand.  Re-run with --help for usage.')
        sys.exit(1)

    # Whatever remains in `options` after the pops below is forwarded to the run function.
    options = vars(args)
    sc = dnnlib.SubmitConfig()
    sc.num_gpus = 1
    sc.submit_target = dnnlib.SubmitTarget.LOCAL
    sc.local.do_not_copy_source_files = True
    sc.run_dir_root = options.pop('result_dir')
    sc.run_desc = options.pop('command')

    # Sub-command name -> name of the function to run.
    entry_points = {
        'project-generated-images': 'run_projector.project_generated_images',
        'project-real-images': 'run_projector.project_real_images'
    }
    dnnlib.submit_run(sc, entry_points[subcmd], **options)
#----------------------------------------------------------------------------
if __name__ == "__main__":
    main()
#----------------------------------------------------------------------------
================================================
FILE: run_training.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
import argparse
import copy
import os
import sys
import dnnlib
from dnnlib import EasyDict
from metrics.metric_defaults import metric_defaults
#----------------------------------------------------------------------------
# Identifiers accepted by the --config option. run() asserts membership in this
# list, main() validates against it, and the --help epilog prints it.
_valid_configs = [
    # Table 1
    'config-a', # Baseline StyleGAN
    'config-b', # + Weight demodulation
    'config-c', # + Lazy regularization
    'config-d', # + Path length regularization
    'config-e', # + No growing, new G & D arch.
    'config-f', # + Large networks (default)
    # Table 2
    'config-e-Gorig-Dorig', 'config-e-Gorig-Dresnet', 'config-e-Gorig-Dskip',
    'config-e-Gresnet-Dorig', 'config-e-Gresnet-Dresnet', 'config-e-Gresnet-Dskip',
    'config-e-Gskip-Dorig', 'config-e-Gskip-Dresnet', 'config-e-Gskip-Dskip',
]
#----------------------------------------------------------------------------
def run(dataset, data_dir, result_dir, config_id, num_gpus, total_kimg, gamma, mirror_augment, metrics):
    """Assemble the training options for one config and submit the run.

    Args:
        dataset:        Dataset name; used as `tfrecord_dir` and in the run description.
        data_dir:       Dataset root directory, forwarded to the training loop.
        result_dir:     Root directory for run results.
        config_id:      One of `_valid_configs`.
        num_gpus:       Number of GPUs; must be 1, 2, 4 or 8.
        total_kimg:     Training length in thousands of images.
        gamma:          R1 regularization weight, or None to keep the config-dependent value.
        mirror_augment: Whether to enable mirror augmentation.
        metrics:        Metric names, looked up in `metric_defaults`.
    """
    # One option container per consumer.
    train = EasyDict(run_func_name='training.training_loop.training_loop')  # Training loop.
    G = EasyDict(func_name='training.networks_stylegan2.G_main')            # Generator network.
    D = EasyDict(func_name='training.networks_stylegan2.D_stylegan2')       # Discriminator network.
    G_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8)                   # Generator optimizer.
    D_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8)                   # Discriminator optimizer.
    G_loss = EasyDict(func_name='training.loss.G_logistic_ns_pathreg')      # Generator loss.
    D_loss = EasyDict(func_name='training.loss.D_logistic_r1')              # Discriminator loss.
    sched = EasyDict()                                                      # TrainingSchedule.
    grid = EasyDict(size='8k', layout='random')                             # setup_snapshot_image_grid().
    submit_config = dnnlib.SubmitConfig()                                   # dnnlib.submit_run().
    tf_config = {'rnd.np_random_seed': 1000}                                # tflib.init_tf().

    # Settings shared by every config.
    train.data_dir = data_dir
    train.total_kimg = total_kimg
    train.mirror_augment = mirror_augment
    train.image_snapshot_ticks = 10
    train.network_snapshot_ticks = 10
    sched.G_lrate_base = 0.002
    sched.D_lrate_base = 0.002
    sched.minibatch_size_base = 32
    sched.minibatch_gpu_base = 4
    D_loss.gamma = 10
    metrics = [metric_defaults[name] for name in metrics]

    # The run description is built up as "stylegan2-<dataset>-<N>gpu-<config>".
    desc = 'stylegan2-' + dataset
    dataset_args = EasyDict(tfrecord_dir=dataset)

    assert num_gpus in [1, 2, 4, 8]
    submit_config.num_gpus = num_gpus
    desc = desc + '-%dgpu' % num_gpus

    assert config_id in _valid_configs
    desc = desc + '-' + config_id

    # Configs A-E: Shrink networks to match original StyleGAN.
    if config_id != 'config-f':
        G.fmap_base = 8 << 10
        D.fmap_base = 8 << 10

    # Config E: Set gamma to 100 and override G & D architecture.
    if config_id.startswith('config-e'):
        D_loss.gamma = 100
        # 'skip' is the default for G, 'resnet' is the default for D.
        for tag, arch in (('Gorig', 'orig'), ('Gskip', 'skip'), ('Gresnet', 'resnet')):
            if tag in config_id:
                G.architecture = arch
        for tag, arch in (('Dorig', 'orig'), ('Dskip', 'skip'), ('Dresnet', 'resnet')):
            if tag in config_id:
                D.architecture = arch

    # Configs A-D: Enable progressive growing and switch to networks that support it.
    if config_id in ('config-a', 'config-b', 'config-c', 'config-d'):
        lrate_dict = {128: 0.0015, 256: 0.002, 512: 0.003, 1024: 0.003}
        sched.lod_initial_resolution = 8
        sched.G_lrate_base = 0.001
        sched.D_lrate_base = 0.001
        sched.G_lrate_dict = lrate_dict  # G and D share one dict object
        sched.D_lrate_dict = lrate_dict
        sched.minibatch_size_base = 32 # (default)
        sched.minibatch_size_dict = {8: 256, 16: 128, 32: 64, 64: 32}
        sched.minibatch_gpu_base = 4 # (default)
        sched.minibatch_gpu_dict = {8: 32, 16: 16, 32: 8, 64: 4}
        G.synthesis_func = 'G_synthesis_stylegan_revised'
        D.func_name = 'training.networks_stylegan2.D_stylegan'

    # Configs A-C: Disable path length regularization.
    if config_id in ('config-a', 'config-b', 'config-c'):
        G_loss = EasyDict(func_name='training.loss.G_logistic_ns')

    # Configs A-B: Disable lazy regularization.
    if config_id in ('config-a', 'config-b'):
        train.lazy_regularization = False

    # Config A: Switch to original StyleGAN networks.
    if config_id == 'config-a':
        G = EasyDict(func_name='training.networks_stylegan.G_style')
        D = EasyDict(func_name='training.networks_stylegan.D_basic')

    # An explicit --gamma always wins over the config-dependent value.
    if gamma is not None:
        D_loss.gamma = gamma

    submit_config.submit_target = dnnlib.SubmitTarget.LOCAL
    submit_config.local.do_not_copy_source_files = True

    # Flatten everything into the keyword arguments of dnnlib.submit_run().
    submit_kwargs = EasyDict(train)
    submit_kwargs.update(
        G_args=G, D_args=D, G_opt_args=G_opt, D_opt_args=D_opt, G_loss_args=G_loss, D_loss_args=D_loss,
        dataset_args=dataset_args, sched_args=sched, grid_args=grid, metric_arg_list=metrics, tf_config=tf_config)
    submit_kwargs.submit_config = copy.deepcopy(submit_config)
    submit_kwargs.submit_config.run_dir_root = result_dir
    submit_kwargs.submit_config.run_desc = desc
    dnnlib.submit_run(**submit_kwargs)
#----------------------------------------------------------------------------
def _str_to_bool(v):
if isinstance(v, bool):
return v
if v.lower() in ('yes', 'true', 't', 'y', '1'):
return True
elif v.lower() in ('no', 'false', 'f', 'n', '0'):
return False
else:
raise argparse.ArgumentTypeError('Boolean value expected.')
def _parse_comma_sep(s):
if s is None or s.lower() == 'none' or s == '':
return []
return s.split(',')
#----------------------------------------------------------------------------
# Epilog text shown by --help: a usage example followed by the valid config
# names and the sorted metric names taken from metric_defaults.
_examples = '''examples:
# Train StyleGAN2 using the FFHQ dataset
python %(prog)s --num-gpus=8 --data-dir=~/datasets --config=config-f --dataset=ffhq --mirror-augment=true
valid configs:
''' + ', '.join(_valid_configs) + '''
valid metrics:
''' + ', '.join(sorted([x for x in metric_defaults.keys()])) + '''
'''
def main():
    """Parse the command line, validate it, and start training via run().

    Exits with status 1 (after printing an error) when the dataset root does
    not exist, the config is not in `_valid_configs`, or a requested metric is
    not in `metric_defaults`.
    """
    parser = argparse.ArgumentParser(
        description='Train StyleGAN2.',
        epilog=_examples,
        formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument('--result-dir', help='Root directory for run results (default: %(default)s)', default='results', metavar='DIR')
    parser.add_argument('--data-dir', help='Dataset root directory', required=True)
    parser.add_argument('--dataset', help='Training dataset', required=True)
    # Not marked required: the option advertises a default, and a required
    # option would never fall back to it.
    parser.add_argument('--config', help='Training config (default: %(default)s)', default='config-f', dest='config_id', metavar='CONFIG')
    parser.add_argument('--num-gpus', help='Number of GPUs (default: %(default)s)', default=1, type=int, metavar='N')
    parser.add_argument('--total-kimg', help='Training length in thousands of images (default: %(default)s)', metavar='KIMG', default=25000, type=int)
    parser.add_argument('--gamma', help='R1 regularization weight (default is config dependent)', default=None, type=float)
    parser.add_argument('--mirror-augment', help='Mirror augment (default: %(default)s)', default=False, metavar='BOOL', type=_str_to_bool)
    parser.add_argument('--metrics', help='Comma-separated list of metrics or "none" (default: %(default)s)', default='fid50k', type=_parse_comma_sep)
    args = parser.parse_args()

    if not os.path.exists(args.data_dir):
        print('Error: dataset root directory does not exist.')
        sys.exit(1)

    if args.config_id not in _valid_configs:
        print('Error: --config value must be one of: ', ', '.join(_valid_configs))
        sys.exit(1)

    for metric in args.metrics:
        if metric not in metric_defaults:
            print('Error: unknown metric \'%s\'' % metric)
            sys.exit(1)

    # The argparse destinations match run()'s parameter names one-to-one.
    run(**vars(args))
#----------------------------------------------------------------------------
# Script entry point: invoke main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
#----------------------------------------------------------------------------
================================================
FILE: test_nvcc.cu
================================================
// Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
//
// This work is made available under the Nvidia Source Code License-NC.
// To view a copy of this license, visit
// https://nvlabs.github.io/stylegan2/license.html
#include <cstdio>
#include <cstdlib>
// Terminates the process with exit code 1 when `err` is not cudaSuccess,
// after printing the error's name and description. Returns normally otherwise.
void checkCudaError(cudaError_t err)
{
    if (err == cudaSuccess)
        return;

    printf("%s: %s\n", cudaGetErrorName(err), cudaGetErrorString(err));
    exit(1);
}
// Kernel that prints a greeting from device code. main() launches it with a
// single-thread configuration.
__global__ void cudaKernel(void)
{
    printf("GPU says hello.\n");
}
// Prints a greeting on the host, launches cudaKernel once, and waits for the
// device to finish. Any CUDA error aborts the program through checkCudaError().
int main(void)
{
    printf("CPU says hello.\n");

    // Launch with a 1x1 configuration, no kernel arguments, no shared memory, default stream.
    cudaError_t launchStatus = cudaLaunchKernel((void*)cudaKernel, 1, 1, NULL, 0, NULL);
    checkCudaError(launchStatus);

    // Block until the kernel has completed.
    cudaError_t syncStatus = cudaDeviceSynchronize();
    checkCudaError(syncStatus);

    return 0;
}
================================================
FILE: training/__init__.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
# empty
================================================
FILE: training/dataset.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Multi-resolution input data pipeline."""
import os
import glob
import numpy as np
import tensorflow as tf
import dnnlib
import dnnlib.tflib as tflib
#----------------------------------------------------------------------------
# Dataset class that loads data from tfrecords files.
class TFRecordDataset:
    """Dataset that streams images (and optional labels) from `*.tfrecords` files.

    The constructor inspects the first record of every tfrecords file in
    `tfrecord_dir`, derives the dataset resolution from the largest shape, and
    builds one `tf.data` pipeline per level-of-detail (lod). A single
    reinitializable iterator is shared by all pipelines; configure() selects
    which lod and minibatch size it serves.
    """

    def __init__(self,
        tfrecord_dir,               # Directory containing a collection of tfrecords files.
        resolution      = None,     # Dataset resolution, None = autodetect.
        label_file      = None,     # Relative path of the labels file, None = autodetect.
        max_label_size  = 0,        # 0 = no labels, 'full' = full labels, <int> = N first label components.
        max_images      = None,     # Maximum number of images to use, None = use all images.
        repeat          = True,     # Repeat dataset indefinitely?
        shuffle_mb      = 4096,     # Shuffle data within specified window (megabytes), 0 = disable shuffling.
        prefetch_mb     = 2048,     # Amount of data to prefetch (megabytes), 0 = disable prefetching.
        buffer_mb       = 256,      # Read buffer size (megabytes).
        num_threads     = 2):       # Number of concurrent threads.

        self.tfrecord_dir       = tfrecord_dir
        self.resolution         = None
        self.resolution_log2    = None
        self.shape              = []        # [channels, height, width]
        self.dtype              = 'uint8'
        self.dynamic_range      = [0, 255]
        self.label_file         = label_file
        self.label_size         = None      # components
        self.label_dtype        = None
        self._np_labels         = None
        self._tf_minibatch_in   = None
        self._tf_labels_var     = None
        self._tf_labels_dataset = None
        self._tf_datasets       = dict()    # lod => tf.data pipeline
        self._tf_iterator       = None
        self._tf_init_ops       = dict()    # lod => iterator initializer op
        self._tf_minibatch_np   = None
        self._cur_minibatch     = -1
        self._cur_lod           = -1

        # List tfrecords files and inspect their shapes.
        assert os.path.isdir(self.tfrecord_dir)
        tfr_files = sorted(glob.glob(os.path.join(self.tfrecord_dir, '*.tfrecords')))
        assert len(tfr_files) >= 1
        tfr_shapes = []
        for tfr_file in tfr_files:
            tfr_opt = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.NONE)
            # Only the first record of each file is parsed to learn its image shape.
            for record in tf.python_io.tf_record_iterator(tfr_file, tfr_opt):
                tfr_shapes.append(self.parse_tfrecord_np(record).shape)
                break

        # Autodetect label filename.
        if self.label_file is None:
            guess = sorted(glob.glob(os.path.join(self.tfrecord_dir, '*.labels')))
            if len(guess):
                self.label_file = guess[0]
        elif not os.path.isfile(self.label_file):
            # Fall back to interpreting the path relative to the tfrecords directory.
            guess = os.path.join(self.tfrecord_dir, self.label_file)
            if os.path.isfile(guess):
                self.label_file = guess

        # Determine shape and resolution.
        max_shape = max(tfr_shapes, key=np.prod)
        self.resolution = resolution if resolution is not None else max_shape[1]
        self.resolution_log2 = int(np.log2(self.resolution))
        self.shape = [max_shape[0], self.resolution, self.resolution]
        tfr_lods = [self.resolution_log2 - int(np.log2(shape[1])) for shape in tfr_shapes]
        assert all(shape[0] == max_shape[0] for shape in tfr_shapes)
        assert all(shape[1] == shape[2] for shape in tfr_shapes)
        assert all(shape[1] == self.resolution // (2**lod) for shape, lod in zip(tfr_shapes, tfr_lods))
        assert all(lod in tfr_lods for lod in range(self.resolution_log2 - 1))

        # Load labels.
        assert max_label_size == 'full' or max_label_size >= 0
        # Placeholder with zero label components, used when no labels are loaded.
        self._np_labels = np.zeros([1<<30, 0], dtype=np.float32)
        if self.label_file is not None and max_label_size != 0:
            self._np_labels = np.load(self.label_file)
            assert self._np_labels.ndim == 2
        if max_label_size != 'full' and self._np_labels.shape[1] > max_label_size:
            self._np_labels = self._np_labels[:, :max_label_size]
        if max_images is not None and self._np_labels.shape[0] > max_images:
            self._np_labels = self._np_labels[:max_images]
        self.label_size = self._np_labels.shape[1]
        self.label_dtype = self._np_labels.dtype.name

        # Build TF expressions.
        with tf.name_scope('Dataset'), tf.device('/cpu:0'):
            self._tf_minibatch_in = tf.placeholder(tf.int64, name='minibatch_in', shape=[])
            self._tf_labels_var = tflib.create_var_with_large_initial_value(self._np_labels, name='labels_var')
            self._tf_labels_dataset = tf.data.Dataset.from_tensor_slices(self._tf_labels_var)
            for tfr_file, tfr_shape, tfr_lod in zip(tfr_files, tfr_shapes, tfr_lods):
                # Files larger than the requested resolution are skipped.
                if tfr_lod < 0:
                    continue
                dset = tf.data.TFRecordDataset(tfr_file, compression_type='', buffer_size=buffer_mb<<20)
                if max_images is not None:
                    dset = dset.take(max_images)
                dset = dset.map(self.parse_tfrecord_tf, num_parallel_calls=num_threads)
                dset = tf.data.Dataset.zip((dset, self._tf_labels_dataset))
                bytes_per_item = np.prod(tfr_shape) * np.dtype(self.dtype).itemsize
                # Megabyte budgets are converted to item counts, rounding up.
                if shuffle_mb > 0:
                    dset = dset.shuffle(((shuffle_mb << 20) - 1) // bytes_per_item + 1)
                if repeat:
                    dset = dset.repeat()
                if prefetch_mb > 0:
                    dset = dset.prefetch(((prefetch_mb << 20) - 1) // bytes_per_item + 1)
                dset = dset.batch(self._tf_minibatch_in)
                self._tf_datasets[tfr_lod] = dset
            self._tf_iterator = tf.data.Iterator.from_structure(self._tf_datasets[0].output_types, self._tf_datasets[0].output_shapes)
            self._tf_init_ops = {lod: self._tf_iterator.make_initializer(dset) for lod, dset in self._tf_datasets.items()}

    def close(self):
        """No-op; nothing is released here."""
        pass

    # Use the given minibatch size and level-of-detail for the data returned by get_minibatch_tf().
    def configure(self, minibatch_size, lod=0):
        lod = int(np.floor(lod))
        assert minibatch_size >= 1 and lod in self._tf_datasets
        # Re-initialize the shared iterator only when the settings actually change.
        if self._cur_minibatch != minibatch_size or self._cur_lod != lod:
            self._tf_init_ops[lod].run({self._tf_minibatch_in: minibatch_size})
            self._cur_minibatch = minibatch_size
            self._cur_lod = lod

    # Get next minibatch as TensorFlow expressions.
    def get_minibatch_tf(self): # => images, labels
        return self._tf_iterator.get_next()

    # Get next minibatch as NumPy arrays.
    def get_minibatch_np(self, minibatch_size, lod=0): # => images, labels
        self.configure(minibatch_size, lod)
        with tf.name_scope('Dataset'):
            # The get_next() expression is created once and reused on later calls.
            if self._tf_minibatch_np is None:
                self._tf_minibatch_np = self.get_minibatch_tf()
            return tflib.run(self._tf_minibatch_np)

    # Get random labels as TensorFlow expression.
    def get_random_labels_tf(self, minibatch_size): # => labels
        with tf.name_scope('Dataset'):
            if self.label_size > 0:
                with tf.device('/cpu:0'):
                    return tf.gather(self._tf_labels_var, tf.random_uniform([minibatch_size], 0, self._np_labels.shape[0], dtype=tf.int32))
            return tf.zeros([minibatch_size, 0], self.label_dtype)

    # Get random labels as NumPy array.
    def get_random_labels_np(self, minibatch_size): # => labels
        if self.label_size > 0:
            return self._np_labels[np.random.randint(self._np_labels.shape[0], size=[minibatch_size])]
        return np.zeros([minibatch_size, 0], self.label_dtype)

    # Parse individual image from a tfrecords file into TensorFlow expression.
    @staticmethod
    def parse_tfrecord_tf(record):
        features = tf.parse_single_example(record, features={
            'shape': tf.FixedLenFeature([3], tf.int64),
            'data': tf.FixedLenFeature([], tf.string)})
        data = tf.decode_raw(features['data'], tf.uint8)
        return tf.reshape(data, features['shape'])

    # Parse individual image from a tfrecords file into NumPy array.
    @staticmethod
    def parse_tfrecord_np(record):
        ex = tf.train.Example()
        ex.ParseFromString(record)
        shape = ex.features.feature['shape'].int64_list.value # pylint: disable=no-member
        data = ex.features.feature['data'].bytes_list.value[0] # pylint: disable=no-member
        # np.fromstring() is deprecated for binary input. np.frombuffer() returns a
        # read-only view, so copy to keep the result writable as before.
        return np.frombuffer(data, dtype=np.uint8).reshape(shape).copy()
#----------------------------------------------------------------------------
# Helper func for constructing a dataset object using the given options.
def load_dataset(class_name=None, data_dir=None, verbose=False, **kwargs):
    """Construct a dataset object from the given options.

    When `tfrecord_dir` is among the keyword arguments, `class_name` defaults
    to this module's TFRecordDataset and `tfrecord_dir` is joined onto
    `data_dir` (if given). The class is resolved by name through
    dnnlib.util.get_obj_by_name() and instantiated with the remaining options.
    With `verbose`, the dataset's shape, dynamic range and label size are printed.
    """
    options = dict(kwargs)  # work on a copy
    if 'tfrecord_dir' in options:
        if class_name is None:
            class_name = __name__ + '.TFRecordDataset'
        if data_dir is not None:
            options['tfrecord_dir'] = os.path.join(data_dir, options['tfrecord_dir'])

    assert class_name is not None
    if verbose:
        print('Streaming data using %s...' % class_name)
    dataset_class = dnnlib.util.get_obj_by_name(class_name)
    dataset = dataset_class(**options)
    if verbose:
        print('Dataset shape =', np.int32(dataset.shape).tolist())
        print('Dynamic range =', dataset.dynamic_range)
        print('Label size =', dataset.label_size)
    return dataset
#----------------------------------------------------------------------------
================================================
FILE: training/loss.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Loss functions."""
import numpy as np
import tensorflow as tf
import dnnlib.tflib as tflib
from dnnlib.tflib.autosummary import autosummary
#----------------------------------------------------------------------------
# Logistic loss from the paper
# "Generative Adversarial Nets", Goodfellow et al. 2014
def G_logistic(G, D, opt, training_set, minibatch_size):
    """Saturating logistic generator loss.

    Samples latents and random labels, runs G then D on the fakes, and returns
    `(loss, None)` where loss = log(1 - sigmoid(D(G(z)))). There is no
    regularization term.
    """
    _ = opt
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    cond = training_set.get_random_labels_tf(minibatch_size)
    fakes = G.get_output_for(z, cond, is_training=True)
    fake_logits = D.get_output_for(fakes, cond, is_training=True)
    # -softplus(x) == log(1 - sigmoid(x))
    loss = -tf.nn.softplus(fake_logits) # pylint: disable=invalid-unary-operand-type
    return loss, None
def G_logistic_ns(G, D, opt, training_set, minibatch_size):
    """Non-saturating logistic generator loss.

    Returns `(loss, None)` where loss = -log(sigmoid(D(G(z)))). There is no
    regularization term.
    """
    _ = opt
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    cond = training_set.get_random_labels_tf(minibatch_size)
    fakes = G.get_output_for(z, cond, is_training=True)
    fake_logits = D.get_output_for(fakes, cond, is_training=True)
    # softplus(-x) == -log(sigmoid(x))
    loss = tf.nn.softplus(-fake_logits)
    return loss, None
def D_logistic(G, D, opt, training_set, minibatch_size, reals, labels):
    """Logistic discriminator loss without regularization.

    Scores `reals` and freshly generated fakes (both conditioned on `labels`),
    reports both score tensors through autosummary, and returns `(loss, None)`.
    """
    _ = opt, training_set
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fakes = G.get_output_for(z, labels, is_training=True)
    real_logits = D.get_output_for(reals, labels, is_training=True)
    fake_logits = D.get_output_for(fakes, labels, is_training=True)
    real_logits = autosummary('Loss/scores/real', real_logits)
    fake_logits = autosummary('Loss/scores/fake', fake_logits)
    fake_term = tf.nn.softplus(fake_logits) # -log(1-sigmoid(fake_logits))
    loss = fake_term + tf.nn.softplus(-real_logits) # -log(sigmoid(real_logits)) # pylint: disable=invalid-unary-operand-type
    return loss, None
#----------------------------------------------------------------------------
# R1 and R2 regularizers from the paper
# "Which Training Methods for GANs do actually Converge?", Mescheder et al. 2018
def D_logistic_r1(G, D, opt, training_set, minibatch_size, reals, labels, gamma=10.0):
    """Logistic discriminator loss with a gradient penalty on real images (R1).

    Returns `(loss, reg)`. `reg` is the squared gradient of the real scores
    with respect to `reals`, summed over axes 1-3 and scaled by `gamma / 2`.
    """
    _ = opt, training_set
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fakes = G.get_output_for(z, labels, is_training=True)
    real_logits = D.get_output_for(reals, labels, is_training=True)
    fake_logits = D.get_output_for(fakes, labels, is_training=True)
    real_logits = autosummary('Loss/scores/real', real_logits)
    fake_logits = autosummary('Loss/scores/fake', fake_logits)
    fake_term = tf.nn.softplus(fake_logits) # -log(1-sigmoid(fake_logits))
    loss = fake_term + tf.nn.softplus(-real_logits) # -log(sigmoid(real_logits)) # pylint: disable=invalid-unary-operand-type

    with tf.name_scope('GradientPenalty'):
        grads_wrt_reals = tf.gradients(tf.reduce_sum(real_logits), [reals])[0]
        penalty = tf.reduce_sum(tf.square(grads_wrt_reals), axis=[1,2,3])
        penalty = autosummary('Loss/gradient_penalty', penalty)
        reg = penalty * (gamma * 0.5)
    return loss, reg
def D_logistic_r2(G, D, opt, training_set, minibatch_size, reals, labels, gamma=10.0):
    """Logistic discriminator loss with a gradient penalty on generated images (R2).

    Returns `(loss, reg)`. `reg` is the squared gradient of the fake scores
    with respect to the generated images, summed over axes 1-3 and scaled by
    `gamma / 2`.
    """
    _ = opt, training_set
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fakes = G.get_output_for(z, labels, is_training=True)
    real_logits = D.get_output_for(reals, labels, is_training=True)
    fake_logits = D.get_output_for(fakes, labels, is_training=True)
    real_logits = autosummary('Loss/scores/real', real_logits)
    fake_logits = autosummary('Loss/scores/fake', fake_logits)
    fake_term = tf.nn.softplus(fake_logits) # -log(1-sigmoid(fake_logits))
    loss = fake_term + tf.nn.softplus(-real_logits) # -log(sigmoid(real_logits)) # pylint: disable=invalid-unary-operand-type

    with tf.name_scope('GradientPenalty'):
        grads_wrt_fakes = tf.gradients(tf.reduce_sum(fake_logits), [fakes])[0]
        penalty = tf.reduce_sum(tf.square(grads_wrt_fakes), axis=[1,2,3])
        penalty = autosummary('Loss/gradient_penalty', penalty)
        reg = penalty * (gamma * 0.5)
    return loss, reg
#----------------------------------------------------------------------------
# WGAN loss from the paper
# "Wasserstein Generative Adversarial Networks", Arjovsky et al. 2017
def G_wgan(G, D, opt, training_set, minibatch_size):
    """WGAN generator loss: the negated discriminator score of generated images.

    Returns `(loss, None)`; there is no regularization term.
    """
    _ = opt
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    cond = training_set.get_random_labels_tf(minibatch_size)
    fakes = G.get_output_for(z, cond, is_training=True)
    fake_scores = D.get_output_for(fakes, cond, is_training=True)
    return -fake_scores, None
def D_wgan(G, D, opt, training_set, minibatch_size, reals, labels, wgan_epsilon=0.001):
    """WGAN discriminator loss with an epsilon penalty on the real scores.

    Returns `(loss, None)` where loss = fake - real + wgan_epsilon * real^2.
    """
    _ = opt, training_set
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fakes = G.get_output_for(z, labels, is_training=True)
    real_scores = D.get_output_for(reals, labels, is_training=True)
    fake_scores = D.get_output_for(fakes, labels, is_training=True)
    real_scores = autosummary('Loss/scores/real', real_scores)
    fake_scores = autosummary('Loss/scores/fake', fake_scores)
    loss = fake_scores - real_scores

    with tf.name_scope('EpsilonPenalty'):
        eps_penalty = autosummary('Loss/epsilon_penalty', tf.square(real_scores))
        loss = loss + eps_penalty * wgan_epsilon
    return loss, None
#----------------------------------------------------------------------------
# WGAN-GP loss from the paper
# "Improved Training of Wasserstein GANs", Gulrajani et al. 2017
def D_wgan_gp(G, D, opt, training_set, minibatch_size, reals, labels, wgan_lambda=10.0, wgan_epsilon=0.001, wgan_target=1.0):
    """WGAN-GP discriminator loss.

    Returns `(loss, reg)`. `loss` is fake - real plus the epsilon penalty on
    the real scores. `reg` penalizes the deviation of the gradient norm from
    `wgan_target`, evaluated at random interpolations between real and
    generated images, scaled by `wgan_lambda / wgan_target**2`.
    """
    _ = opt, training_set
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    fakes = G.get_output_for(z, labels, is_training=True)
    real_scores = D.get_output_for(reals, labels, is_training=True)
    fake_scores = D.get_output_for(fakes, labels, is_training=True)
    real_scores = autosummary('Loss/scores/real', real_scores)
    fake_scores = autosummary('Loss/scores/fake', fake_scores)
    loss = fake_scores - real_scores

    with tf.name_scope('EpsilonPenalty'):
        eps_penalty = autosummary('Loss/epsilon_penalty', tf.square(real_scores))
        loss = loss + eps_penalty * wgan_epsilon

    with tf.name_scope('GradientPenalty'):
        # One interpolation factor per sample, broadcast over the remaining axes.
        alpha = tf.random_uniform([minibatch_size, 1, 1, 1], 0.0, 1.0, dtype=fakes.dtype)
        mixed = tflib.lerp(tf.cast(reals, fakes.dtype), fakes, alpha)
        mixed_scores = D.get_output_for(mixed, labels, is_training=True)
        mixed_scores = autosummary('Loss/scores/mixed', mixed_scores)
        grads_wrt_mixed = tf.gradients(tf.reduce_sum(mixed_scores), [mixed])[0]
        grad_norms = tf.sqrt(tf.reduce_sum(tf.square(grads_wrt_mixed), axis=[1,2,3]))
        grad_norms = autosummary('Loss/mixed_norms', grad_norms)
        penalty = tf.square(grad_norms - wgan_target)
        reg = penalty * (wgan_lambda / (wgan_target**2))
    return loss, reg
#----------------------------------------------------------------------------
# Non-saturating logistic loss with path length regularizer from the paper
# "Analyzing and Improving the Image Quality of StyleGAN", Karras et al. 2019
def G_logistic_ns_pathreg(G, D, opt, training_set, minibatch_size, pl_minibatch_shrink=2, pl_decay=0.01, pl_weight=2.0):
    """Non-saturating logistic generator loss with path length regularization.

    Returns `(loss, reg)`. `loss` is -log(sigmoid(D(G(z)))). `reg` is
    `pl_weight * (|J*y| - a)^2`, where `a` is an exponential moving average of
    the path lengths, updated with rate `pl_decay`. When
    `pl_minibatch_shrink > 1`, the regularizer is evaluated on a separate
    minibatch of `minibatch_size // pl_minibatch_shrink` samples.
    """
    _ = opt
    z = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
    cond = training_set.get_random_labels_tf(minibatch_size)
    images, dlatents = G.get_output_for(z, cond, is_training=True, return_dlatents=True)
    fake_logits = D.get_output_for(images, cond, is_training=True)
    loss = tf.nn.softplus(-fake_logits) # -log(sigmoid(fake_logits))

    # Path length regularization.
    with tf.name_scope('PathReg'):

        # Evaluate the regularization term using a smaller minibatch to conserve memory.
        if pl_minibatch_shrink > 1:
            small_batch = minibatch_size // pl_minibatch_shrink
            small_z = tf.random_normal([small_batch] + G.input_shapes[0][1:])
            small_cond = training_set.get_random_labels_tf(small_batch)
            images, dlatents = G.get_output_for(small_z, small_cond, is_training=True, return_dlatents=True)

        # Compute |J*y|.
        noise = tf.random_normal(tf.shape(images)) / np.sqrt(np.prod(G.output_shape[2:]))
        grads_wrt_dlatents = tf.gradients(tf.reduce_sum(images * noise), [dlatents])[0]
        path_lengths = tf.sqrt(tf.reduce_mean(tf.reduce_sum(tf.square(grads_wrt_dlatents), axis=2), axis=1))
        path_lengths = autosummary('Loss/pl_lengths', path_lengths)

        # Track exponential moving average of |J*y|.
        with tf.control_dependencies(None):
            mean_var = tf.Variable(name='pl_mean', trainable=False, initial_value=0.0, dtype=tf.float32)
        updated_mean = mean_var + pl_decay * (tf.reduce_mean(path_lengths) - mean_var)
        update_op = tf.assign(mean_var, updated_mean)

        # Calculate (|J*y|-a)^2.
        with tf.control_dependencies([update_op]):
            penalty = tf.square(path_lengths - updated_mean)
            penalty = autosummary('Loss/pl_penalty', penalty)

        # Apply weight.
        #
        # Note: The division in the noise term decreases the weight by num_pixels, and the
        # reduce_mean in the path lengths decreases it by num_affine_layers. The effective
        # weight then becomes:
        #
        # gamma_pl = pl_weight / num_pixels / num_affine_layers
        #          = 2 / (r^2) / (log2(r) * 2 - 2)
        #          = 1 / (r^2 * (log2(r) - 1))
        #          = ln(2) / (r^2 * (ln(r) - ln(2)))
        #
        reg = penalty * pl_weight
    return loss, reg
#----------------------------------------------------------------------------
================================================
FILE: training/misc.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Miscellaneous utility functions."""
import os
import pickle
import numpy as np
import PIL.Image
import PIL.ImageFont
import dnnlib
#----------------------------------------------------------------------------
# Convenience wrappers for pickle that are able to load data produced by
# older versions of the code, and from external URLs.
def open_file_or_url(file_or_url):
    """Open a local path in binary read mode, or a URL through dnnlib's cache.

    URLs (as judged by dnnlib.util.is_url) are opened with
    dnnlib.util.open_url using the '.stylegan2-cache' cache directory.
    """
    if not dnnlib.util.is_url(file_or_url):
        return open(file_or_url, 'rb')
    return dnnlib.util.open_url(file_or_url, cache_dir='.stylegan2-cache')
def load_pkl(file_or_url):
    """Unpickle and return the object stored at a local path or URL.

    Uses encoding='latin1' when unpickling.
    """
    # NOTE(review): unpickling can execute arbitrary code, and the source may
    # be a URL. Only load pickles from trusted locations.
    with open_file_or_url(file_or_url) as stream:
        obj = pickle.load(stream, encoding='latin1')
        return obj
def save_pkl(obj, filename):
    """Pickle `obj` to `filename` using the highest available pickle protocol."""
    protocol = pickle.HIGHEST_PROTOCOL
    with open(filename, 'wb') as stream:
        pickle.dump(obj, stream, protocol=protocol)
#----------------------------------------------------------------------------
# Image utils.
def adjust_dynamic_range(data, drange_in, drange_out):
    """Linearly remap `data` from the range `drange_in` to `drange_out`.

    Each range is a `[low, high]` pair. When both ranges compare equal the
    input is returned unchanged; otherwise the scale and bias are computed in
    float32 and applied as `data * scale + bias`.
    """
    if drange_in == drange_out:
        return data
    in_lo, in_hi = np.float32(drange_in[0]), np.float32(drange_in[1])
    out_lo, out_hi = np.float32(drange_out[0]), np.float32(drange_out[1])
    scale = (out_hi - out_lo) / (in_hi - in_lo)
    bias = out_lo - in_lo * scale
    return data * scale + bias
def create_image_grid(images, grid_size=None):
    """Tile a batch of images into a single grid image.

    `images` has 3 or 4 dimensions with the batch first and height, width
    last. `grid_size` is `(grid_w, grid_h)` in cells; when None, a roughly
    square grid is chosen from the batch size. Images fill the grid row by
    row; unused cells stay zero. The result has the dtype of `images`.
    """
    assert images.ndim == 3 or images.ndim == 4
    num = images.shape[0]
    img_h, img_w = images.shape[-2], images.shape[-1]

    if grid_size is None:
        grid_w = max(int(np.ceil(np.sqrt(num))), 1)
        grid_h = max((num - 1) // grid_w + 1, 1)
    else:
        grid_w, grid_h = tuple(grid_size)

    canvas_shape = list(images.shape[1:-2]) + [grid_h * img_h, grid_w * img_w]
    grid = np.zeros(canvas_shape, dtype=images.dtype)
    for idx in range(num):
        row, col = divmod(idx, grid_w)
        top, left = row * img_h, col * img_w
        grid[..., top : top + img_h, left : left + img_w] = images[idx]
    return grid
def convert_to_pil_image(image, drange=[0,1]):
    """Convert a HW or CHW array with value range `drange` into a PIL image.

    A single-channel CHW input becomes an 'L' image; other CHW inputs are
    transposed to HWC and become 'RGB'. Values are remapped to [0, 255],
    rounded, clipped and cast to uint8.
    """
    assert image.ndim == 2 or image.ndim == 3
    if image.ndim == 3:
        # grayscale CHW => HW, otherwise CHW => HWC
        image = image[0] if image.shape[0] == 1 else image.transpose(1, 2, 0)

    scaled = adjust_dynamic_range(image, drange, [0,255])
    pixels = np.rint(scaled).clip(0, 255).astype(np.uint8)
    mode = 'L' if pixels.ndim != 3 else 'RGB'
    return PIL.Image.fromarray(pixels, mode)
def save_image_grid(images, filename, drange=[0,1], grid_size=None):
    """Tile `images` into a grid, convert it to a PIL image and save it to `filename`."""
    grid = create_image_grid(images, grid_size)
    pil_image = convert_to_pil_image(grid, drange)
    pil_image.save(filename)
def apply_mirror_augment(minibatch):
    """Return a copy of `minibatch` with a random subset of items mirrored.

    Each item along the first axis is reversed along its last axis with
    probability 0.5. The input array is left untouched.
    """
    flip_mask = np.random.rand(minibatch.shape[0]) < 0.5
    augmented = np.array(minibatch)  # copy, so the caller's array is not modified
    augmented[flip_mask] = augmented[flip_mask, :, :, ::-1]
    return augmented
#----------------------------------------------------------------------------
# Loading data from previous training runs.
def parse_config_for_previous_run(run_dir):
    """Load the training and dataset options stored by a previous run.

    Reads `submit_config.pkl` from `run_dir` and returns a dict with `train`
    (the stored `run_func_kwargs`, or {}) and `dataset` (its `dataset_args`
    entry, or {}).
    """
    config_path = os.path.join(run_dir, 'submit_config.pkl')
    with open(config_path, 'rb') as stream:
        submit_config = pickle.load(stream)
    train_kwargs = submit_config.get('run_func_kwargs', {})
    dataset_kwargs = train_kwargs.get('dataset_args', {})
    return dict(train=train_kwargs, dataset=dataset_kwargs)
#----------------------------------------------------------------------------
# Size and contents of the image snapshot grids that are exported
# periodically during training.
def setup_snapshot_image_grid(training_set,
    size    = '1080p',      # '1080p' = to be viewed on 1080p display, '4k' = to be viewed on 4k display.
    layout  = 'random'):    # 'random' = grid contents are selected randomly, 'row_per_class' = each row corresponds to one class label.
    """Choose the snapshot grid dimensions and fill it with real images and labels.

    Returns `((gw, gh), reals, labels)`. `reals` and `labels` hold `gw * gh`
    entries drawn from `training_set`. An unrecognized `size` gives a 1x1
    grid; an unrecognized `layout` leaves the arrays zero-filled.
    """
    # Select size: (name, display width, display height, min columns, min rows).
    gw = 1; gh = 1
    size_presets = (
        ('1080p', 1920, 1080, 3, 2),
        ('4k',    3840, 2160, 7, 4),
        ('8k',    7680, 4320, 7, 4),
    )
    for preset, disp_w, disp_h, min_w, min_h in size_presets:
        if size == preset:
            gw = np.clip(disp_w // training_set.shape[2], min_w, 32)
            gh = np.clip(disp_h // training_set.shape[1], min_h, 32)

    # Initialize data arrays.
    num_cells = gw * gh
    reals = np.zeros([num_cells] + training_set.shape, dtype=training_set.dtype)
    labels = np.zeros([num_cells, training_set.label_size], dtype=training_set.label_dtype)

    # Random layout.
    if layout == 'random':
        reals[:], labels[:] = training_set.get_minibatch_np(gw * gh)

    # Class-conditional layouts: each block of bw x bh cells collects samples of one class.
    class_layouts = dict(row_per_class=[gw,1], col_per_class=[1,gh], class4x4=[4,4])
    if layout in class_layouts:
        bw, bh = class_layouts[layout]
        block_capacity = bw * bh
        nw = (gw - 1) // bw + 1
        nh = (gh - 1) // bh + 1
        blocks = [[] for _i in range(nw * nh)]
        for _iter in range(1000000):
            real, label = training_set.get_minibatch_np(1)
            slot = np.argmax(label[0])
            # If this class's block is full, try the next block reserved for the same class.
            while slot < len(blocks) and len(blocks[slot]) >= block_capacity:
                slot += training_set.label_size
            if slot < len(blocks):
                blocks[slot].append((real, label))
                if all(len(block) >= block_capacity for block in blocks):
                    break

        # Copy the collected samples into their grid cells.
        for block_idx, block in enumerate(blocks):
            block_row, block_col = divmod(block_idx, nw)
            for item_idx, (real, label) in enumerate(block):
                item_row, item_col = divmod(item_idx, bw)
                x = block_col * bw + item_col
                y = block_row * bh + item_row
                if x < gw and y < gh:
                    cell = x + y * gw
                    reals[cell] = real[0]
                    labels[cell] = label[0]

    return (gw, gh), reals, labels
#----------------------------------------------------------------------------
================================================
FILE: training/networks_stylegan.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Network architectures used in the StyleGAN paper."""
import numpy as np
import tensorflow as tf
import dnnlib
import dnnlib.tflib as tflib
# NOTE: Do not import any application-specific modules here!
# Specify all network parameters as kwargs.
#----------------------------------------------------------------------------
# Primitive ops for manipulating 4D activation tensors.
# The gradients of these are not necessarily efficient or even meaningful.
def _blur2d(x, f=[1,2,1], normalize=True, flip=False, stride=1):
    """Filter a 4D activation tensor with a fixed FIR kernel, channel by channel.

    Args:
        x: 4D tensor; every dimension except the first must be statically known.
           The convolution is issued with data_format='NCHW'.
        f: Filter taps. A 1D sequence is expanded to 2D via an outer product
           with itself; a 2D array is used as-is.
        normalize: If True, divide the kernel by the sum of its taps.
        flip: If True, mirror the kernel along both of its axes.
        stride: Integer stride (>= 1) applied along the last two axes.

    Returns:
        The filtered tensor, cast back to the dtype of `x`. When the finalized
        kernel is the 1x1 identity and `stride == 1`, `x` itself is returned.
    """
    assert x.shape.ndims == 4 and all(dim.value is not None for dim in x.shape[1:])
    assert isinstance(stride, int) and stride >= 1

    # Finalize filter kernel (np.array() copies, so the default list is never mutated).
    f = np.array(f, dtype=np.float32)
    if f.ndim == 1:
        f = f[:, np.newaxis] * f[np.newaxis, :]
    assert f.ndim == 2
    if normalize:
        f /= np.sum(f)
    if flip:
        f = f[::-1, ::-1]

    # No-op => early exit.
    # This has to be tested while the kernel is still 2D: once it has been
    # expanded to [h, w, channels, 1] below, its shape can never equal (1, 1).
    # A strided 1x1 kernel still subsamples, so it only counts as a no-op
    # when stride == 1.
    if f.shape == (1, 1) and f[0, 0] == 1 and stride == 1:
        return x

    # Replicate the kernel across input channels for the depthwise convolution.
    f = f[:, :, np.newaxis, np.newaxis]
    f = np.tile(f, [1, 1, int(x.shape[1]), 1])

    # Convolve using depthwise_conv2d.
    orig_dtype = x.dtype
    x = tf.cast(x, tf.float32)  # tf.nn.depthwise_conv2d() doesn't support fp16
    f = tf.constant(f, dtype=x.dtype, name='filter')
    strides = [1, 1, stride, stride]
    x = tf.nn.depthwise_conv2d(x, f, strides=strides, padding='SAME', data_format='NCHW')
    x = tf.cast(x, orig_dtype)
    return x
def _upscale2d(x, factor=2, gain=1):
    """Scale a 4D tensor by `gain` and replicate each element `factor` times along axes 2 and 3.

    All dimensions of `x` except the first must be statically known.
    """
    assert x.shape.ndims == 4 and all(dim.value is not None for dim in x.shape[1:])
    assert isinstance(factor, int) and factor >= 1

    # Apply gain.
    if gain != 1:
        x *= gain

    # Upscale using tf.tile(); a factor of 1 leaves the tensor as it is.
    if factor != 1:
        shape = x.shape
        x = tf.reshape(x, [-1, shape[1], shape[2], 1, shape[3], 1])
        x = tf.tile(x, [1, 1, 1, factor, 1, factor])
        x = tf.reshape(x, [-1, shape[1], shape[2] * factor, shape[3] * factor])
    return x
def _downscale2d(x, factor=2, gain=1):
    """Scale a 4D tensor by `gain` and shrink axes 2 and 3 by an integer `factor`.

    All dimensions of `x` except the first must be statically known.
    """
    assert x.shape.ndims == 4 and all(dim.value is not None for dim in x.shape[1:])
    assert isinstance(factor, int) and factor >= 1

    # 2x2, float32 => downscale using _blur2d().
    # The 1D taps are sqrt(gain)/factor so that their outer product carries the full gain.
    if factor == 2 and x.dtype == tf.float32:
        taps = [np.sqrt(gain) / factor] * factor
        return _blur2d(x, f=taps, normalize=False, stride=factor)

    # Apply gain.
    if gain != 1:
        x *= gain

    # No-op => early exit.
    if factor == 1:
        return x

    # Large factor => downscale using tf.nn.avg_pool().
    # NOTE: Requires tf_config['graph_options.place_pruned_graph']=True to work.
    window = [1, 1, factor, factor]
    return tf.nn.avg_pool(x, ksize=window, strides=window, padding='VALID', data_format='NCHW')
#----------------------------------------------------------------------------
# High-level ops for manipulating 4D activation tensors.
# The gradients of these are meant to be as efficient as possible.
def blur2d(x, f=[1,2,1], normalize=True):
    """Apply _blur2d() to `x` with hand-written first- and second-order gradients.

    The first-order gradient filters with the flipped kernel; the second-order
    gradient filters with the original kernel again.
    """
    with tf.variable_scope('Blur2D'):
        @tf.custom_gradient
        def func(x):
            y = _blur2d(x, f, normalize)

            @tf.custom_gradient
            def grad(dy):
                dx = _blur2d(dy, f, normalize, flip=True)

                def grad_of_grad(ddx):
                    return _blur2d(ddx, f, normalize)
                return dx, grad_of_grad
            return y, grad
        return func(x)
def upscale2d(x, factor=2):
    """Apply _upscale2d() to `x` with hand-written first- and second-order gradients.

    The first-order gradient is a downscale with gain factor**2; the
    second-order gradient is the upscale itself.
    """
    with tf.variable_scope('Upscale2D'):
        @tf.custom_gradient
        def func(x):
            y = _upscale2d(x, factor)

            @tf.custom_gradient
            def grad(dy):
                dx = _downscale2d(dy, factor, gain=factor**2)

                def grad_of_grad(ddx):
                    return _upscale2d(ddx, factor)
                return dx, grad_of_grad
            return y, grad
        return func(x)
def downscale2d(x, factor=2):
    """Apply _downscale2d() to `x` with hand-written first- and second-order gradients.

    The first-order gradient is an upscale with gain 1/factor**2; the
    second-order gradient is the downscale itself.
    """
    with tf.variable_scope('Downscale2D'):
        @tf.custom_gradient
        def func(x):
            y = _downscale2d(x, factor)

            @tf.custom_gradient
            def grad(dy):
                dx = _upscale2d(dy, factor, gain=1/factor**2)

                def grad_of_grad(ddx):
                    return _downscale2d(ddx, factor)
                return dx, grad_of_grad
            return y, grad
        return func(x)
#----------------------------------------------------------------------------
# Get/create weight tensor for a convolutional or fully-connected layer.
def get_weight(shape, gain=np.sqrt(2), use_wscale=False, lrmul=1):
    """Create (or fetch) the variable 'weight' and return it times a runtime coefficient.

    Args:
        shape: [kernel, kernel, fmaps_in, fmaps_out] or [in, out]; fan-in is
               the product of all entries but the last.
        gain: Numerator of the He standard deviation.
        use_wscale: True = initialize with std 1/lrmul and apply the He std at
                    runtime; False = bake the He std into the initializer.
        lrmul: Learning rate multiplier folded into the initializer and the
               runtime coefficient.
    """
    fan_in = np.prod(shape[:-1])
    he_std = gain / np.sqrt(fan_in)  # He init

    # Equalized learning rate and custom learning rate multiplier.
    if use_wscale:
        init_std, runtime_coef = 1.0 / lrmul, he_std * lrmul
    else:
        init_std, runtime_coef = he_std / lrmul, lrmul

    # Create variable.
    weight = tf.get_variable('weight', shape=shape, initializer=tf.initializers.random_normal(0, init_std))
    return weight * runtime_coef
#----------------------------------------------------------------------------
# Fully-connected layer.
def dense(x, fmaps, **kwargs):
    """Multiply `x` by a [inputs, fmaps] weight matrix; extra kwargs go to get_weight().

    Inputs of rank greater than 2 are first flattened to [-1, prod(dims[1:])].
    """
    if len(x.shape) > 2:
        flat_size = np.prod([d.value for d in x.shape[1:]])
        x = tf.reshape(x, [-1, flat_size])
    weight = get_weight([x.shape[1].value, fmaps], **kwargs)
    return tf.matmul(x, tf.cast(weight, x.dtype))
#----------------------------------------------------------------------------
# Convolutional layer.
def conv2d(x, fmaps, kernel, **kwargs):
    """Stride-1, 'SAME'-padded NCHW convolution with an odd square kernel.

    Extra kwargs are forwarded to get_weight().
    """
    assert kernel >= 1 and kernel % 2 == 1
    weight = get_weight([kernel, kernel, x.shape[1].value, fmaps], **kwargs)
    weight = tf.cast(weight, x.dtype)
    return tf.nn.conv2d(x, weight, strides=[1,1,1,1], padding='SAME', data_format='NCHW')
#----------------------------------------------------------------------------
# Fused convolution + scaling.
# Faster and uses less memory than performing the operations separately.
def upscale2d_conv2d(x, fmaps, kernel, fused_scale='auto', **kwargs):
    """2x upscale followed by convolution, optionally fused into one transposed convolution.

    With fused_scale='auto' the fused path is chosen when the smaller spatial
    dimension of the output (twice that of the input) is at least 128.
    """
    assert kernel >= 1 and kernel % 2 == 1
    assert fused_scale in [True, False, 'auto']
    if fused_scale == 'auto':
        fused_scale = min(x.shape[2:]) * 2 >= 128

    # Not fused => call the individual ops directly.
    if not fused_scale:
        return conv2d(upscale2d(x), fmaps, kernel, **kwargs)

    # Fused => perform both ops simultaneously using tf.nn.conv2d_transpose().
    weight = get_weight([kernel, kernel, x.shape[1].value, fmaps], **kwargs)
    weight = tf.transpose(weight, [0, 1, 3, 2])  # [kernel, kernel, fmaps_out, fmaps_in]
    weight = tf.pad(weight, [[1,1], [1,1], [0,0], [0,0]], mode='CONSTANT')
    # Sum the four one-pixel-shifted copies of the padded kernel.
    weight = tf.add_n([weight[1:, 1:], weight[:-1, 1:], weight[1:, :-1], weight[:-1, :-1]])
    weight = tf.cast(weight, x.dtype)
    output_shape = [tf.shape(x)[0], fmaps, x.shape[2] * 2, x.shape[3] * 2]
    return tf.nn.conv2d_transpose(x, weight, output_shape, strides=[1,1,2,2], padding='SAME', data_format='NCHW')
def conv2d_downscale2d(x, fmaps, kernel, fused_scale='auto', **kwargs):
    """Convolution followed by 2x downscale, optionally fused into one strided convolution.

    With fused_scale='auto' the fused path is chosen when the smaller spatial
    dimension of the input is at least 128.
    """
    assert kernel >= 1 and kernel % 2 == 1
    assert fused_scale in [True, False, 'auto']
    if fused_scale == 'auto':
        fused_scale = min(x.shape[2:]) >= 128

    # Not fused => call the individual ops directly.
    if not fused_scale:
        return downscale2d(conv2d(x, fmaps, kernel, **kwargs))

    # Fused => perform both ops simultaneously using tf.nn.conv2d().
    weight = get_weight([kernel, kernel, x.shape[1].value, fmaps], **kwargs)
    weight = tf.pad(weight, [[1,1], [1,1], [0,0], [0,0]], mode='CONSTANT')
    # Average the four one-pixel-shifted copies of the padded kernel.
    weight = tf.add_n([weight[1:, 1:], weight[:-1, 1:], weight[1:, :-1], weight[:-1, :-1]]) * 0.25
    weight = tf.cast(weight, x.dtype)
    return tf.nn.conv2d(x, weight, strides=[1,1,2,2], padding='SAME', data_format='NCHW')
#----------------------------------------------------------------------------
# Apply bias to the given activation tensor.
def apply_bias(x, lrmul=1):
    """Add the zero-initialized variable 'bias' (scaled by `lrmul`) along axis 1 of `x`."""
    bias = tf.get_variable('bias', shape=[x.shape[1]], initializer=tf.initializers.zeros()) * lrmul
    bias = tf.cast(bias, x.dtype)
    if len(x.shape) != 2:
        # Reshape to [1, C, 1, 1] so the bias broadcasts over a 4D tensor.
        bias = tf.reshape(bias, [1, -1, 1, 1])
    return x + bias
#----------------------------------------------------------------------------
# Leaky ReLU activation. More efficient than tf.nn.leaky_relu() and supports FP16.
def leaky_relu(x, alpha=0.2):
    """Leaky ReLU computed as max(x, alpha*x), with hand-written first- and second-order gradients."""
    with tf.variable_scope('LeakyReLU'):
        alpha = tf.constant(alpha, dtype=x.dtype, name='alpha')

        @tf.custom_gradient
        def func(x):
            y = tf.maximum(x, x * alpha)

            @tf.custom_gradient
            def grad(dy):
                # Both gradient orders are gated on the sign of the forward output.
                dx = tf.where(y >= 0, dy, dy * alpha)

                def grad_of_grad(ddx):
                    return tf.where(y >= 0, ddx, ddx * alpha)
                return dx, grad_of_grad
            return y, grad
        return func(x)
#----------------------------------------------------------------------------
# Pixelwise feature vector normalization.
def pixel_norm(x, epsilon=1e-8):
    """Divide `x` by the root of its mean square over axis 1 (epsilon added for stability)."""
    with tf.variable_scope('PixelNorm'):
        epsilon = tf.constant(epsilon, dtype=x.dtype, name='epsilon')
        mean_square = tf.reduce_mean(tf.square(x), axis=1, keepdims=True)
        return x * tf.rsqrt(mean_square + epsilon)
#----------------------------------------------------------------------------
# Instance normalization.
def instance_norm(x, epsilon=1e-8):
    """Normalize a 4D tensor to zero mean and unit mean-square over axes 2 and 3.

    The computation runs in float32 and the result is cast back to the dtype of `x`.
    """
    assert len(x.shape) == 4  # NCHW
    with tf.variable_scope('InstanceNorm'):
        orig_dtype = x.dtype
        x = tf.cast(x, tf.float32)
        x = x - tf.reduce_mean(x, axis=[2,3], keepdims=True)
        epsilon = tf.constant(epsilon, dtype=x.dtype, name='epsilon')
        x = x * tf.rsqrt(tf.reduce_mean(tf.square(x), axis=[2,3], keepdims=True) + epsilon)
        return tf.cast(x, orig_dtype)
#----------------------------------------------------------------------------
# Style modulation.
def style_mod(x, dlatent, **kwargs):
    """Scale and shift `x` per channel using a dense projection of `dlatent`.

    The projection yields two values per channel of `x`: the first is used as
    (scale - 1), the second as the additive shift. Extra kwargs go to dense().
    """
    with tf.variable_scope('StyleMod'):
        style = apply_bias(dense(dlatent, fmaps=x.shape[1]*2, gain=1, **kwargs))
        # [batch, 2, channels, 1, ...] so each half broadcasts over the trailing axes of x.
        style = tf.reshape(style, [-1, 2, x.shape[1]] + [1] * (len(x.shape) - 2))
        scale = style[:,0] + 1
        scaled = x * scale
        return scaled + style[:,1]
#----------------------------------------------------------------------------
# Noise input.
def apply_noise(x, noise_var=None, randomize_noise=True):
    """Add single-channel noise to `x`, scaled by a per-channel weight initialized to zero.

    Fresh random-normal noise is drawn unless `noise_var` is given and
    `randomize_noise` is False, in which case `noise_var` is used.
    """
    assert len(x.shape) == 4  # NCHW
    with tf.variable_scope('Noise'):
        use_stored_noise = noise_var is not None and not randomize_noise
        if use_stored_noise:
            noise = tf.cast(noise_var, x.dtype)
        else:
            noise = tf.random_normal([tf.shape(x)[0], 1, x.shape[2], x.shape[3]], dtype=x.dtype)
        weight = tf.get_variable('weight', shape=[x.shape[1].value], initializer=tf.initializers.zeros())
        per_channel = tf.reshape(tf.cast(weight, x.dtype), [1, -1, 1, 1])
        return x + noise * per_channel
#----------------------------------------------------------------------------
# Minibatch standard deviation.
def minibatch_stddev_layer(x, group_size=4, num_new_features=1):
    """Append `num_new_features` feature maps holding group-wise standard deviation statistics."""
    with tf.variable_scope('MinibatchStddev'):
        group_size = tf.minimum(group_size, tf.shape(x)[0])  # Minibatch must be divisible by (or smaller than) group_size.
        shape = x.shape                                      # [NCHW]    Input shape.
        stat = tf.reshape(x, [group_size, -1, num_new_features, shape[1]//num_new_features, shape[2], shape[3]])  # [GMncHW] Split minibatch into M groups of size G. Split channels into n channel groups c.
        stat = tf.cast(stat, tf.float32)                     # [GMncHW] Cast to FP32.
        stat = stat - tf.reduce_mean(stat, axis=0, keepdims=True)  # [GMncHW] Subtract mean over group.
        stat = tf.reduce_mean(tf.square(stat), axis=0)       # [MncHW]  Calc variance over group.
        stat = tf.sqrt(stat + 1e-8)                          # [MncHW]  Calc stddev over group.
        stat = tf.reduce_mean(stat, axis=[2,3,4], keepdims=True)  # [Mn111] Take average over fmaps and pixels.
        stat = tf.reduce_mean(stat, axis=[2])                # [Mn11]   Drop the singleton channel axis.
        stat = tf.cast(stat, x.dtype)                        # [Mn11]   Cast back to original data type.
        stat = tf.tile(stat, [group_size, 1, shape[2], shape[3]])  # [NnHW]   Replicate over group and pixels.
        return tf.concat([x, stat], axis=1)                  # [NCHW]   Append as new fmap.
#----------------------------------------------------------------------------
# Style-based generator used in the StyleGAN paper.
# Composed of two sub-networks (G_mapping and G_synthesis) that are defined below.
def G_style(
    latents_in,                                     # First input: Latent vectors (Z) [minibatch, latent_size].
    labels_in,                                      # Second input: Conditioning labels [minibatch, label_size].
    truncation_psi          = 0.7,                  # Style strength multiplier for the truncation trick. None = disable.
    truncation_cutoff       = 8,                    # Number of layers for which to apply the truncation trick. None = disable.
    truncation_psi_val      = None,                 # Value for truncation_psi to use during validation.
    truncation_cutoff_val   = None,                 # Value for truncation_cutoff to use during validation.
    dlatent_avg_beta        = 0.995,                # Decay for tracking the moving average of W during training. None = disable.
    style_mixing_prob       = 0.9,                  # Probability of mixing styles during training. None = disable.
    is_training             = False,                # Network is under training? Enables and disables specific features.
    is_validation           = False,                # Network is under validation? Chooses which value to use for truncation_psi.
    is_template_graph       = False,                # True = template graph constructed by the Network class, False = actual evaluation.
    components              = dnnlib.EasyDict(),    # Container for sub-networks. Retained between calls.
    **kwargs):                                      # Arguments for sub-networks (G_mapping and G_synthesis).
    """Build the full generator: mapping network, optional W tricks, then synthesis network.

    Between the two sub-networks this optionally tracks a moving average of W,
    mixes in styles from a second random latent, and applies the truncation
    trick. The truncation trick is disabled while training; the moving average
    and style mixing are disabled outside of training.
    """

    def is_static(value):
        # A concrete Python value: neither None nor a TensorFlow expression.
        return value is not None and not tflib.is_tf_expression(value)

    # Validate arguments.
    assert not (is_training and is_validation)
    assert isinstance(components, dnnlib.EasyDict)
    if is_validation:
        truncation_psi = truncation_psi_val
        truncation_cutoff = truncation_cutoff_val
    if is_training or (is_static(truncation_psi) and truncation_psi == 1):
        truncation_psi = None
    if is_training or (is_static(truncation_cutoff) and truncation_cutoff <= 0):
        truncation_cutoff = None
    if not is_training or (is_static(dlatent_avg_beta) and dlatent_avg_beta == 1):
        dlatent_avg_beta = None
    if not is_training or (is_static(style_mixing_prob) and style_mixing_prob <= 0):
        style_mixing_prob = None

    # Setup components.
    if 'synthesis' not in components:
        components.synthesis = tflib.Network('G_synthesis', func_name=G_synthesis, **kwargs)
    synthesis_input_shape = components.synthesis.input_shape
    num_layers = synthesis_input_shape[1]
    dlatent_size = synthesis_input_shape[2]
    if 'mapping' not in components:
        components.mapping = tflib.Network('G_mapping', func_name=G_mapping, dlatent_broadcast=num_layers, **kwargs)

    # Setup variables.
    lod_in = tf.get_variable('lod', initializer=np.float32(0), trainable=False)
    dlatent_avg = tf.get_variable('dlatent_avg', shape=[dlatent_size], initializer=tf.initializers.zeros(), trainable=False)

    # Evaluate mapping network.
    dlatents = components.mapping.get_output_for(latents_in, labels_in, **kwargs)

    # Update moving average of W.
    if dlatent_avg_beta is not None:
        with tf.variable_scope('DlatentAvg'):
            batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0)
            update_op = tf.assign(dlatent_avg, tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta))
            # Route dlatents through an identity op so that using them triggers the update.
            with tf.control_dependencies([update_op]):
                dlatents = tf.identity(dlatents)

    # Perform style mixing regularization.
    if style_mixing_prob is not None:
        with tf.name_scope('StyleMix'):
            latents2 = tf.random_normal(tf.shape(latents_in))
            dlatents2 = components.mapping.get_output_for(latents2, labels_in, **kwargs)
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
            cur_layers = num_layers - tf.cast(lod_in, tf.int32) * 2
            # With probability style_mixing_prob pick a random cutoff layer; otherwise do not mix.
            mixing_cutoff = tf.cond(
                tf.random_uniform([], 0.0, 1.0) < style_mixing_prob,
                lambda: tf.random_uniform([], 1, cur_layers, dtype=tf.int32),
                lambda: cur_layers)
            keep_first = tf.broadcast_to(layer_idx < mixing_cutoff, tf.shape(dlatents))
            dlatents = tf.where(keep_first, dlatents, dlatents2)

    # Apply truncation trick.
    if truncation_psi is not None and truncation_cutoff is not None:
        with tf.variable_scope('Truncation'):
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
            ones = np.ones(layer_idx.shape, dtype=np.float32)
            coefs = tf.where(layer_idx < truncation_cutoff, truncation_psi * ones, ones)
            dlatents = tflib.lerp(dlatent_avg, dlatents, coefs)

    # Evaluate synthesis network.
    # The synthesis network's own 'lod' variable is synced to lod_in first.
    sync_lod = tf.assign(components.synthesis.find_var('lod'), lod_in)
    with tf.control_dependencies([sync_lod]):
        images_out = components.synthesis.get_output_for(dlatents, force_clean_graph=is_template_graph, **kwargs)
    return tf.identity(images_out, name='images_out')
#----------------------------------------------------------------------------
# Mapping network used in the StyleGAN paper.
def G_mapping(
    latents_in,                             # First input: Latent vectors (Z) [minibatch, latent_size].
    labels_in,                              # Second input: Conditioning labels [minibatch, label_size].
    latent_size             = 512,          # Latent vector (Z) dimensionality.
    label_size              = 0,            # Label dimensionality, 0 if no labels.
    dlatent_size            = 512,          # Disentangled latent (W) dimensionality.
    dlatent_broadcast       = None,         # Output disentangled latent (W) as [minibatch, dlatent_size] or [minibatch, dlatent_broadcast, dlatent_size].
    mapping_layers          = 8,            # Number of mapping layers.
    mapping_fmaps           = 512,          # Number of activations in the mapping layers.
    mapping_lrmul           = 0.01,         # Learning rate multiplier for the mapping layers.
    mapping_nonlinearity    = 'lrelu',      # Activation function: 'relu', 'lrelu'.
    use_wscale              = True,         # Enable equalized learning rate?
    normalize_latents       = True,         # Normalize latent vectors (Z) before feeding them to the mapping layers?
    dtype                   = 'float32',    # Data type to use for activations and outputs.
    **_kwargs):                             # Ignore unrecognized keyword args.
    """Map latents (Z), optionally concatenated with embedded labels, to disentangled latents (W).

    The network is a stack of `mapping_layers` dense layers; the last one
    outputs `dlatent_size` activations. The result is optionally tiled along
    a new axis 1 of length `dlatent_broadcast`.
    """
    activations = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}
    act, gain = activations[mapping_nonlinearity]

    # Inputs.
    latents_in.set_shape([None, latent_size])
    labels_in.set_shape([None, label_size])
    latents_in = tf.cast(latents_in, dtype)
    labels_in = tf.cast(labels_in, dtype)
    x = latents_in

    # Embed labels and concatenate them with latents.
    if label_size:
        with tf.variable_scope('LabelConcat'):
            embed_weight = tf.get_variable('weight', shape=[label_size, latent_size], initializer=tf.initializers.random_normal())
            embedded = tf.matmul(labels_in, tf.cast(embed_weight, dtype))
            x = tf.concat([x, embedded], axis=1)

    # Normalize latents.
    if normalize_latents:
        x = pixel_norm(x)

    # Mapping layers.
    last_layer = mapping_layers - 1
    for layer_idx in range(mapping_layers):
        with tf.variable_scope('Dense%d' % layer_idx):
            fmaps = dlatent_size if layer_idx == last_layer else mapping_fmaps
            x = dense(x, fmaps=fmaps, gain=gain, use_wscale=use_wscale, lrmul=mapping_lrmul)
            x = apply_bias(x, lrmul=mapping_lrmul)
            x = act(x)

    # Broadcast.
    if dlatent_broadcast is not None:
        with tf.variable_scope('Broadcast'):
            x = tf.tile(x[:, np.newaxis], [1, dlatent_broadcast, 1])

    # Output.
    assert x.dtype == tf.as_dtype(dtype)
    return tf.identity(x, name='dlatents_out')
#----------------------------------------------------------------------------
# Synthesis network used in the StyleGAN paper.
def G_synthesis(
    dlatents_in,                        # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size].
    dlatent_size        = 512,          # Disentangled latent (W) dimensionality.
    num_channels        = 3,            # Number of output color channels.
    resolution          = 1024,         # Output resolution.
    fmap_base           = 8192,         # Overall multiplier for the number of feature maps.
    fmap_decay          = 1.0,          # log2 feature map reduction when doubling the resolution.
    fmap_max            = 512,          # Maximum number of feature maps in any layer.
    use_styles          = True,         # Enable style inputs?
    const_input_layer   = True,         # First layer is a learned constant?
    use_noise           = True,         # Enable noise inputs?
    randomize_noise     = True,         # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables.
    nonlinearity        = 'lrelu',      # Activation function: 'relu', 'lrelu'
    use_wscale          = True,         # Enable equalized learning rate?
    use_pixel_norm      = False,        # Enable pixelwise feature vector normalization?
    use_instance_norm   = True,         # Enable instance normalization?
    dtype               = 'float32',    # Data type to use for activations and outputs.
    fused_scale         = 'auto',       # True = fused convolution + scaling, False = separate ops, 'auto' = decide automatically.
    blur_filter         = [1,2,1],      # Low-pass filter to apply when resampling activations. None = no filtering.
    structure           = 'auto',       # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
    is_template_graph   = False,        # True = template graph constructed by the Network class, False = actual evaluation.
    force_clean_graph   = False,        # True = construct a clean graph that looks nice in TensorBoard, False = default behavior.
    **_kwargs):                         # Ignore unrecognized keyword args.
    """Build the synthesis network that turns per-layer disentangled latents into images.

    There are two layers per resolution from 4x4 up to `resolution`. Each
    layer ends with the same epilogue: noise, bias, activation, optional
    normalizations and optional style modulation. `structure` selects how the
    per-resolution blocks are combined into the output image.
    """
    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4

    def nf(stage):
        # Number of feature maps for the given stage, capped at fmap_max.
        return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max)

    def blur(x):
        return blur2d(x, blur_filter) if blur_filter else x

    if is_template_graph:
        force_clean_graph = True
    if force_clean_graph:
        randomize_noise = False
    if structure == 'auto':
        structure = 'linear' if force_clean_graph else 'recursive'
    activations = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}
    act, gain = activations[nonlinearity]
    num_layers = resolution_log2 * 2 - 2
    num_styles = num_layers if use_styles else 1
    images_out = None

    # Primary inputs.
    dlatents_in.set_shape([None, num_styles, dlatent_size])
    dlatents_in = tf.cast(dlatents_in, dtype)
    lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0), trainable=False), dtype)

    # Noise inputs.
    noise_inputs = []
    if use_noise:
        for layer_idx in range(num_layers):
            res = layer_idx // 2 + 2
            shape = [1, use_noise, 2**res, 2**res]
            noise_var = tf.get_variable('noise%d' % layer_idx, shape=shape, initializer=tf.initializers.random_normal(), trainable=False)
            noise_inputs.append(noise_var)

    # Things to do at the end of each layer.
    def layer_epilogue(x, layer_idx):
        if use_noise:
            x = apply_noise(x, noise_inputs[layer_idx], randomize_noise=randomize_noise)
        x = apply_bias(x)
        x = act(x)
        if use_pixel_norm:
            x = pixel_norm(x)
        if use_instance_norm:
            x = instance_norm(x)
        if use_styles:
            x = style_mod(x, dlatents_in[:, layer_idx], use_wscale=use_wscale)
        return x

    # Early layers.
    with tf.variable_scope('4x4'):
        if const_input_layer:
            with tf.variable_scope('Const'):
                x = tf.get_variable('const', shape=[1, nf(1), 4, 4], initializer=tf.initializers.ones())
                # Replicate the learned constant across the minibatch.
                x = layer_epilogue(tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1]), 0)
        else:
            with tf.variable_scope('Dense'):
                x = dense(dlatents_in[:, 0], fmaps=nf(1)*16, gain=gain/4, use_wscale=use_wscale)  # tweak gain to match the official implementation of Progressive GAN
                x = layer_epilogue(tf.reshape(x, [-1, nf(1), 4, 4]), 0)
        with tf.variable_scope('Conv'):
            x = layer_epilogue(conv2d(x, fmaps=nf(1), kernel=3, gain=gain, use_wscale=use_wscale), 1)

    # Building blocks for remaining layers.
    def block(res, x):  # res = 3..resolution_log2
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            with tf.variable_scope('Conv0_up'):
                x = upscale2d_conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale, fused_scale=fused_scale)
                x = layer_epilogue(blur(x), res*2-4)
            with tf.variable_scope('Conv1'):
                x = conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale)
                x = layer_epilogue(x, res*2-3)
            return x

    def torgb(res, x):  # res = 2..resolution_log2
        lod = resolution_log2 - res
        with tf.variable_scope('ToRGB_lod%d' % lod):
            return apply_bias(conv2d(x, fmaps=num_channels, kernel=1, gain=1, use_wscale=use_wscale))

    # Fixed structure: simple and efficient, but does not support progressive growing.
    if structure == 'fixed':
        for res in range(3, resolution_log2 + 1):
            x = block(res, x)
        images_out = torgb(resolution_log2, x)

    # Linear structure: simple but inefficient.
    if structure == 'linear':
        images_out = torgb(2, x)
        for res in range(3, resolution_log2 + 1):
            lod = resolution_log2 - res
            x = block(res, x)
            img = torgb(res, x)
            images_out = upscale2d(images_out)
            with tf.variable_scope('Grow_lod%d' % lod):
                images_out = tflib.lerp_clip(img, images_out, lod_in - lod)

    # Recursive structure: complex but efficient.
    if structure == 'recursive':
        def cset(cur_lambda, new_cond, new_lambda):
            # Wrap cur_lambda so that new_lambda is taken instead whenever new_cond holds.
            return lambda: tf.cond(new_cond, new_lambda, cur_lambda)

        def grow(x, res, lod):
            y = block(res, x)

            def plain_image():
                return upscale2d(torgb(res, y), 2**lod)

            def blended_image():
                # Blend this resolution's image with the upscaled image of the previous one.
                return upscale2d(tflib.lerp(torgb(res, y), upscale2d(torgb(res - 1, x)), lod_in - lod), 2**lod)

            img = cset(plain_image, (lod_in > lod), blended_image)
            if lod > 0:
                img = cset(img, (lod_in < lod), lambda: grow(y, res + 1, lod - 1))
            return img()

        images_out = grow(x, 3, resolution_log2 - 3)

    assert images_out.dtype == tf.as_dtype(dtype)
    return tf.identity(images_out, name='images_out')
#----------------------------------------------------------------------------
# Discriminator used in the StyleGAN paper.
def D_basic(
    images_in,                          # First input: Images [minibatch, channel, height, width].
    labels_in,                          # Second input: Labels [minibatch, label_size].
    num_channels        = 1,            # Number of input color channels. Overridden based on dataset.
    resolution          = 32,           # Input resolution. Overridden based on dataset.
    label_size          = 0,            # Dimensionality of the labels, 0 if no labels. Overridden based on dataset.
    fmap_base           = 8192,         # Overall multiplier for the number of feature maps.
    fmap_decay          = 1.0,          # log2 feature map reduction when doubling the resolution.
    fmap_max            = 512,          # Maximum number of feature maps in any layer.
    nonlinearity        = 'lrelu',      # Activation function: 'relu', 'lrelu',
    use_wscale          = True,         # Enable equalized learning rate?
    mbstd_group_size    = 4,            # Group size for the minibatch standard deviation layer, 0 = disable.
    mbstd_num_features  = 1,            # Number of features for the minibatch standard deviation layer.
    dtype               = 'float32',    # Data type to use for activations and outputs.
    fused_scale         = 'auto',       # True = fused convolution + scaling, False = separate ops, 'auto' = decide automatically.
    blur_filter         = [1,2,1],      # Low-pass filter to apply when resampling activations. None = no filtering.
    structure           = 'auto',       # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
    is_template_graph   = False,        # True = template graph constructed by the Network class, False = actual evaluation.
    **_kwargs):                         # Ignore unrecognized keyword args.
    """Build the discriminator that maps images (and optional labels) to scores.

    Blocks for 8x8 and above are a convolution followed by a downscaling
    convolution; the 4x4 block adds an optional minibatch-stddev layer, one
    convolution and two dense layers. `structure` selects how the blocks are
    combined. With labels, the output is the score of the labelled class.
    """
    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4

    def nf(stage):
        # Number of feature maps for the given stage, capped at fmap_max.
        return min(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max)

    def blur(x):
        return blur2d(x, blur_filter) if blur_filter else x

    if structure == 'auto':
        structure = 'linear' if is_template_graph else 'recursive'
    activations = {'relu': (tf.nn.relu, np.sqrt(2)), 'lrelu': (leaky_relu, np.sqrt(2))}
    act, gain = activations[nonlinearity]

    images_in.set_shape([None, num_channels, resolution, resolution])
    labels_in.set_shape([None, label_size])
    images_in = tf.cast(images_in, dtype)
    labels_in = tf.cast(labels_in, dtype)
    lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0.0), trainable=False), dtype)
    scores_out = None

    # Building blocks.
    def fromrgb(x, res):  # res = 2..resolution_log2
        with tf.variable_scope('FromRGB_lod%d' % (resolution_log2 - res)):
            return act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=1, gain=gain, use_wscale=use_wscale)))

    def block(x, res):  # res = 2..resolution_log2
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            if res >= 3:  # 8x8 and up
                with tf.variable_scope('Conv0'):
                    x = conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale)
                    x = act(apply_bias(x))
                with tf.variable_scope('Conv1_down'):
                    x = conv2d_downscale2d(blur(x), fmaps=nf(res-2), kernel=3, gain=gain, use_wscale=use_wscale, fused_scale=fused_scale)
                    x = act(apply_bias(x))
            else:  # 4x4
                if mbstd_group_size > 1:
                    x = minibatch_stddev_layer(x, mbstd_group_size, mbstd_num_features)
                with tf.variable_scope('Conv'):
                    x = conv2d(x, fmaps=nf(res-1), kernel=3, gain=gain, use_wscale=use_wscale)
                    x = act(apply_bias(x))
                with tf.variable_scope('Dense0'):
                    x = dense(x, fmaps=nf(res-2), gain=gain, use_wscale=use_wscale)
                    x = act(apply_bias(x))
                with tf.variable_scope('Dense1'):
                    # One output per label, or a single score when there are no labels.
                    x = dense(x, fmaps=max(label_size, 1), gain=1, use_wscale=use_wscale)
                    x = apply_bias(x)
            return x

    # Fixed structure: simple and efficient, but does not support progressive growing.
    if structure == 'fixed':
        x = fromrgb(images_in, resolution_log2)
        for res in range(resolution_log2, 2, -1):
            x = block(x, res)
        scores_out = block(x, 2)

    # Linear structure: simple but inefficient.
    if structure == 'linear':
        img = images_in
        x = fromrgb(img, resolution_log2)
        for res in range(resolution_log2, 2, -1):
            lod = resolution_log2 - res
            x = block(x, res)
            img = downscale2d(img)
            y = fromrgb(img, res - 1)
            with tf.variable_scope('Grow_lod%d' % lod):
                x = tflib.lerp_clip(x, y, lod_in - lod)
        scores_out = block(x, 2)

    # Recursive structure: complex but efficient.
    if structure == 'recursive':
        def cset(cur_lambda, new_cond, new_lambda):
            # Wrap cur_lambda so that new_lambda is taken instead whenever new_cond holds.
            return lambda: tf.cond(new_cond, new_lambda, cur_lambda)

        def grow(res, lod):
            def from_image():
                return fromrgb(downscale2d(images_in, 2**lod), res)

            features = from_image
            if lod > 0:
                features = cset(features, (lod_in < lod), lambda: grow(res + 1, lod - 1))
            x = block(features(), res)
            y = lambda: x
            if res > 2:
                y = cset(y, (lod_in > lod), lambda: tflib.lerp(x, fromrgb(downscale2d(images_in, 2**(lod+1)), res - 1), lod_in - lod))
            return y()

        scores_out = grow(2, resolution_log2 - 2)

    # Label conditioning from "Which Training Methods for GANs do actually Converge?"
    if label_size:
        with tf.variable_scope('LabelSwitch'):
            scores_out = tf.reduce_sum(scores_out * labels_in, axis=1, keepdims=True)

    assert scores_out.dtype == tf.as_dtype(dtype)
    scores_out = tf.identity(scores_out, name='scores_out')
    return scores_out
#----------------------------------------------------------------------------
================================================
FILE: training/networks_stylegan2.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Network architectures used in the StyleGAN2 paper."""
import numpy as np
import tensorflow as tf
import dnnlib
import dnnlib.tflib as tflib
from dnnlib.tflib.ops.upfirdn_2d import upsample_2d, downsample_2d, upsample_conv_2d, conv_downsample_2d
from dnnlib.tflib.ops.fused_bias_act import fused_bias_act
# NOTE: Do not import any application-specific modules here!
# Specify all network parameters as kwargs.
#----------------------------------------------------------------------------
# Get/create weight tensor for a convolution or fully-connected layer.
def get_weight(shape, gain=1, use_wscale=True, lrmul=1, weight_var='weight'):
    """Create (or fetch) the variable `weight_var` and return it times a runtime coefficient.

    Args:
        shape: [kernel, kernel, fmaps_in, fmaps_out] or [in, out]; fan-in is
               the product of all entries but the last.
        gain: Numerator of the He standard deviation.
        use_wscale: True = initialize with std 1/lrmul and apply the He std at
                    runtime; False = bake the He std into the initializer.
        lrmul: Learning rate multiplier folded into the initializer and the
               runtime coefficient.
        weight_var: Name of the TensorFlow variable.
    """
    fan_in = np.prod(shape[:-1])
    he_std = gain / np.sqrt(fan_in)  # He init

    # Equalized learning rate and custom learning rate multiplier.
    if use_wscale:
        init_std, runtime_coef = 1.0 / lrmul, he_std * lrmul
    else:
        init_std, runtime_coef = he_std / lrmul, lrmul

    # Create variable.
    weight = tf.get_variable(weight_var, shape=shape, initializer=tf.initializers.random_normal(0, init_std))
    return weight * runtime_coef
#----------------------------------------------------------------------------
# Fully-connected layer.
def dense_layer(x, fmaps, gain=1, use_wscale=True, lrmul=1, weight_var='weight'):
    """Multiply `x` by a [inputs, fmaps] weight matrix obtained from get_weight().

    Inputs of rank greater than 2 are first flattened to [-1, prod(dims[1:])].
    """
    if len(x.shape) > 2:
        flat_size = np.prod([d.value for d in x.shape[1:]])
        x = tf.reshape(x, [-1, flat_size])
    weight = get_weight([x.shape[1].value, fmaps], gain=gain, use_wscale=use_wscale, lrmul=lrmul, weight_var=weight_var)
    return tf.matmul(x, tf.cast(weight, x.dtype))
#----------------------------------------------------------------------------
# Convolution layer with optional upsampling or downsampling.
def conv2d_layer(x, fmaps, kernel, up=False, down=False, resample_kernel=None, gain=1, use_wscale=True, lrmul=1, weight_var='weight'):
    """NCHW convolution with an odd square kernel, optionally fused with up- or downsampling.

    `up` and `down` are mutually exclusive; `resample_kernel` is passed as `k`
    to the resampling ops.
    """
    assert not (up and down)
    assert kernel >= 1 and kernel % 2 == 1
    weight = get_weight([kernel, kernel, x.shape[1].value, fmaps], gain=gain, use_wscale=use_wscale, lrmul=lrmul, weight_var=weight_var)
    weight = tf.cast(weight, x.dtype)
    if up:
        return upsample_conv_2d(x, weight, data_format='NCHW', k=resample_kernel)
    if down:
        return conv_downsample_2d(x, weight, data_format='NCHW', k=resample_kernel)
    return tf.nn.conv2d(x, weight, data_format='NCHW', strides=[1,1,1,1], padding='SAME')
#----------------------------------------------------------------------------
# Apply bias and activation func.
def apply_bias_act(x, act='linear', alpha=None, gain=None, lrmul=1, bias_var='bias'):
    """Add the zero-initialized variable `bias_var` (scaled by `lrmul`) and apply `act` via fused_bias_act()."""
    bias = tf.get_variable(bias_var, shape=[x.shape[1]], initializer=tf.initializers.zeros()) * lrmul
    bias = tf.cast(bias, x.dtype)
    return fused_bias_act(x, b=bias, act=act, alpha=alpha, gain=gain)
#----------------------------------------------------------------------------
# Naive upsampling (nearest neighbor) and downsampling (average pooling).
def naive_upsample_2d(x, factor=2):
    """Nearest-neighbor upsampling: repeat every element `factor` times along the last two axes."""
    with tf.variable_scope('NaiveUpsample'):
        _, channels, height, width = x.shape.as_list()
        x = tf.reshape(x, [-1, channels, height, 1, width, 1])
        x = tf.tile(x, [1, 1, 1, factor, 1, factor])
        return tf.reshape(x, [-1, channels, height * factor, width * factor])
def naive_downsample_2d(x, factor=2):
    """Average pooling: mean over non-overlapping factor-by-factor windows of the last two axes."""
    with tf.variable_scope('NaiveDownsample'):
        _, channels, height, width = x.shape.as_list()
        windows = tf.reshape(x, [-1, channels, height // factor, factor, width // factor, factor])
        return tf.reduce_mean(windows, axis=[3,5])
#----------------------------------------------------------------------------
# Modulated convolution layer.
def modulated_conv2d_layer(x, y, fmaps, kernel, up=False, down=False, demodulate=True, resample_kernel=None, gain=1, use_wscale=True, lrmul=1, fused_modconv=True, weight_var='weight', mod_weight_var='mod_weight', mod_bias_var='mod_bias'):
    """Convolution whose weights are modulated per sample by a style derived from `y`.

    Shape legend used in the comments: B = minibatch, k = kernel size,
    I = input feature maps, O = output feature maps, h/w = spatial dims.

    Args:
        x: Input activations [BIhw] (convolutions use `data_format='NCHW'`).
        y: Per-sample input that is passed through a dense layer to produce
            the style [BI].
        fmaps: Number of output feature maps.
        kernel: Odd kernel size >= 1.
        up, down: Mutually exclusive flags selecting `upsample_conv_2d()` or
            `conv_downsample_2d()` instead of a plain convolution.
        demodulate: Rescale each output feature map by the reciprocal
            square root of the summed squares of its modulated weights.
        resample_kernel: Passed as `k` to the resampling convolution.
        gain, use_wscale, lrmul, weight_var: Forwarded to `get_weight()`.
        fused_modconv: True = fold the minibatch into the channel axis and
            run one convolution with per-sample weights. False = scale the
            activations before (and, with `demodulate`, after) a convolution
            with the shared weight.
        mod_weight_var, mod_bias_var: Variable names of the style dense layer.

    Returns:
        The output activations.
    """
    assert not (up and down)
    assert kernel >= 1 and kernel % 2 == 1
    in_fmaps = x.shape[1].value

    # Shared weight [kkIO] and a copy with a leading minibatch axis [BkkIO].
    w = get_weight([kernel, kernel, in_fmaps, fmaps], gain=gain, use_wscale=use_wscale, lrmul=lrmul, weight_var=weight_var)
    per_sample_w = w[np.newaxis]

    # Style [BI]: dense transform of y plus a bias, offset by 1.
    style = dense_layer(y, fmaps=in_fmaps, weight_var=mod_weight_var)
    style = apply_bias_act(style, bias_var=mod_bias_var) + 1
    per_sample_w = per_sample_w * tf.cast(style[:, np.newaxis, np.newaxis, :, np.newaxis], w.dtype)  # [BkkIO] Scale input feature maps.

    # Demodulation factor [BO], folded into the per-sample weights.
    demod = None
    if demodulate:
        sum_of_squares = tf.reduce_sum(tf.square(per_sample_w), axis=[1,2,3])
        demod = tf.rsqrt(sum_of_squares + 1e-8)
        per_sample_w = per_sample_w * demod[:, np.newaxis, np.newaxis, np.newaxis, :]  # [BkkIO] Scale output feature maps.

    # Prepare the convolution input and weight.
    if fused_modconv:
        # Minibatch becomes part of the channel axis; weights become [kkI(B*O)].
        x = tf.reshape(x, [1, -1, x.shape[2], x.shape[3]])
        w = tf.reshape(tf.transpose(per_sample_w, [1, 2, 3, 0, 4]), [per_sample_w.shape[1], per_sample_w.shape[2], per_sample_w.shape[3], -1])
    else:
        # Shared weight is kept; the style is applied to the activations instead.
        x = x * tf.cast(style[:, :, np.newaxis, np.newaxis], x.dtype)  # [BIhw]

    # Convolution with optional up/downsampling.
    conv_w = tf.cast(w, x.dtype)
    if up:
        x = upsample_conv_2d(x, conv_w, data_format='NCHW', k=resample_kernel)
    elif down:
        x = conv_downsample_2d(x, conv_w, data_format='NCHW', k=resample_kernel)
    else:
        x = tf.nn.conv2d(x, conv_w, data_format='NCHW', strides=[1,1,1,1], padding='SAME')

    # Undo the reshape (fused) or apply the demodulation to the output (not fused).
    if fused_modconv:
        return tf.reshape(x, [-1, fmaps, x.shape[2], x.shape[3]])
    if demodulate:
        x = x * tf.cast(demod[:, :, np.newaxis, np.newaxis], x.dtype)  # [BOhw]
    return x
#----------------------------------------------------------------------------
# Minibatch standard deviation layer.
def minibatch_stddev_layer(x, group_size=4, num_new_features=1):
    """Append minibatch standard-deviation statistics to `x` as extra feature maps.

    Shape legend: N = minibatch, G = group size, M = N / G groups,
    n = `num_new_features`, c = C / n channels per new feature.

    The minibatch is split into groups of (at most) `group_size` samples. For
    each group and each of the `n` channel groups, the standard deviation over
    the group is averaged over channels and pixels, broadcast back to
    [N, n, H, W], and concatenated to `x` along axis 1.
    """
    batch_size = tf.shape(x)[0]
    group_size = tf.minimum(group_size, batch_size)  # Minibatch must be divisible by (or smaller than) group_size.
    in_shape = x.shape                               # [NCHW] Static input shape.
    channels_per_feature = in_shape[1] // num_new_features

    grouped = tf.reshape(x, [group_size, -1, num_new_features, channels_per_feature, in_shape[2], in_shape[3]])  # [GMncHW]
    grouped = tf.cast(grouped, tf.float32)                               # [GMncHW] Statistics are computed in FP32.
    centered = grouped - tf.reduce_mean(grouped, axis=0, keepdims=True)  # [GMncHW] Subtract mean over group.
    variance = tf.reduce_mean(tf.square(centered), axis=0)               # [MncHW] Variance over group.
    stddev = tf.sqrt(variance + 1e-8)                                    # [MncHW] Stddev over group.
    stddev = tf.reduce_mean(stddev, axis=[2,3,4], keepdims=True)         # [Mn111] Average over channels and pixels.
    stddev = tf.reduce_mean(stddev, axis=[2])                            # [Mn11] Drop the singleton per-feature channel axis.
    stddev = tf.cast(stddev, x.dtype)                                    # [Mn11] Back to the input dtype.
    stddev = tf.tile(stddev, [group_size, 1, in_shape[2], in_shape[3]])  # [NnHW] Replicate over group and pixels.
    return tf.concat([x, stddev], axis=1)                                # [N(C+n)HW] Append as new feature maps.
#----------------------------------------------------------------------------
# Main generator network.
# Composed of two sub-networks (mapping and synthesis) that are defined below.
# Used in configs B-F (Table 1).
def G_main(
latents_in, # First input: Latent vectors (Z) [minibatch, latent_size].
labels_in, # Second input: Conditioning labels [minibatch, label_size].
truncation_psi = 0.5, # Style strength multiplier for the truncation trick. None = disable.
truncation_cutoff = None, # Number of layers for which to apply the truncation trick. None = disable.
truncation_psi_val = None, # Value for truncation_psi to use during validation.
truncation_cutoff_val = None, # Value for truncation_cutoff to use during validation.
dlatent_avg_beta = 0.995, # Decay for tracking the moving average of W during training. None = disable.
style_mixing_prob = 0.9, # Probability of mixing styles during training. None = disable.
is_training = False, # Network is under training? Enables and disables specific features.
is_validation = False, # Network is under validation? Chooses which value to use for truncation_psi.
return_dlatents = False, # Return dlatents in addition to the images?
is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation.
components = dnnlib.EasyDict(), # Container for sub-networks. Retained between calls.
mapping_func = 'G_mapping', # Build func name for the mapping network.
synthesis_func = 'G_synthesis_stylegan2', # Build func name for the synthesis network.
**kwargs): # Arguments for sub-networks (mapping and synthesis).
"""Build the complete generator graph: mapping network followed by synthesis network.

Creates the 'G_synthesis' and 'G_mapping' sub-networks on first use (they are
cached in `components`), maps `latents_in`/`labels_in` to dlatents, and then,
depending on the flags, updates the `dlatent_avg` moving average, applies style
mixing and the truncation trick, and finally evaluates the synthesis network.

Returns `images_out`, or the tuple `(images_out, dlatents)` when
`return_dlatents` is true.

NOTE: the default for `components` is a single EasyDict created when the
function is defined; it is shared by every call that does not pass its own.
"""
# Validate arguments.
assert not is_training or not is_validation
assert isinstance(components, dnnlib.EasyDict)
if is_validation:
truncation_psi = truncation_psi_val
truncation_cutoff = truncation_cutoff_val
# Truncation is skipped while training, and also when psi is the Python constant 1.
if is_training or (truncation_psi is not None and not tflib.is_tf_expression(truncation_psi) and truncation_psi == 1):
truncation_psi = None
if is_training:
truncation_cutoff = None
# The moving average of W is only tracked while training, and not for a constant beta of 1.
if not is_training or (dlatent_avg_beta is not None and not tflib.is_tf_expression(dlatent_avg_beta) and dlatent_avg_beta == 1):
dlatent_avg_beta = None
# Style mixing is only performed while training, and not for a constant probability <= 0.
if not is_training or (style_mixing_prob is not None and not tflib.is_tf_expression(style_mixing_prob) and style_mixing_prob <= 0):
style_mixing_prob = None
# Setup components.
# The build functions are looked up by name among this module's globals.
if 'synthesis' not in components:
components.synthesis = tflib.Network('G_synthesis', func_name=globals()[synthesis_func], **kwargs)
# Layer count and W dimensionality are taken from the synthesis network's input shape.
num_layers = components.synthesis.input_shape[1]
dlatent_size = components.synthesis.input_shape[2]
if 'mapping' not in components:
components.mapping = tflib.Network('G_mapping', func_name=globals()[mapping_func], dlatent_broadcast=num_layers, **kwargs)
# Setup variables.
lod_in = tf.get_variable('lod', initializer=np.float32(0), trainable=False)
dlatent_avg = tf.get_variable('dlatent_avg', shape=[dlatent_size], initializer=tf.initializers.zeros(), trainable=False)
# Evaluate mapping network.
dlatents = components.mapping.get_output_for(latents_in, labels_in, is_training=is_training, **kwargs)
dlatents = tf.cast(dlatents, tf.float32)
# Update moving average of W.
if dlatent_avg_beta is not None:
with tf.variable_scope('DlatentAvg'):
# Only layer 0 of the dlatents is averaged over the minibatch.
batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0)
update_op = tf.assign(dlatent_avg, tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta))
# Routing dlatents through an identity op makes the update run whenever they are consumed.
with tf.control_dependencies([update_op]):
dlatents = tf.identity(dlatents)
# Perform style mixing regularization.
if style_mixing_prob is not None:
with tf.variable_scope('StyleMix'):
# A second, independent set of latents is mapped with the same labels.
latents2 = tf.random_normal(tf.shape(latents_in))
dlatents2 = components.mapping.get_output_for(latents2, labels_in, is_training=is_training, **kwargs)
dlatents2 = tf.cast(dlatents2, tf.float32)
layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
cur_layers = num_layers - tf.cast(lod_in, tf.int32) * 2
# With probability style_mixing_prob pick a random cutoff; otherwise use cur_layers as the cutoff.
mixing_cutoff = tf.cond(
tf.random_uniform([], 0.0, 1.0) < style_mixing_prob,
lambda: tf.random_uniform([], 1, cur_layers, dtype=tf.int32),
lambda: cur_layers)
# Layers below the cutoff keep dlatents; the remaining layers take dlatents2.
dlatents = tf.where(tf.broadcast_to(layer_idx < mixing_cutoff, tf.shape(dlatents)), dlatents, dlatents2)
# Apply truncation trick.
if truncation_psi is not None:
with tf.variable_scope('Truncation'):
layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
layer_psi = np.ones(layer_idx.shape, dtype=np.float32)
if truncation_cutoff is None:
layer_psi *= truncation_psi
else:
# Only layers below the cutoff get psi; the others keep a coefficient of 1.
layer_psi = tf.where(layer_idx < truncation_cutoff, layer_psi * truncation_psi, layer_psi)
dlatents = tflib.lerp(dlatent_avg, dlatents, layer_psi)
# Evaluate synthesis network.
deps = []
# If the synthesis network has its own 'lod' variable, copy this function's lod into it first.
if 'lod' in components.synthesis.vars:
deps.append(tf.assign(components.synthesis.vars['lod'], lod_in))
with tf.control_dependencies(deps):
images_out = components.synthesis.get_output_for(dlatents, is_training=is_training, force_clean_graph=is_template_graph, **kwargs)
# Return requested outputs.
images_out = tf.identity(images_out, name='images_out')
if return_dlatents:
return images_out, dlatents
return images_out
#----------------------------------------------------------------------------
# Mapping network.
# Transforms the input latent code (z) to the disentangled latent code (w).
# Used in configs B-F (Table 1).
def G_mapping(
    latents_in,                             # First input: Latent vectors (Z) [minibatch, latent_size].
    labels_in,                              # Second input: Conditioning labels [minibatch, label_size].
    latent_size             = 512,          # Latent vector (Z) dimensionality.
    label_size              = 0,            # Label dimensionality, 0 if no labels.
    dlatent_size            = 512,          # Disentangled latent (W) dimensionality.
    dlatent_broadcast       = None,         # Output disentangled latent (W) as [minibatch, dlatent_size] or [minibatch, dlatent_broadcast, dlatent_size].
    mapping_layers          = 8,            # Number of mapping layers.
    mapping_fmaps           = 512,          # Number of activations in the mapping layers.
    mapping_lrmul           = 0.01,         # Learning rate multiplier for the mapping layers.
    mapping_nonlinearity    = 'lrelu',      # Activation function: 'relu', 'lrelu', etc.
    normalize_latents       = True,         # Normalize latent vectors (Z) before feeding them to the mapping layers?
    dtype                   = 'float32',    # Data type to use for activations and outputs.
    **_kwargs):                             # Ignore unrecognized keyword args.
    """Map latent codes (and optional labels) to disentangled latents.

    Pipeline: cast inputs to `dtype`; if `label_size` is non-zero, embed the
    labels with a learned matrix and concatenate them to the latents;
    optionally normalize each row to unit mean square; run `mapping_layers`
    dense layers (the last one has `dlatent_size` outputs, the others
    `mapping_fmaps`); optionally tile the result `dlatent_broadcast` times
    along a new axis 1.

    Returns:
        Tensor named 'dlatents_out' of dtype `dtype`.
    """
    # Fix the static input shapes and bring both inputs to the working dtype.
    latents_in.set_shape([None, latent_size])
    labels_in.set_shape([None, label_size])
    latents_in = tf.cast(latents_in, dtype)
    labels_in = tf.cast(labels_in, dtype)
    hidden = latents_in

    # Learned label embedding, appended to the latents along axis 1.
    if label_size:
        with tf.variable_scope('LabelConcat'):
            embed_w = tf.get_variable('weight', shape=[label_size, latent_size], initializer=tf.initializers.random_normal())
            label_embedding = tf.matmul(labels_in, tf.cast(embed_w, dtype))
            hidden = tf.concat([hidden, label_embedding], axis=1)

    # Scale every row to unit mean square.
    if normalize_latents:
        with tf.variable_scope('Normalize'):
            mean_square = tf.reduce_mean(tf.square(hidden), axis=1, keepdims=True)
            hidden = hidden * tf.rsqrt(mean_square + 1e-8)

    # Stack of dense layers; only the final layer uses the W dimensionality.
    last_layer = mapping_layers - 1
    for layer_idx in range(mapping_layers):
        with tf.variable_scope('Dense%d' % layer_idx):
            if layer_idx == last_layer:
                width = dlatent_size
            else:
                width = mapping_fmaps
            dense_out = dense_layer(hidden, fmaps=width, lrmul=mapping_lrmul)
            hidden = apply_bias_act(dense_out, act=mapping_nonlinearity, lrmul=mapping_lrmul)

    # Repeat the same W for every synthesis layer.
    if dlatent_broadcast is not None:
        with tf.variable_scope('Broadcast'):
            hidden = tf.tile(hidden[:, np.newaxis], [1, dlatent_broadcast, 1])

    assert hidden.dtype == tf.as_dtype(dtype)
    return tf.identity(hidden, name='dlatents_out')
#----------------------------------------------------------------------------
# StyleGAN synthesis network with revised architecture (Figure 2d).
# Implements progressive growing, but no skip connections or residual nets (Figure 7).
# Used in configs B-D (Table 1).
def G_synthesis_stylegan_revised(
dlatents_in, # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size].
dlatent_size = 512, # Disentangled latent (W) dimensionality.
num_channels = 3, # Number of output color channels.
resolution = 1024, # Output resolution.
fmap_base = 16 << 10, # Overall multiplier for the number of feature maps.
fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution.
fmap_min = 1, # Minimum number of feature maps in any layer.
fmap_max = 512, # Maximum number of feature maps in any layer.
randomize_noise = True, # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables.
nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', etc.
dtype = 'float32', # Data type to use for activations and outputs.
resample_kernel = [1,3,3,1], # Low-pass filter to apply when resampling activations. None = no filtering.
fused_modconv = True, # Implement modulated_conv2d_layer() as a single fused op?
structure = 'auto', # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation.
force_clean_graph = False, # True = construct a clean graph that looks nice in TensorBoard, False = default behavior.
**_kwargs): # Ignore unrecognized keyword args.
"""Build the progressive-growing synthesis network from per-layer dlatents.

Starts from a learned 4x4 constant, applies one modulated convolution at 4x4
and two per higher resolution (the first of each pair upsamples), adds noise
after every convolution, and converts features to `num_channels` image
channels with a 1x1 modulated convolution. How the per-resolution images are
combined depends on `structure` and on the non-trainable 'lod' variable.

Returns a tensor named 'images_out' of dtype `dtype`.
"""
resolution_log2 = int(np.log2(resolution))
assert resolution == 2**resolution_log2 and resolution >= 4
# Feature-map count for a stage: fmap_base / 2**(stage * fmap_decay), clipped to [fmap_min, fmap_max].
def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max)
# A template graph implies a clean graph; a clean graph reads noise from variables
# and, under 'auto', uses the 'linear' structure instead of 'recursive'.
if is_template_graph: force_clean_graph = True
if force_clean_graph: randomize_noise = False
if structure == 'auto': structure = 'linear' if force_clean_graph else 'recursive'
act = nonlinearity
# One dlatent per convolution layer plus one for the final ToRGB (index res*2-3 at the top resolution).
num_layers = resolution_log2 * 2 - 2
images_out = None
# Primary inputs.
dlatents_in.set_shape([None, num_layers, dlatent_size])
dlatents_in = tf.cast(dlatents_in, dtype)
lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0), trainable=False), dtype)
# Noise inputs.
# One noise variable per convolution layer: layer 0 is 4x4, layers 1-2 are 8x8, layers 3-4 are 16x16, ...
noise_inputs = []
for layer_idx in range(num_layers - 1):
res = (layer_idx + 5) // 2
shape = [1, 1, 2**res, 2**res]
noise_inputs.append(tf.get_variable('noise%d' % layer_idx, shape=shape, initializer=tf.initializers.random_normal(), trainable=False))
# Single convolution layer with all the bells and whistles.
# Modulated convolution -> additive noise scaled by a learned scalar (initialized to zero) -> bias + activation.
def layer(x, layer_idx, fmaps, kernel, up=False):
x = modulated_conv2d_layer(x, dlatents_in[:, layer_idx], fmaps=fmaps, kernel=kernel, up=up, resample_kernel=resample_kernel, fused_modconv=fused_modconv)
if randomize_noise:
noise = tf.random_normal([tf.shape(x)[0], 1, x.shape[2], x.shape[3]], dtype=x.dtype)
else:
noise = tf.cast(noise_inputs[layer_idx], x.dtype)
noise_strength = tf.get_variable('noise_strength', shape=[], initializer=tf.initializers.zeros())
x += noise * tf.cast(noise_strength, x.dtype)
return apply_bias_act(x, act=act)
# Early layers.
with tf.variable_scope('4x4'):
with tf.variable_scope('Const'):
# Learned constant input, tiled to the minibatch size of dlatents_in.
x = tf.get_variable('const', shape=[1, nf(1), 4, 4], initializer=tf.initializers.random_normal())
x = tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1])
with tf.variable_scope('Conv'):
x = layer(x, layer_idx=0, fmaps=nf(1), kernel=3)
# Building blocks for remaining layers.
# block(): upsampling convolution followed by a regular convolution at resolution 2**res.
def block(res, x): # res = 3..resolution_log2
with tf.variable_scope('%dx%d' % (2**res, 2**res)):
with tf.variable_scope('Conv0_up'):
x = layer(x, layer_idx=res*2-5, fmaps=nf(res-1), kernel=3, up=True)
with tf.variable_scope('Conv1'):
x = layer(x, layer_idx=res*2-4, fmaps=nf(res-1), kernel=3)
return x
# torgb(): 1x1 modulated convolution (no demodulation) to num_channels, plus bias.
def torgb(res, x): # res = 2..resolution_log2
with tf.variable_scope('ToRGB_lod%d' % (resolution_log2 - res)):
return apply_bias_act(modulated_conv2d_layer(x, dlatents_in[:, res*2-3], fmaps=num_channels, kernel=1, demodulate=False, fused_modconv=fused_modconv))
# Fixed structure: simple and efficient, but does not support progressive growing.
if structure == 'fixed':
for res in range(3, resolution_log2 + 1):
x = block(res, x)
images_out = torgb(resolution_log2, x)
# Linear structure: simple but inefficient.
# Every resolution produces an image; each is blended with the upsampled image from the previous resolution.
if structure == 'linear':
images_out = torgb(2, x)
for res in range(3, resolution_log2 + 1):
lod = resolution_log2 - res
x = block(res, x)
img = torgb(res, x)
with tf.variable_scope('Upsample_lod%d' % lod):
images_out = upsample_2d(images_out)
with tf.variable_scope('Grow_lod%d' % lod):
images_out = tflib.lerp_clip(img, images_out, lod_in - lod)
# Recursive structure: complex but efficient.
if structure == 'recursive':
# cset() wraps cur_lambda in a tf.cond: new_lambda is used when new_cond holds, cur_lambda otherwise.
def cset(cur_lambda, new_cond, new_lambda):
return lambda: tf.cond(new_cond, new_lambda, cur_lambda)
# grow() chooses, based on lod_in, between this resolution's image (naively upsampled by 2**lod),
# a blend with the previous resolution's image, or recursing into the next higher resolution.
def grow(x, res, lod):
y = block(res, x)
img = lambda: naive_upsample_2d(torgb(res, y), factor=2**lod)
img = cset(img, (lod_in > lod), lambda: naive_upsample_2d(tflib.lerp(torgb(res, y), upsample_2d(torgb(res - 1, x)), lod_in - lod), factor=2**lod))
if lod > 0: img = cset(img, (lod_in < lod), lambda: grow(y, res + 1, lod - 1))
return img()
images_out = grow(x, 3, resolution_log2 - 3)
assert images_out.dtype == tf.as_dtype(dtype)
return tf.identity(images_out, name='images_out')
#----------------------------------------------------------------------------
# StyleGAN2 synthesis network (Figure 7).
# Implements skip connections and residual nets (Figure 7), but no progressive growing.
# Used in configs E-F (Table 1).
def G_synthesis_stylegan2(
    dlatents_in,                        # Input: Disentangled latents (W) [minibatch, num_layers, dlatent_size].
    dlatent_size        = 512,          # Disentangled latent (W) dimensionality.
    num_channels        = 3,            # Number of output color channels.
    resolution          = 1024,         # Output resolution.
    fmap_base           = 16 << 10,     # Overall multiplier for the number of feature maps.
    fmap_decay          = 1.0,          # log2 feature map reduction when doubling the resolution.
    fmap_min            = 1,            # Minimum number of feature maps in any layer.
    fmap_max            = 512,          # Maximum number of feature maps in any layer.
    randomize_noise     = True,         # True = randomize noise inputs every time (non-deterministic), False = read noise inputs from variables.
    architecture        = 'skip',       # Architecture: 'orig', 'skip', 'resnet'.
    nonlinearity        = 'lrelu',      # Activation function: 'relu', 'lrelu', etc.
    dtype               = 'float32',    # Data type to use for activations and outputs.
    resample_kernel     = [1,3,3,1],    # Low-pass filter to apply when resampling activations. None = no filtering.
    fused_modconv       = True,         # Implement modulated_conv2d_layer() as a single fused op?
    **_kwargs):                         # Ignore unrecognized keyword args.
    """Build the StyleGAN2 synthesis network from per-layer dlatents.

    Starts from a learned 4x4 constant, applies one modulated convolution at
    4x4 and two per higher resolution (the first of each pair upsamples), and
    adds noise after every convolution. The image is produced according to
    `architecture`:

    * 'skip':   every resolution emits an RGB image via ToRGB; images are
                upsampled and summed.
    * 'resnet': every block adds an upsampled 1x1-convolution shortcut of its
                input, scaled by 1/sqrt(2); only the last resolution emits RGB.
    * 'orig':   plain feed-forward; only the last resolution emits RGB.

    Returns:
        Tensor named 'images_out' of dtype `dtype`.
    """
    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4
    # Feature maps for a stage: fmap_base / 2**(stage * fmap_decay), clipped to [fmap_min, fmap_max].
    def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max)
    assert architecture in ['orig', 'skip', 'resnet']
    act = nonlinearity
    # One dlatent per convolution layer plus one for the final ToRGB.
    num_layers = resolution_log2 * 2 - 2
    images_out = None

    # Primary inputs.
    dlatents_in.set_shape([None, num_layers, dlatent_size])
    dlatents_in = tf.cast(dlatents_in, dtype)

    # Noise inputs: layer 0 is 4x4, layers 1-2 are 8x8, layers 3-4 are 16x16, ...
    noise_inputs = []
    for layer_idx in range(num_layers - 1):
        res = (layer_idx + 5) // 2
        shape = [1, 1, 2**res, 2**res]
        noise_inputs.append(tf.get_variable('noise%d' % layer_idx, shape=shape, initializer=tf.initializers.random_normal(), trainable=False))

    # Single convolution layer with all the bells and whistles:
    # modulated convolution -> noise scaled by a learned scalar -> bias + activation.
    def layer(x, layer_idx, fmaps, kernel, up=False):
        x = modulated_conv2d_layer(x, dlatents_in[:, layer_idx], fmaps=fmaps, kernel=kernel, up=up, resample_kernel=resample_kernel, fused_modconv=fused_modconv)
        if randomize_noise:
            noise = tf.random_normal([tf.shape(x)[0], 1, x.shape[2], x.shape[3]], dtype=x.dtype)
        else:
            noise = tf.cast(noise_inputs[layer_idx], x.dtype)
        noise_strength = tf.get_variable('noise_strength', shape=[], initializer=tf.initializers.zeros())
        x += noise * tf.cast(noise_strength, x.dtype)
        return apply_bias_act(x, act=act)

    # Building blocks for main layers.
    def block(x, res): # res = 3..resolution_log2
        t = x
        with tf.variable_scope('Conv0_up'):
            x = layer(x, layer_idx=res*2-5, fmaps=nf(res-1), kernel=3, up=True)
        with tf.variable_scope('Conv1'):
            x = layer(x, layer_idx=res*2-4, fmaps=nf(res-1), kernel=3)
        if architecture == 'resnet':
            with tf.variable_scope('Skip'):
                t = conv2d_layer(t, fmaps=nf(res-1), kernel=1, up=True, resample_kernel=resample_kernel)
                x = (x + t) * (1 / np.sqrt(2))
        return x
    def upsample(y):
        with tf.variable_scope('Upsample'):
            return upsample_2d(y, k=resample_kernel)
    def torgb(x, y, res): # res = 2..resolution_log2
        with tf.variable_scope('ToRGB'):
            t = apply_bias_act(modulated_conv2d_layer(x, dlatents_in[:, res*2-3], fmaps=num_channels, kernel=1, demodulate=False, fused_modconv=fused_modconv))
            return t if y is None else y + t

    # Early layers.
    y = None
    with tf.variable_scope('4x4'):
        with tf.variable_scope('Const'):
            x = tf.get_variable('const', shape=[1, nf(1), 4, 4], initializer=tf.initializers.random_normal())
            x = tf.tile(tf.cast(x, dtype), [tf.shape(dlatents_in)[0], 1, 1, 1])
        with tf.variable_scope('Conv'):
            x = layer(x, layer_idx=0, fmaps=nf(1), kernel=3)
        # At resolution 4 there are no main layers, so the 4x4 block is also the
        # final one and must emit RGB for every architecture. Without this,
        # 'orig'/'resnet' left images_out as None. Larger resolutions are unaffected.
        if architecture == 'skip' or resolution_log2 == 2:
            y = torgb(x, y, 2)

    # Main layers.
    for res in range(3, resolution_log2 + 1):
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            x = block(x, res)
            if architecture == 'skip':
                y = upsample(y)
            if architecture == 'skip' or res == resolution_log2:
                y = torgb(x, y, res)
    images_out = y

    assert images_out.dtype == tf.as_dtype(dtype)
    return tf.identity(images_out, name='images_out')
#----------------------------------------------------------------------------
# Original StyleGAN discriminator.
# Used in configs B-D (Table 1).
def D_stylegan(
images_in, # First input: Images [minibatch, channel, height, width].
labels_in, # Second input: Labels [minibatch, label_size].
num_channels = 3, # Number of input color channels. Overridden based on dataset.
resolution = 1024, # Input resolution. Overridden based on dataset.
label_size = 0, # Dimensionality of the labels, 0 if no labels. Overridden based on dataset.
fmap_base = 16 << 10, # Overall multiplier for the number of feature maps.
fmap_decay = 1.0, # log2 feature map reduction when doubling the resolution.
fmap_min = 1, # Minimum number of feature maps in any layer.
fmap_max = 512, # Maximum number of feature maps in any layer.
nonlinearity = 'lrelu', # Activation function: 'relu', 'lrelu', etc.
mbstd_group_size = 4, # Group size for the minibatch standard deviation layer, 0 = disable.
mbstd_num_features = 1, # Number of features for the minibatch standard deviation layer.
dtype = 'float32', # Data type to use for activations and outputs.
resample_kernel = [1,3,3,1], # Low-pass filter to apply when resampling activations. None = no filtering.
structure = 'auto', # 'fixed' = no progressive growing, 'linear' = human-readable, 'recursive' = efficient, 'auto' = select automatically.
is_template_graph = False, # True = template graph constructed by the Network class, False = actual evaluation.
**_kwargs): # Ignore unrecognized keyword args.
"""Build the progressive-growing discriminator.

Converts the input images to features (FromRGB), applies one block per
resolution from `resolution` down to 8x8 (a 3x3 convolution followed by a
downsampling 3x3 convolution), then a 4x4 stage with optional minibatch
standard deviation, a convolution and a dense layer, and finally a dense
output layer. With labels, the output has one score per label entry and is
reduced to a single score by a sum weighted with `labels_in`.

Returns a tensor named 'scores_out' of dtype `dtype`.
"""
resolution_log2 = int(np.log2(resolution))
assert resolution == 2**resolution_log2 and resolution >= 4
# Feature-map count for a stage: fmap_base / 2**(stage * fmap_decay), clipped to [fmap_min, fmap_max].
def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max)
# Under 'auto', a template graph uses the 'linear' structure and anything else uses 'recursive'.
if structure == 'auto': structure = 'linear' if is_template_graph else 'recursive'
act = nonlinearity
images_in.set_shape([None, num_channels, resolution, resolution])
labels_in.set_shape([None, label_size])
images_in = tf.cast(images_in, dtype)
labels_in = tf.cast(labels_in, dtype)
lod_in = tf.cast(tf.get_variable('lod', initializer=np.float32(0.0), trainable=False), dtype)
# Building blocks for spatial layers.
# fromrgb(): 1x1 convolution from image channels to nf(res-1) feature maps, plus bias and activation.
def fromrgb(x, res): # res = 2..resolution_log2
with tf.variable_scope('FromRGB_lod%d' % (resolution_log2 - res)):
return apply_bias_act(conv2d_layer(x, fmaps=nf(res-1), kernel=1), act=act)
# block(): 3x3 convolution at resolution 2**res followed by a downsampling 3x3 convolution.
def block(x, res): # res = 2..resolution_log2
with tf.variable_scope('%dx%d' % (2**res, 2**res)):
with tf.variable_scope('Conv0'):
x = apply_bias_act(conv2d_layer(x, fmaps=nf(res-1), kernel=3), act=act)
with tf.variable_scope('Conv1_down'):
x = apply_bias_act(conv2d_layer(x, fmaps=nf(res-2), kernel=3, down=True, resample_kernel=resample_kernel), act=act)
return x
# Fixed structure: simple and efficient, but does not support progressive growing.
if structure == 'fixed':
x = fromrgb(images_in, resolution_log2)
for res in range(resolution_log2, 2, -1):
x = block(x, res)
# Linear structure: simple but inefficient.
# After each block, the features are blended with FromRGB of the correspondingly downsampled image.
if structure == 'linear':
img = images_in
x = fromrgb(img, resolution_log2)
for res in range(resolution_log2, 2, -1):
lod = resolution_log2 - res
x = block(x, res)
with tf.variable_scope('Downsample_lod%d' % lod):
img = downsample_2d(img)
y = fromrgb(img, res - 1)
with tf.variable_scope('Grow_lod%d' % lod):
x = tflib.lerp_clip(x, y, lod_in - lod)
# Recursive structure: complex but efficient.
if structure == 'recursive':
# cset() wraps cur_lambda in a tf.cond: new_lambda is used when new_cond holds, cur_lambda otherwise.
def cset(cur_lambda, new_cond, new_lambda):
return lambda: tf.cond(new_cond, new_lambda, cur_lambda)
# grow() selects the block input based on lod_in: FromRGB of the naively downsampled image,
# or (when lod_in < lod) the output of the next higher resolution. The block output is then
# optionally blended with FromRGB of the image downsampled one more level.
def grow(res, lod):
x = lambda: fromrgb(naive_downsample_2d(images_in, factor=2**lod), res)
if lod > 0: x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1))
x = block(x(), res); y = lambda: x
y = cset(y, (lod_in > lod), lambda: tflib.lerp(x, fromrgb(naive_downsample_2d(images_in, factor=2**(lod+1)), res - 1), lod_in - lod))
return y()
x = grow(3, resolution_log2 - 3)
# Final layers at 4x4 resolution.
with tf.variable_scope('4x4'):
# The minibatch-stddev layer is skipped for group sizes of 1 or less.
if mbstd_group_size > 1:
with tf.variable_scope('MinibatchStddev'):
x = minibatch_stddev_layer(x, mbstd_group_size, mbstd_num_features)
with tf.variable_scope('Conv'):
x = apply_bias_act(conv2d_layer(x, fmaps=nf(1), kernel=3), act=act)
with tf.variable_scope('Dense0'):
x = apply_bias_act(dense_layer(x, fmaps=nf(0)), act=act)
# Output layer with label conditioning from "Which Training Methods for GANs do actually Converge?"
with tf.variable_scope('Output'):
# One output per label entry (at least one); with labels, a label-weighted sum gives the final score.
x = apply_bias_act(dense_layer(x, fmaps=max(labels_in.shape[1], 1)))
if labels_in.shape[1] > 0:
x = tf.reduce_sum(x * labels_in, axis=1, keepdims=True)
scores_out = x
# Output.
assert scores_out.dtype == tf.as_dtype(dtype)
scores_out = tf.identity(scores_out, name='scores_out')
return scores_out
#----------------------------------------------------------------------------
# StyleGAN2 discriminator (Figure 7).
# Implements skip connections and residual nets (Figure 7), but no progressive growing.
# Used in configs E-F (Table 1).
def D_stylegan2(
    images_in,                          # First input: Images [minibatch, channel, height, width].
    labels_in,                          # Second input: Labels [minibatch, label_size].
    num_channels        = 3,            # Number of input color channels. Overridden based on dataset.
    resolution          = 1024,         # Input resolution. Overridden based on dataset.
    label_size          = 0,            # Dimensionality of the labels, 0 if no labels. Overridden based on dataset.
    fmap_base           = 16 << 10,     # Overall multiplier for the number of feature maps.
    fmap_decay          = 1.0,          # log2 feature map reduction when doubling the resolution.
    fmap_min            = 1,            # Minimum number of feature maps in any layer.
    fmap_max            = 512,          # Maximum number of feature maps in any layer.
    architecture        = 'resnet',     # Architecture: 'orig', 'skip', 'resnet'.
    nonlinearity        = 'lrelu',      # Activation function: 'relu', 'lrelu', etc.
    mbstd_group_size    = 4,            # Group size for the minibatch standard deviation layer, 0 = disable.
    mbstd_num_features  = 1,            # Number of features for the minibatch standard deviation layer.
    dtype               = 'float32',    # Data type to use for activations and outputs.
    resample_kernel     = [1,3,3,1],    # Low-pass filter to apply when resampling activations. None = no filtering.
    **_kwargs):                         # Ignore unrecognized keyword args.
    """Build the StyleGAN2 discriminator.

    Applies one block per resolution from `resolution` down to 8x8 (a 3x3
    convolution followed by a downsampling 3x3 convolution), then a 4x4 stage
    with optional minibatch standard deviation, a convolution and a dense
    layer, and finally a dense output layer. Image features enter according
    to `architecture`:

    * 'skip':   FromRGB of the progressively downsampled image is added at
                every resolution, including 4x4.
    * 'resnet': FromRGB only at the input resolution; every block adds a
                downsampled 1x1-convolution shortcut of its input, scaled by
                1/sqrt(2).
    * 'orig':   FromRGB only at the input resolution; plain feed-forward.

    With labels, the output has one score per label entry and is reduced to
    a single score by a sum weighted with `labels_in`.

    Returns:
        Tensor named 'scores_out' of dtype `dtype`.
    """
    resolution_log2 = int(np.log2(resolution))
    assert resolution == 2**resolution_log2 and resolution >= 4
    # Feature maps for a stage: fmap_base / 2**(stage * fmap_decay), clipped to [fmap_min, fmap_max].
    def nf(stage): return np.clip(int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_min, fmap_max)
    assert architecture in ['orig', 'skip', 'resnet']
    act = nonlinearity

    images_in.set_shape([None, num_channels, resolution, resolution])
    labels_in.set_shape([None, label_size])
    images_in = tf.cast(images_in, dtype)
    labels_in = tf.cast(labels_in, dtype)

    # Building blocks for main layers.
    def fromrgb(x, y, res): # res = 2..resolution_log2
        with tf.variable_scope('FromRGB'):
            t = apply_bias_act(conv2d_layer(y, fmaps=nf(res-1), kernel=1), act=act)
            return t if x is None else x + t
    def block(x, res): # res = 2..resolution_log2
        t = x
        with tf.variable_scope('Conv0'):
            x = apply_bias_act(conv2d_layer(x, fmaps=nf(res-1), kernel=3), act=act)
        with tf.variable_scope('Conv1_down'):
            x = apply_bias_act(conv2d_layer(x, fmaps=nf(res-2), kernel=3, down=True, resample_kernel=resample_kernel), act=act)
        if architecture == 'resnet':
            with tf.variable_scope('Skip'):
                t = conv2d_layer(t, fmaps=nf(res-2), kernel=1, down=True, resample_kernel=resample_kernel)
                x = (x + t) * (1 / np.sqrt(2))
        return x
    def downsample(y):
        with tf.variable_scope('Downsample'):
            return downsample_2d(y, k=resample_kernel)

    # Main layers.
    x = None
    y = images_in
    for res in range(resolution_log2, 2, -1):
        with tf.variable_scope('%dx%d' % (2**res, 2**res)):
            if architecture == 'skip' or res == resolution_log2:
                x = fromrgb(x, y, res)
            x = block(x, res)
            if architecture == 'skip':
                y = downsample(y)

    # Final layers.
    with tf.variable_scope('4x4'):
        # At resolution 4 the main loop does not run, so for 'orig'/'resnet'
        # no FromRGB has been applied and x is still None; convert the image
        # here. For larger resolutions x is never None at this point, so only
        # 'skip' takes this branch, exactly as before.
        if architecture == 'skip' or x is None:
            x = fromrgb(x, y, 2)
        # The minibatch-stddev layer is skipped for group sizes of 1 or less.
        if mbstd_group_size > 1:
            with tf.variable_scope('MinibatchStddev'):
                x = minibatch_stddev_layer(x, mbstd_group_size, mbstd_num_features)
        with tf.variable_scope('Conv'):
            x = apply_bias_act(conv2d_layer(x, fmaps=nf(1), kernel=3), act=act)
        with tf.variable_scope('Dense0'):
            x = apply_bias_act(dense_layer(x, fmaps=nf(0)), act=act)

    # Output layer with label conditioning from "Which Training Methods for GANs do actually Converge?"
    with tf.variable_scope('Output'):
        x = apply_bias_act(dense_layer(x, fmaps=max(labels_in.shape[1], 1)))
        if labels_in.shape[1] > 0:
            x = tf.reduce_sum(x * labels_in, axis=1, keepdims=True)
    scores_out = x

    # Output.
    assert scores_out.dtype == tf.as_dtype(dtype)
    scores_out = tf.identity(scores_out, name='scores_out')
    return scores_out
#----------------------------------------------------------------------------
================================================
FILE: training/training_loop.py
================================================
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
#
# This work is made available under the Nvidia Source Code License-NC.
# To view a copy of this license, visit
# https://nvlabs.github.io/stylegan2/license.html
"""Main training script."""
import numpy as np
import tensorflow as tf
import dnnlib
import dnnlib.tflib as tflib
from dnnlib.tflib.autosummary import autosummary
from training import dataset
from training import misc
from metrics import metric_base
#----------------------------------------------------------------------------
# Just-in-time processing of training images before feeding them to the networks.
def process_reals(x, labels, lod, mirror_augment, drange_data, drange_net):
    """Prepare a minibatch of real images just before it is fed to the networks.

    Steps, each in its own name scope:
      1. Cast to float32 and remap from `drange_data` to `drange_net`.
      2. If `mirror_augment`: replace each sample, with probability 0.5, by
         its reversal along axis 3.
      3. Blend the image with a 2x2 box-filtered copy of itself, weighted by
         the fractional part of `lod`.
      4. Repeat every pixel 2**floor(lod) times along axes 2 and 3.

    Returns:
        Tuple `(images, labels)`; `labels` is passed through untouched.
    """
    with tf.name_scope('DynamicRange'):
        images = tf.cast(x, tf.float32)
        images = misc.adjust_dynamic_range(images, drange_data, drange_net)

    if mirror_augment:
        with tf.name_scope('MirrorAugment'):
            keep_original = tf.random_uniform([tf.shape(images)[0]]) < 0.5
            images = tf.where(keep_original, images, tf.reverse(images, [3]))

    with tf.name_scope('FadeLOD'): # Smooth crossfade between consecutive levels-of-detail.
        dims = tf.shape(images)
        # Average every 2x2 tile and replicate it back to the full size.
        coarse = tf.reshape(images, [-1, dims[1], dims[2]//2, 2, dims[3]//2, 2])
        coarse = tf.reduce_mean(coarse, axis=[3, 5], keepdims=True)
        coarse = tf.tile(coarse, [1, 1, 1, 2, 1, 2])
        coarse = tf.reshape(coarse, [-1, dims[1], dims[2], dims[3]])
        images = tflib.lerp(images, coarse, lod - tf.floor(lod))

    with tf.name_scope('UpscaleLOD'): # Upscale to match the expected input/output size of the networks.
        dims = tf.shape(images)
        factor = tf.cast(2 ** tf.floor(lod), tf.int32)
        images = tf.reshape(images, [-1, dims[1], dims[2], 1, dims[3], 1])
        images = tf.tile(images, [1, 1, 1, factor, 1, factor])
        images = tf.reshape(images, [-1, dims[1], dims[2] * factor, dims[3] * factor])

    return images, labels
#----------------------------------------------------------------------------
# Evaluate time-varying training parameters.
def training_schedule(
    cur_nimg,
    training_set,
    lod_initial_resolution  = None,     # Image resolution used at the beginning.
    lod_training_kimg       = 600,      # Thousands of real images to show before doubling the resolution.
    lod_transition_kimg     = 600,      # Thousands of real images to show when fading in new layers.
    minibatch_size_base     = 32,       # Global minibatch size.
    minibatch_size_dict     = {},       # Resolution-specific overrides.
    minibatch_gpu_base      = 4,        # Number of samples processed at a time by one GPU.
    minibatch_gpu_dict      = {},       # Resolution-specific overrides.
    G_lrate_base            = 0.002,    # Learning rate for the generator.
    G_lrate_dict            = {},       # Resolution-specific overrides.
    D_lrate_base            = 0.002,    # Learning rate for the discriminator.
    D_lrate_dict            = {},       # Resolution-specific overrides.
    lrate_rampup_kimg       = 0,        # Duration of learning rate ramp-up.
    tick_kimg_base          = 4,        # Default interval of progress snapshots.
    tick_kimg_dict          = {8:28, 16:24, 32:20, 64:16, 128:12, 256:8, 512:6, 1024:4}): # Resolution-specific overrides.
    """Compute the training parameters in effect after `cur_nimg` images.

    Training is divided into phases of `lod_training_kimg + lod_transition_kimg`
    thousand images. Each completed phase lowers the level of detail (lod) by
    one, and during the transition part of a phase the lod decreases linearly.
    The lod never drops below 0, and stays at 0 when `lod_initial_resolution`
    is None.

    Returns:
        `dnnlib.EasyDict` with `kimg`, `lod`, `resolution`, `minibatch_size`,
        `minibatch_gpu`, `G_lrate`, `D_lrate` and `tick_kimg`. The per-resolution
        dicts override the `*_base` values when they contain the current
        resolution; the dict arguments are only read, never modified.
    """
    kimg = cur_nimg / 1000.0

    # Locate the current phase and the position inside it.
    phase_dur = lod_training_kimg + lod_transition_kimg
    if phase_dur > 0:
        phase_idx = int(np.floor(kimg / phase_dur))
    else:
        phase_idx = 0
    phase_kimg = kimg - phase_idx * phase_dur

    # Level-of-detail: starts at the gap between dataset and initial resolution (in log2).
    if lod_initial_resolution is None:
        lod = 0.0
    else:
        lod = training_set.resolution_log2
        lod -= np.floor(np.log2(lod_initial_resolution))
        lod -= phase_idx
        if lod_transition_kimg > 0:
            fade_kimg = max(phase_kimg - lod_training_kimg, 0.0)
            lod -= fade_kimg / lod_transition_kimg
        lod = max(lod, 0.0)
    resolution = 2 ** (training_set.resolution_log2 - int(np.floor(lod)))

    # Resolution-dependent settings, falling back to the base values.
    G_lrate = G_lrate_dict.get(resolution, G_lrate_base)
    D_lrate = D_lrate_dict.get(resolution, D_lrate_base)
    if lrate_rampup_kimg > 0:
        rampup = min(kimg / lrate_rampup_kimg, 1.0)
        G_lrate *= rampup
        D_lrate *= rampup

    # Assemble the result (fields are assigned in the documented order).
    s = dnnlib.EasyDict()
    s.kimg = kimg
    s.lod = lod
    s.resolution = resolution
    s.minibatch_size = minibatch_size_dict.get(resolution, minibatch_size_base)
    s.minibatch_gpu = minibatch_gpu_dict.get(resolution, minibatch_gpu_base)
    s.G_lrate = G_lrate
    s.D_lrate = D_lrate
    s.tick_kimg = tick_kimg_dict.get(resolution, tick_kimg_base)
    return s
#----------------------------------------------------------------------------
# Main training script.
def training_loop(
    G_args                  = {},       # Options for generator network.
    D_args                  = {},       # Options for discriminator network.
    G_opt_args              = {},       # Options for generator optimizer.
    D_opt_args              = {},       # Options for discriminator optimizer.
    G_loss_args             = {},       # Options for generator loss.
    D_loss_args             = {},       # Options for discriminator loss.
    dataset_args            = {},       # Options for dataset.load_dataset().
    sched_args              = {},       # Options for train.TrainingSchedule.
    grid_args               = {},       # Options for train.setup_snapshot_image_grid().
    metric_arg_list         = [],       # Options for MetricGroup.
    tf_config               = {},       # Options for tflib.init_tf().
    data_dir                = None,     # Directory to load datasets from.
    G_smoothing_kimg        = 10.0,     # Half-life of the running average of generator weights.
    minibatch_repeats       = 4,        # Number of minibatches to run before adjusting training parameters.
    lazy_regularization     = True,     # Perform regularization as a separate training step?
    G_reg_interval          = 4,        # How often to perform regularization for G? Ignored if lazy_regularization=False.
    D_reg_interval          = 16,       # How often to perform regularization for D? Ignored if lazy_regularization=False.
    reset_opt_for_new_lod   = True,     # Reset optimizer internal state (e.g. Adam moments) when new layers are introduced?
    total_kimg              = 25000,    # Total length of the training, measured in thousands of real images.
    mirror_augment          = False,    # Enable mirror augment?
    drange_net              = [-1,1],   # Dynamic range used when feeding image data to the networks.
    image_snapshot_ticks    = 50,       # How often to save image snapshots? None = only save 'reals.png' and 'fakes_init.png'.
    network_snapshot_ticks  = 50,       # How often to save network snapshots? None = only save 'network-final.pkl'.
    save_tf_graph           = False,    # Include full TensorFlow computation graph in the tfevents file?
    save_weight_histograms  = False,    # Include weight histograms in the tfevents file?
    resume_pkl              = None,     # Network pickle to resume training from, None = train from scratch.
    resume_kimg             = 0.0,      # Assumed training progress at the beginning. Affects reporting and training schedule.
    resume_time             = 0.0,      # Assumed wallclock time at the beginning. Affects reporting.
    resume_with_new_nets    = False):   # Construct new networks according to G_args and D_args before resuming training?
    """Build the multi-GPU training graph and run the G/D training loop.

    Loads the training set, constructs or resumes the networks G, D and Gs,
    builds per-GPU loss and gradient ops, and then alternates generator and
    discriminator updates until `total_kimg` thousand real images have been
    processed or the run context requests a stop. Once per tick it reports
    progress and, at the configured intervals, saves image and network
    snapshots. A final 'network-final.pkl' is always written to the run
    directory before the summary writer and training set are closed.

    The dict/list default arguments are only read (or copied before being
    modified), never mutated in place. Returns None.
    """

    # Initialize dnnlib and TensorFlow.
    tflib.init_tf(tf_config)
    num_gpus = dnnlib.submit_config.num_gpus

    # Load training set.
    training_set = dataset.load_dataset(data_dir=dnnlib.convert_path(data_dir), verbose=True, **dataset_args)
    grid_size, grid_reals, grid_labels = misc.setup_snapshot_image_grid(training_set, **grid_args)
    misc.save_image_grid(grid_reals, dnnlib.make_run_dir_path('reals.png'), drange=training_set.dynamic_range, grid_size=grid_size)

    # Construct or load networks.
    with tf.device('/gpu:0'):
        if resume_pkl is None or resume_with_new_nets:
            print('Constructing networks...')
            G = tflib.Network('G', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **G_args)
            D = tflib.Network('D', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **D_args)
            Gs = G.clone('Gs')
        if resume_pkl is not None:
            print('Loading networks from "%s"...' % resume_pkl)
            rG, rD, rGs = misc.load_pkl(resume_pkl)
            if resume_with_new_nets:
                # Keep the freshly constructed networks, but take their variables from the pickle.
                G.copy_vars_from(rG)
                D.copy_vars_from(rD)
                Gs.copy_vars_from(rGs)
            else:
                # Use the pickled networks as-is.
                G = rG
                D = rD
                Gs = rGs

    # Print layers and generate initial image snapshot.
    G.print_layers()
    D.print_layers()
    # The schedule at the very end of training supplies the per-GPU minibatch size for this snapshot.
    sched = training_schedule(cur_nimg=total_kimg*1000, training_set=training_set, **sched_args)
    grid_latents = np.random.randn(np.prod(grid_size), *G.input_shape[1:])
    grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched.minibatch_gpu)
    misc.save_image_grid(grid_fakes, dnnlib.make_run_dir_path('fakes_init.png'), drange=drange_net, grid_size=grid_size)

    # Setup training inputs.
    print('Building TensorFlow graph...')
    with tf.name_scope('Inputs'), tf.device('/cpu:0'):
        lod_in               = tf.placeholder(tf.float32, name='lod_in', shape=[])
        lrate_in             = tf.placeholder(tf.float32, name='lrate_in', shape=[])
        minibatch_size_in    = tf.placeholder(tf.int32, name='minibatch_size_in', shape=[])
        minibatch_gpu_in     = tf.placeholder(tf.int32, name='minibatch_gpu_in', shape=[])
        # Number of accumulation rounds needed to cover one global minibatch.
        minibatch_multiplier = minibatch_size_in // (minibatch_gpu_in * num_gpus)
        # Per-minibatch decay factor: 0.5 ** (minibatch_size / (G_smoothing_kimg * 1000)).
        Gs_beta              = 0.5 ** tf.div(tf.cast(minibatch_size_in, tf.float32), G_smoothing_kimg * 1000.0) if G_smoothing_kimg > 0.0 else 0.0

    # Setup optimizers.
    # Copy the option dicts so the caller's dicts (and the shared defaults) are not modified.
    G_opt_args = dict(G_opt_args)
    D_opt_args = dict(D_opt_args)
    for args, reg_interval in [(G_opt_args, G_reg_interval), (D_opt_args, D_reg_interval)]:
        args['minibatch_multiplier'] = minibatch_multiplier
        args['learning_rate'] = lrate_in
        if lazy_regularization:
            # Rescale the learning rate and the Adam betas (if given) by reg_interval / (reg_interval + 1).
            # NOTE(review): presumably compensates for the extra, separately applied regularization steps -- confirm.
            mb_ratio = reg_interval / (reg_interval + 1)
            args['learning_rate'] *= mb_ratio
            if 'beta1' in args: args['beta1'] **= mb_ratio
            if 'beta2' in args: args['beta2'] **= mb_ratio
    G_opt = tflib.Optimizer(name='TrainG', **G_opt_args)
    D_opt = tflib.Optimizer(name='TrainD', **D_opt_args)
    G_reg_opt = tflib.Optimizer(name='RegG', share=G_opt, **G_opt_args)
    D_reg_opt = tflib.Optimizer(name='RegD', share=D_opt, **D_opt_args)

    # Build training graph for each GPU.
    data_fetch_ops = []
    for gpu in range(num_gpus):
        with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu):

            # Create GPU-specific shadow copies of G and D.
            G_gpu = G if gpu == 0 else G.clone(G.name + '_shadow')
            D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow')

            # Fetch training data via temporary variables.
            with tf.name_scope('DataFetch'):
                # The buffers are sized by the per-GPU minibatch of the schedule at the resume point.
                sched = training_schedule(cur_nimg=int(resume_kimg*1000), training_set=training_set, **sched_args)
                reals_var = tf.Variable(name='reals', trainable=False, initial_value=tf.zeros([sched.minibatch_gpu] + training_set.shape))
                labels_var = tf.Variable(name='labels', trainable=False, initial_value=tf.zeros([sched.minibatch_gpu, training_set.label_size]))
                reals_write, labels_write = training_set.get_minibatch_tf()
                reals_write, labels_write = process_reals(reals_write, labels_write, lod_in, mirror_augment, training_set.dynamic_range, drange_net)
                # Append the tail of the existing buffer after the fresh data before assigning it back.
                reals_write = tf.concat([reals_write, reals_var[minibatch_gpu_in:]], axis=0)
                labels_write = tf.concat([labels_write, labels_var[minibatch_gpu_in:]], axis=0)
                data_fetch_ops += [tf.assign(reals_var, reals_write)]
                data_fetch_ops += [tf.assign(labels_var, labels_write)]
                reals_read = reals_var[:minibatch_gpu_in]
                labels_read = labels_var[:minibatch_gpu_in]

            # Evaluate loss functions.
            lod_assign_ops = []
            if 'lod' in G_gpu.vars: lod_assign_ops += [tf.assign(G_gpu.vars['lod'], lod_in)]
            if 'lod' in D_gpu.vars: lod_assign_ops += [tf.assign(D_gpu.vars['lod'], lod_in)]
            # Make the loss ops depend on the 'lod' assignments so the networks see the fed lod value.
            with tf.control_dependencies(lod_assign_ops):
                with tf.name_scope('G_loss'):
                    G_loss, G_reg = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=G_opt, training_set=training_set, minibatch_size=minibatch_gpu_in, **G_loss_args)
                with tf.name_scope('D_loss'):
                    D_loss, D_reg = dnnlib.util.call_func_by_name(G=G_gpu, D=D_gpu, opt=D_opt, training_set=training_set, minibatch_size=minibatch_gpu_in, reals=reals_read, labels=labels_read, **D_loss_args)

            # Register gradients.
            if not lazy_regularization:
                # Fold the regularization terms directly into the main losses.
                if G_reg is not None: G_loss += G_reg
                if D_reg is not None: D_loss += D_reg
            else:
                # Regularization gets its own optimizers; the term is scaled by the interval at which it runs.
                if G_reg is not None: G_reg_opt.register_gradients(tf.reduce_mean(G_reg * G_reg_interval), G_gpu.trainables)
                if D_reg is not None: D_reg_opt.register_gradients(tf.reduce_mean(D_reg * D_reg_interval), D_gpu.trainables)
            G_opt.register_gradients(tf.reduce_mean(G_loss), G_gpu.trainables)
            D_opt.register_gradients(tf.reduce_mean(D_loss), D_gpu.trainables)

    # Setup training ops.
    data_fetch_op = tf.group(*data_fetch_ops)
    G_train_op = G_opt.apply_updates()
    D_train_op = D_opt.apply_updates()
    G_reg_op = G_reg_opt.apply_updates(allow_no_op=True)
    D_reg_op = D_reg_opt.apply_updates(allow_no_op=True)
    Gs_update_op = Gs.setup_as_moving_average_of(G, beta=Gs_beta)

    # Finalize graph.
    with tf.device('/gpu:0'):
        try:
            peak_gpu_mem_op = tf.contrib.memory_stats.MaxBytesInUse()
        except tf.errors.NotFoundError:
            # Fall back to a constant zero so progress reporting still works.
            peak_gpu_mem_op = tf.constant(0)
    tflib.init_uninitialized_vars()

    print('Initializing logs...')
    summary_log = tf.summary.FileWriter(dnnlib.make_run_dir_path())
    if save_tf_graph:
        summary_log.add_graph(tf.get_default_graph())
    if save_weight_histograms:
        G.setup_weight_histograms()
        D.setup_weight_histograms()
    metrics = metric_base.MetricGroup(metric_arg_list)

    print('Training for %d kimg...\n' % total_kimg)
    dnnlib.RunContext.get().update('', cur_epoch=resume_kimg, max_epoch=total_kimg)
    maintenance_time = dnnlib.RunContext.get().get_last_update_interval()
    cur_nimg = int(resume_kimg * 1000)
    cur_tick = -1                   # -1 forces a maintenance pass after the first batch of minibatches.
    tick_start_nimg = cur_nimg
    prev_lod = -1.0
    running_mb_counter = 0
    while cur_nimg < total_kimg * 1000:
        if dnnlib.RunContext.get().should_stop(): break

        # Choose training parameters and configure training ops.
        sched = training_schedule(cur_nimg=cur_nimg, training_set=training_set, **sched_args)
        assert sched.minibatch_size % (sched.minibatch_gpu * num_gpus) == 0
        training_set.configure(sched.minibatch_gpu, sched.lod)
        if reset_opt_for_new_lod:
            # The floor or ceil of lod changes exactly when lod crosses or leaves an integer level.
            if np.floor(sched.lod) != np.floor(prev_lod) or np.ceil(sched.lod) != np.ceil(prev_lod):
                G_opt.reset_optimizer_state()
                D_opt.reset_optimizer_state()
        prev_lod = sched.lod

        # Run training ops.
        # G and D share the 'lrate_in' placeholder, so each network gets its own feed dict.
        # Feeding sched.G_lrate to the discriminator ops as well would silently ignore
        # D_lrate_base / D_lrate_dict from the training schedule.
        common_feed = {lod_in: sched.lod, minibatch_size_in: sched.minibatch_size, minibatch_gpu_in: sched.minibatch_gpu}
        G_feed_dict = dict(common_feed)
        G_feed_dict[lrate_in] = sched.G_lrate
        D_feed_dict = dict(common_feed)
        D_feed_dict[lrate_in] = sched.D_lrate
        for _repeat in range(minibatch_repeats):
            rounds = range(0, sched.minibatch_size, sched.minibatch_gpu * num_gpus)
            run_G_reg = (lazy_regularization and running_mb_counter % G_reg_interval == 0)
            run_D_reg = (lazy_regularization and running_mb_counter % D_reg_interval == 0)
            cur_nimg += sched.minibatch_size
            running_mb_counter += 1

            # Fast path without gradient accumulation.
            if len(rounds) == 1:
                tflib.run([G_train_op, data_fetch_op], G_feed_dict)
                if run_G_reg:
                    tflib.run(G_reg_op, G_feed_dict)
                tflib.run([D_train_op, Gs_update_op], D_feed_dict)
                if run_D_reg:
                    tflib.run(D_reg_op, D_feed_dict)

            # Slow path with gradient accumulation.
            else:
                for _round in rounds:
                    tflib.run(G_train_op, G_feed_dict)
                if run_G_reg:
                    for _round in rounds:
                        tflib.run(G_reg_op, G_feed_dict)
                tflib.run(Gs_update_op, G_feed_dict)
                for _round in rounds:
                    tflib.run(data_fetch_op, D_feed_dict)
                    tflib.run(D_train_op, D_feed_dict)
                if run_D_reg:
                    for _round in rounds:
                        tflib.run(D_reg_op, D_feed_dict)

        # Perform maintenance tasks once per tick.
        done = (cur_nimg >= total_kimg * 1000)
        if cur_tick < 0 or cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done:
            cur_tick += 1
            tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0
            tick_start_nimg = cur_nimg
            tick_time = dnnlib.RunContext.get().get_time_since_last_update()
            total_time = dnnlib.RunContext.get().get_time_since_start() + resume_time

            # Report progress.
            print('tick %-5d kimg %-8.1f lod %-5.2f minibatch %-4d time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %-6.1f gpumem %.1f' % (
                autosummary('Progress/tick', cur_tick),
                autosummary('Progress/kimg', cur_nimg / 1000.0),
                autosummary('Progress/lod', sched.lod),
                autosummary('Progress/minibatch', sched.minibatch_size),
                dnnlib.util.format_time(autosummary('Timing/total_sec', total_time)),
                autosummary('Timing/sec_per_tick', tick_time),
                autosummary('Timing/sec_per_kimg', tick_time / tick_kimg),
                autosummary('Timing/maintenance_sec', maintenance_time),
                autosummary('Resources/peak_gpu_mem_gb', peak_gpu_mem_op.eval() / 2**30)))
            autosummary('Timing/total_hours', total_time / (60.0 * 60.0))
            autosummary('Timing/total_days', total_time / (24.0 * 60.0 * 60.0))

            # Save snapshots.
            if image_snapshot_ticks is not None and (cur_tick % image_snapshot_ticks == 0 or done):
                grid_fakes = Gs.run(grid_latents, grid_labels, is_validation=True, minibatch_size=sched.minibatch_gpu)
                misc.save_image_grid(grid_fakes, dnnlib.make_run_dir_path('fakes%06d.png' % (cur_nimg // 1000)), drange=drange_net, grid_size=grid_size)
            if network_snapshot_ticks is not None and (cur_tick % network_snapshot_ticks == 0 or done):
                pkl = dnnlib.make_run_dir_path('network-snapshot-%06d.pkl' % (cur_nimg // 1000))
                misc.save_pkl((G, D, Gs), pkl)
                # Metrics are evaluated on the snapshot that was just written.
                metrics.run(pkl, run_dir=dnnlib.make_run_dir_path(), data_dir=dnnlib.convert_path(data_dir), num_gpus=num_gpus, tf_config=tf_config)

            # Update summaries and RunContext.
            metrics.update_autosummaries()
            tflib.autosummary.save_summaries(summary_log, cur_nimg)
            dnnlib.RunContext.get().update('%.2f' % sched.lod, cur_epoch=cur_nimg // 1000, max_epoch=total_kimg)
            maintenance_time = dnnlib.RunContext.get().get_last_update_interval() - tick_time

    # Save final snapshot.
    misc.save_pkl((G, D, Gs), dnnlib.make_run_dir_path('network-final.pkl'))

    # All done.
    summary_log.close()
    training_set.close()
#----------------------------------------------------------------------------