Repository: perplexingpegasus/ProGAN
Branch: master
Commit: 3fda528bfc9d
Files: 8
Total size: 53.4 KB
Directory structure:
gitextract_z63xd0wk/
├── README.md
├── feed_dict.py
├── make_video.py
├── ops.py
├── progan_v15.py
├── progan_v16.py
└── scripts/
├── downloader.py
└── image_reshape.py
================================================
FILE CONTENTS
================================================
================================================
FILE: README.md
================================================
# ProGAN
An implementation of a Progressive Generative Adversarial Network (ProGAN), based on the research by Tero Karras et al.
The model was trained on landscape images collected from Reddit.
http://research.nvidia.com/sites/default/files/pubs/2017-10_Progressive-Growing-of/karras2018iclr-paper.pdf

================================================
FILE: feed_dict.py
================================================
import os
import pickle
import numpy as np
from itertools import cycle
'''
FeedDict handles several numpy mem_map arrays of image data saved within the directory. The arrays
should be named in the format "n1_n2.npy" where n1 x n1 is the resolution of the image data in the
array, and n2 is its number used for indexing purposes. Data should be of type np.float32 and scaled
between -1.0 and 1.0. In order to avoid loading unnecessary data into memory, only one mem_map is
loaded at a time.
'''
class FeedDict:
    """Serve image batches and latent vectors from ``.npy`` arrays on disk.

    Files in ``imgdir`` whose names start with ``"<res>_"`` are grouped by
    resolution and cycled through endlessly. Only one array is held in memory
    at a time; a new one is loaded when the current one is exhausted or when
    a different resolution is requested.
    """

    pickle_filename = 'fd_log.pkl'

    def __init__(self, logdir, imgdir, z_length, n_examples, shuffle=True, min_size=4, max_size=1024):
        """Index the array files found in ``imgdir``.

        Args:
            logdir: directory in which ``save`` writes the pickled state.
            imgdir: directory containing the ``"<res>_<n>.npy"`` files.
            z_length: length of each latent vector.
            n_examples: number of fixed latent vectors stored in ``z_fixed``.
            shuffle: shuffle the file order and the rows of each loaded array.
            min_size: smallest resolution listed in ``sizes``.
            max_size: largest resolution listed in ``sizes``.
        """
        self.logdir = logdir
        self.shuffle = shuffle
        self.z_length = z_length
        self.sizes = [2 ** i for i in range(
            int(np.log2(min_size)),
            int(np.log2(max_size)) + 1
        )]

        files = os.listdir(imgdir)
        self.arrays = dict()
        # Files are indexed for every power of two from 4 to 1024,
        # independently of min_size / max_size.
        for s in [2 ** i for i in range(2, 11)]:
            prefix = '{}_'.format(s)
            path_list = [os.path.join(imgdir, f) for f in files if f.startswith(prefix)]
            if shuffle: np.random.shuffle(path_list)
            self.arrays[s] = cycle(path_list)

        # The original call passed z_length positionally, which lands in
        # ``random_state``. It is kept (now explicit) so z_fixed stays
        # reproducible; note that this seeds numpy's global RNG.
        self.z_fixed = self.z_batch(n_examples, random_state=z_length)

        self.cur_res = None
        self.cur_path = None
        self.cur_array = None
        self.cur_array_len = 0
        self.idx = 0

    @property
    def n_sizes(self):
        """Number of resolutions in ``sizes``."""
        return len(self.sizes)

    def __change_res(self, res):
        """Switch to resolution ``res`` and load its next array."""
        assert res in self.arrays
        self.cur_res = res
        self.__change_array()

    def __change_array(self):
        """Advance to the next file of the current resolution and rewind."""
        try:
            new_path = next(self.arrays[self.cur_res])
        except StopIteration:
            # cycle([]) is exhausted immediately: no file matched this resolution.
            raise FileNotFoundError(
                'No image arrays found for resolution {}'.format(self.cur_res)) from None
        print('Loaded new memmap array: {}'.format(new_path))

        # Skip the reload when the cycle hands back the file already in memory.
        if new_path != self.cur_path:
            self.cur_path = new_path
            self.cur_array = np.load(new_path)
            self.cur_array_len = self.cur_array.shape[0]

        if self.shuffle: np.random.shuffle(self.cur_array)
        self.idx = 0

    def z_batch(self, batch_size, random_state=None):
        """Return a ``[batch_size, z_length]`` array of standard-normal samples.

        When ``random_state`` is given, numpy's global RNG is seeded with it first.
        """
        if random_state is not None:
            np.random.seed(random_state)
        return np.random.normal(0.0, 1.0, size=[batch_size, self.z_length])

    def x_batch(self, batch_size, res):
        """Return the next ``batch_size`` rows at resolution ``res``.

        A batch that runs past the end of the current array is completed from
        the following array(s), so the result always has ``batch_size`` rows.

        Raises:
            FileNotFoundError: no array file exists for ``res``.
            ValueError: an array with zero rows is encountered while wrapping.
        """
        if res != self.cur_res:
            self.__change_res(res)

        # max() guards against a restored state whose idx is past the end.
        remaining = max(0, self.cur_array_len - self.idx)
        start = self.idx

        if remaining >= batch_size:
            stop = start + batch_size
            self.idx = stop
            return self.cur_array[start:stop]

        parts = [self.cur_array[start:]]
        missing = batch_size - remaining
        # Keep pulling arrays until the batch is full; one array may hold
        # fewer rows than are still missing.
        while missing > 0:
            self.__change_array()
            if self.cur_array_len == 0:
                raise ValueError('Array {} contains no images'.format(self.cur_path))
            take = min(missing, self.cur_array_len)
            parts.append(self.cur_array[:take])
            missing -= take
            self.idx = take
        return np.concatenate(parts)

    @classmethod
    def load(cls, logdir, **kwargs):
        """Restore a pickled instance from ``logdir``, or build a new one from ``kwargs``."""
        path = os.path.join(logdir, cls.pickle_filename)
        if os.path.exists(path):
            # NOTE: pickle executes arbitrary code on load; only point logdir
            # at directories you trust.
            with open(path, 'rb') as f:
                fd = pickle.load(f)
            if isinstance(fd, cls):
                print('Restored feed_dict -------\n')
                return fd
        return cls(logdir, **kwargs)

    def save(self):
        """Pickle this instance to ``logdir`` under ``pickle_filename``."""
        path = os.path.join(self.logdir, self.pickle_filename)
        with open(path, 'wb') as f:
            pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)
================================================
FILE: make_video.py
================================================
from progan_v15 import ProGAN
import librosa
import numpy as np
from moviepy.video.VideoClip import VideoClip
from moviepy.editor import AudioFileClip
from sklearn.preprocessing import StandardScaler
def get_z_from_audio(audio, z_length, n_bins=60, hop_length=512, random_state=50):
    """Turn an audio signal into one latent vector per spectrogram frame.

    The standardized constant-Q magnitudes fill part of each latent vector and
    the remaining entries are a single random draw repeated for every frame;
    the latent dimensions are then shuffled with one common permutation.

    Args:
        audio: path to an audio file, or an already loaded waveform.
        z_length: length of each returned latent vector.
        n_bins: number of constant-Q bins requested from librosa.
        hop_length: hop length passed to librosa's CQT.
        random_state: seed applied to numpy's global RNG.

    Returns:
        Array of shape ``[n_frames, z_length]``.

    Raises:
        ValueError: the spectrogram has more bins than ``z_length``.
    """
    np.random.seed(random_state)
    if isinstance(audio, str):
        audio, _ = librosa.load(audio)

    y = librosa.core.cqt(audio, n_bins=n_bins, hop_length=hop_length)
    mag, _ = librosa.core.magphase(y)
    # Frames become rows so that each bin is standardized over time.
    mag = StandardScaler().fit_transform(mag.T)

    s0, s1 = mag.shape
    if s1 > z_length:
        raise ValueError(
            'z_length ({}) must be at least the number of spectrogram bins ({})'.format(z_length, s1))

    # One random vector, repeated for every frame, pads the latent up to z_length.
    static = np.random.normal(size=[z_length - s1])
    static = np.tile(static, (s0, 1))
    z = np.concatenate((mag, static), 1)

    # Shuffling the transposed view permutes latent dimensions, not frames.
    z = z.T
    np.random.shuffle(z)
    z = z.T
    return z
def make_video(audio, filename, progan, n_bins=60, random_state=0, imgs_per_batch=20):
    """Render a video whose frames are generated from the audio's spectrogram.

    Each frame is the left part of a generated image concatenated with its
    horizontal mirror. Frames are generated ``imgs_per_batch`` at a time and
    cached until a frame from a different batch is requested.

    Args:
        audio: path to the audio file.
        filename: path of the video file to write.
        progan: model exposing ``z_length``, ``get_cur_res()`` and ``generate(z)``.
        n_bins: number of constant-Q bins used for the latent vectors.
        random_state: seed forwarded to ``get_z_from_audio``.
        imgs_per_batch: number of frames generated per call to ``progan.generate``.
    """
    y, sr = librosa.load(audio)
    song_length = len(y) / sr
    z_audio = get_z_from_audio(y, z_length=progan.z_length, n_bins=n_bins, random_state=random_state)
    fps = z_audio.shape[0] / song_length

    res = progan.get_cur_res()
    half_width = res * 8 // 9
    # Real frames are 2 * half_width wide; the blank frame must match that
    # exactly (res * 16 // 9 can be one pixel wider).
    frame_shape = (res, half_width * 2, 3)

    cached_imgs = None
    cached_start = None

    def make_frame(t):
        nonlocal cached_imgs, cached_start
        cur_frame_idx = int(t * fps)
        if cur_frame_idx >= len(z_audio):
            return np.zeros(shape=frame_shape, dtype=np.uint8)

        batch_start = cur_frame_idx - cur_frame_idx % imgs_per_batch
        # Regenerate whenever the requested frame lies outside the cached
        # batch, not only when it happens to be the batch's first frame.
        if cached_start != batch_start:
            imgs = progan.generate(z_audio[batch_start:batch_start + imgs_per_batch])
            # generate() drops the batch axis for a single image.
            if imgs.ndim == 3:
                imgs = imgs[np.newaxis]
            imgs = imgs[:, :, :half_width, :]
            cached_imgs = np.concatenate((imgs, np.flip(imgs, 2)), 2)
            cached_start = batch_start
        return cached_imgs[cur_frame_idx % imgs_per_batch]

    video_clip = VideoClip(make_frame=make_frame, duration=song_length)
    audio_clip = AudioFileClip(audio)
    video_clip = video_clip.set_audio(audio_clip)
    video_clip.write_videofile(filename, fps=fps)
if __name__ == '__main__':
    # Restore the model stored in logdir_v2 and render the audio-driven clip.
    progan = ProGAN(logdir='logdir_v2', imgdir='img_arrays')
    make_video(
        'videos\\eco_zones.mp3',
        'eco_zones.mp4',
        progan,
        random_state=768,
    )
================================================
FILE: ops.py
================================================
import tensorflow as tf
# Shared initializers for every layer in this module.
# Weights come from a normal initializer with the library's default
# parameters; conv() and dense() rescale them by sqrt(2 / fan_in) at use time.
weight_init = tf.random_normal_initializer()
# Biases start at zero.
bias_init = tf.constant_initializer(0)
def conv(input, out_channels, filter_size=3, k=1, padding='SAME', mode=None, output_shape=None):
    """Apply a 2-D convolution (NCHW) whose weights are rescaled by sqrt(2 / fan_in).

    Args:
        input: tensor whose second axis is the channel axis.
        out_channels: number of output channels.
        filter_size: spatial size of the square kernel.
        k: stride for the plain and 'transpose' modes.
        padding: padding scheme forwarded to TensorFlow.
        mode: None for a plain convolution, 'transpose' for a transposed
            convolution, 'upscale' for a stride-2 transposed convolution, or
            'downscale' for a stride-2 convolution.
        output_shape: output shape, required by the 'transpose' mode.

    Returns:
        The convolution output with the bias added.
    """
    def blend_shifts(w):
        # Zero-pad the two spatial axes by one, then sum the four one-step shifts.
        w = tf.pad(w, [[1, 1], [1, 1], [0, 0], [0, 0]], mode='CONSTANT')
        return tf.add_n([w[1:, 1:], w[:-1, 1:], w[1:, :-1], w[:-1, :-1]])

    dyn_shape = tf.shape(input)
    in_channels = int(input.get_shape()[1])

    # Transposed convolutions expect the two channel axes swapped.
    if mode in ('upscale', 'transpose'):
        kernel_shape = [filter_size, filter_size, out_channels, in_channels]
    else:
        kernel_shape = [filter_size, filter_size, in_channels, out_channels]

    kernel = tf.get_variable('filter', kernel_shape, initializer=weight_init)
    fan_in = float(filter_size ** 2 * in_channels)
    kernel = kernel * tf.sqrt(2.0 / fan_in)

    bias = tf.get_variable('bias', [1, out_channels, 1, 1], initializer=bias_init)

    if mode == 'upscale':
        kernel = blend_shifts(kernel)
        output_shape = [dyn_shape[0], out_channels, dyn_shape[2] * 2, dyn_shape[3] * 2]
        result = tf.nn.conv2d_transpose(
            input, kernel, output_shape, [1, 1, 2, 2],
            padding=padding, data_format='NCHW')
    elif mode == 'downscale':
        kernel = blend_shifts(kernel)
        kernel = kernel * 0.25
        result = tf.nn.conv2d(
            input, kernel, [1, 1, 2, 2], padding=padding, data_format='NCHW')
    elif mode == 'transpose':
        result = tf.nn.conv2d_transpose(
            input, kernel, output_shape, [1, 1, k, k],
            padding=padding, data_format='NCHW')
    else:
        result = tf.nn.conv2d(
            input, kernel, [1, 1, k, k], padding=padding, data_format='NCHW')

    result = result + bias

    # NOTE(review): axis 3 is the width axis in NCHW, so this only succeeds
    # when the output is one pixel wide - confirm the intended axis.
    if out_channels == 1:
        result = tf.squeeze(result, 3)
    return result
def dense(input, output_size):
    """Apply a fully connected layer whose weights are rescaled by sqrt(2 / fan_in).

    Args:
        input: 2-D tensor of shape ``[batch, fan_in]``.
        output_size: number of output units.

    Returns:
        Tensor of shape ``[batch, output_size]``.
    """
    fan_in = int(input.get_shape()[1])
    W = tf.get_variable('W', [fan_in, output_size], initializer=weight_init)
    W = W * tf.sqrt(2.0 / float(fan_in))
    # The variable keeps its 4-D shape so existing checkpoints still restore.
    b = tf.get_variable('b', [1, output_size, 1, 1], initializer=bias_init)
    # Adding the 4-D bias directly would broadcast the [batch, output_size]
    # product into a 4-D tensor; flatten the bias to one row first.
    return tf.matmul(input, W) + tf.reshape(b, [1, output_size])
def leaky_relu(input, alpha=0.2):
    """Leaky ReLU activation with negative slope ``alpha``."""
    activated = tf.nn.leaky_relu(input, alpha=alpha)
    return activated
def pixelwise_norm(input):
    """Divide by the root mean square taken over axis 1 (plus 1e-8 for stability)."""
    mean_square = tf.reduce_mean(tf.square(input), 1, keepdims=True)
    scale = tf.sqrt(mean_square + 1e-8)
    return input / scale
def g_conv_layer(input, out_channels, **kwargs):
    """Generator block: conv, then leaky ReLU, then pixelwise normalization."""
    convolved = conv(input, out_channels, **kwargs)
    activated = leaky_relu(convolved)
    return pixelwise_norm(activated)
def d_conv_layer(input, out_channels, **kwargs):
    """Discriminator block: conv followed by leaky ReLU."""
    convolved = conv(input, out_channels, **kwargs)
    return leaky_relu(convolved)
def minibatch_stddev(input):
    """Append one channel holding the per-group standard deviation statistic.

    The batch is split into groups of at most four samples. The standard
    deviation across each group is averaged over the remaining three axes and
    broadcast into a single extra channel concatenated on axis 1.
    """
    dims = tf.shape(input)
    n_group = tf.minimum(4, dims[0])

    grouped = tf.reshape(input, [n_group, -1, dims[1], dims[2], dims[3]])
    group_mean = tf.reduce_mean(grouped, axis=0, keepdims=True)
    centered_sq = tf.square(grouped - group_mean)
    stddev = tf.sqrt(tf.reduce_mean(centered_sq, axis=0) + 1e-8)

    stat = tf.reduce_mean(stddev, axis=[1, 2, 3], keepdims=True)
    # Repeat the statistic for every group member and every spatial position.
    stat = tf.tile(stat, [n_group, 1, dims[2], dims[3]])
    return tf.concat((input, stat), axis=1)
def upscale(input):
    """Double the last two axes by repeating every element twice along each."""
    dims = tf.shape(input)
    n_channels = input.get_shape()[1]
    height, width = dims[2], dims[3]

    # Add a unit axis after each spatial axis, tile it to 2, then fold it back.
    expanded = tf.reshape(input, [-1, n_channels, height, 1, width, 1])
    repeated = tf.tile(expanded, [1, 1, 1, 2, 1, 2])
    return tf.reshape(repeated, [-1, n_channels, height * 2, width * 2])
def downscale(input):
    """Halve the last two axes of an NCHW tensor with 2x2 average pooling."""
    window = [1, 1, 2, 2]
    return tf.nn.avg_pool(
        input, ksize=window, strides=window, padding='SAME', data_format='NCHW')
def resize_images(input, dims=None):
    """Nearest-neighbour resize to ``dims``; by default twice the size of axes 2 and 3.

    NOTE(review): the default reads axes 2 and 3, as for NCHW data, but the
    resize op is called without any layout conversion - confirm which layout
    callers pass in.
    """
    if dims is None:
        dyn_shape = tf.shape(input)
        dims = (dyn_shape[2] * 2, dyn_shape[3] * 2)
    return tf.image.resize_nearest_neighbor(input, dims)
def scale_uint8(input):
    """Cast to float and map the range [0, 255] onto [-1, 1]."""
    as_float = tf.to_float(input)
    scaled = as_float / 127.5
    return scaled - 1
def tensor_to_imgs(input, switch_dims=True):
    """Clamp to [-1, 1], rescale to [0, 255] and cast to uint8.

    With ``switch_dims`` the tensor is first transposed with permutation
    (0, 2, 3, 1), moving axis 1 to the end.
    """
    if switch_dims:
        input = tf.transpose(input, (0, 2, 3, 1))
    upper = tf.ones_like(input)
    clamped = tf.maximum(input, -upper)
    clamped = tf.minimum(clamped, upper)
    scaled = (clamped + 1) * 127.5
    return tf.cast(scaled, tf.uint8)
================================================
FILE: progan_v15.py
================================================
import os
import datetime as dt
# Operations used in building the network. Many are not used in the current model
from ops import *
# FeedDict object used to continuously provide new training data
from feed_dict import FeedDict
# TODO: add argparser and flags
# TODO: refactor training function
# TODO: train next version of model using reset_optimizer=True
class ProGAN:
def __init__(self,
             logdir,  # directory of stored models
             imgdir,  # directory of images for FeedDict
             learning_rate=0.001,  # Adam optimizer learning rate
             beta1=0,  # Adam optimizer beta1
             beta2=0.99,  # Adam optimizer beta2
             w_lambda=10.0,  # WGAN-GP/LP lambda
             w_gamma=1.0,  # WGAN-GP/LP gamma
             epsilon=0.001,  # weight of the squared-D(x) drift penalty
             z_length=512,  # latent variable size
             n_imgs=800000,  # number of images to show in each growth step
             batch_repeats=1,  # number of times to repeat minibatch
             n_examples=24,  # number of example images to generate
             lipschitz_penalty=True,  # if True, use WGAN-LP instead of WGAN-GP
             big_image=True,  # Generate a single large preview image, only works if n_examples = 24
             scaling_factor=None,  # factor to scale down number of trainable parameters
             reset_optimizer=False,  # reset optimizer variables with each new layer
             ):
    """Build the graph for every resolution, open a session and restore a checkpoint.

    A checkpoint found in ``logdir`` is restored; if restoring fails the error
    is reported and the freshly initialized weights are kept.
    """
    # Scale down the number of channels if scaling_factor is provided
    self.channels = [512, 512, 512, 512, 256, 128, 64, 32, 16, 8]
    if scaling_factor:
        assert scaling_factor > 1
        self.channels = [max(4, c // scaling_factor) for c in self.channels]

    self.batch_size = [16, 16, 16, 16, 16, 16, 8, 4, 3]
    self.z_length = z_length
    self.n_examples = n_examples
    self.batch_repeats = batch_repeats if batch_repeats else 1
    self.n_imgs = n_imgs
    self.logdir = logdir
    self.big_image = big_image
    self.w_lambda = w_lambda
    self.w_gamma = w_gamma
    self.epsilon = epsilon
    self.reset_optimizer = reset_optimizer
    self.lipschitz_penalty = lipschitz_penalty
    self.start = True

    # Generate fixed latent variables for image previews
    np.random.seed(0)
    self.z_fixed = np.random.normal(size=[self.n_examples, self.z_length])

    # Initialize placeholders
    self.x_placeholder = tf.placeholder(tf.float32, [None, None, None, 3])
    self.z_placeholder = tf.placeholder(tf.float32, [None, self.z_length])

    # Global step
    with tf.variable_scope('global_step'):
        self.global_step = tf.Variable(0, name='global_step', trainable=False)
        self.global_step_op = tf.assign(self.global_step, tf.add(self.global_step, 1))

    # Non-trainable variables for counting to next layer and incrementing value of alpha
    with tf.variable_scope('image_count'):
        self.total_imgs = tf.Variable(0.0, name='image_step', trainable=False)
        self.img_count_placeholder = tf.placeholder(tf.float32)
        self.img_step_op = tf.assign(self.total_imgs,
                                     tf.add(self.total_imgs, self.img_count_placeholder))
        self.img_step = tf.mod(tf.add(self.total_imgs, self.n_imgs), self.n_imgs * 2)
        self.alpha = tf.minimum(1.0, tf.div(self.img_step, self.n_imgs))
        self.layer = tf.floor_div(tf.add(self.total_imgs, self.n_imgs), self.n_imgs * 2)

    # Initialize optimizer as member variable if not reset_optimizer, otherwise generate new
    # optimizer for each layer
    if self.reset_optimizer:
        self.lr = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
    else:
        self.g_optimizer = tf.train.AdamOptimizer(learning_rate, beta1, beta2)
        self.d_optimizer = tf.train.AdamOptimizer(learning_rate, beta1, beta2)

    # Initialize FeedDict. FeedDict.load takes the log directory positionally
    # and forwards the remaining keywords to the FeedDict constructor.
    self.feed = FeedDict.load(
        logdir, imgdir=imgdir, z_length=z_length, n_examples=n_examples)
    self.n_layers = int(np.log2(1024)) - 1
    self.networks = [self._create_network(i + 1) for i in range(self.n_layers)]

    # Initialize Session, FileWriter and Saver
    self.sess = tf.Session()
    self.sess.run(tf.global_variables_initializer())
    self.writer = tf.summary.FileWriter(self.logdir, graph=self.sess.graph)
    self.saver = tf.train.Saver()

    # Look in logdir to see if a saved model already exists. If so, load it.
    # A missing checkpoint is the normal first-run case; a checkpoint that
    # exists but cannot be restored is reported instead of silently ignored.
    checkpoint = tf.train.latest_checkpoint(self.logdir)
    if checkpoint is not None:
        try:
            self.saver.restore(self.sess, checkpoint)
            print('Restored ----------------\n')
        except Exception as err:
            print('Could not restore checkpoint {}: {}\n'.format(checkpoint, err))
# Function for fading input of current layer into previous layer based on current value of alpha
def _reparameterize(self, x0, x1):
    """Blend ``x0`` and ``x1`` as ``(1 - alpha) * x0 + alpha * x1``."""
    old_weight = tf.subtract(1.0, self.alpha)
    old_part = tf.scalar_mul(old_weight, x0)
    new_part = tf.scalar_mul(self.alpha, x1)
    return tf.add(old_part, new_part)
# Function for creating network layout at each layer
def _create_network(self, layers):
    """Build generator, discriminator, losses, optimizers and summaries for one depth.

    Args:
        layers: number of layer blocks; the image side length is ``2 ** (layers + 1)``.

    Returns:
        Tuple ``(dim, wd, gp, wd_sum, gp_sum, g_train, d_train, fake_img_sum,
        real_img_sum, Gz, discriminator)``.
    """
    # Build the generator for this layer
    def generator(z):
        """Map latent vectors ``z`` to images, fading in the newest layer block."""
        with tf.variable_scope('Generator'):
            with tf.variable_scope('latent_vector'):
                # Insert two unit axes after the batch axis.
                z = tf.expand_dims(z, 1)
                g1 = tf.expand_dims(z, 2)
            for i in range(layers):
                with tf.variable_scope('layer_{}'.format(i)):
                    if i > 0:
                        g1 = resize(g1)
                    # Keep the input of the newest block for the fade-in branch.
                    if i == layers - 1 and layers > 1:
                        g0 = g1
                    with tf.variable_scope('1'):
                        if i == 0:
                            g1 = pixelwise_norm(leaky_relu(conv2d_transpose(
                                g1, [tf.shape(g1)[0], 4, 4, self.channels[0]])))
                        else:
                            g1 = pixelwise_norm(leaky_relu(conv2d(g1, self.channels[i])))
                    with tf.variable_scope('2'):
                        g1 = pixelwise_norm(leaky_relu(conv2d(g1, self.channels[i])))
            with tf.variable_scope('rgb_layer_{}'.format(layers - 1)):
                g1 = conv2d(g1, 3, 1, weight_norm=False)
            if layers > 1:
                with tf.variable_scope('rgb_layer_{}'.format(layers - 2)):
                    g0 = conv2d(g0, 3, 1, weight_norm=False)
                    # Blend the previous block's RGB output with the new one by alpha.
                    g = self._reparameterize(g0, g1)
            else:
                g = g1
        return g

    # Build the discriminator for this layer
    def discriminator(x):
        """Score images ``x``, fading in the newest layer block."""
        with tf.variable_scope('Discriminator'):
            if layers > 1:
                # Fade-in branch: pool the image first, then apply the older from-RGB layer.
                with tf.variable_scope('rgb_layer_{}'.format(layers - 2)):
                    d0 = avg_pool(x)
                    d0 = leaky_relu(conv2d(d0, self.channels[layers - 1], 1))
            with tf.variable_scope('rgb_layer_{}'.format(layers - 1)):
                d1 = leaky_relu(conv2d(x, self.channels[layers], 1))
            for i in reversed(range(layers)):
                with tf.variable_scope('layer_{}'.format(i)):
                    if i == 0:
                        d1 = minibatch_stddev(d1)
                    with tf.variable_scope('1'):
                        d1 = leaky_relu(conv2d(d1, self.channels[i]))
                    with tf.variable_scope('2'):
                        if i == 0:
                            d1 = leaky_relu(conv2d(d1, self.channels[i], 4, padding='VALID'))
                        else:
                            d1 = leaky_relu(conv2d(d1, self.channels[i]))
                    if i != 0:
                        d1 = avg_pool(d1)
                    # After the newest block, blend with the fade-in branch by alpha.
                    if i == layers - 1 and layers > 1:
                        d1 = self._reparameterize(d0, d1)
            with tf.variable_scope('dense'):
                d = tf.reshape(d1, [-1, self.channels[0]])
                d = dense_layer(d, 1)
        return d

    # image dimensions
    dim = 2 ** (layers + 1)

    # Build the current network
    # AUTO_REUSE lets the networks built for different depths share variables of the same name.
    with tf.variable_scope('Network', reuse=tf.AUTO_REUSE):
        Gz = generator(self.z_placeholder)
        Dz = discriminator(Gz)

        # Mix different resolutions of input images according to value of alpha
        with tf.variable_scope('reshape'):
            if layers > 1:
                # x0 is the image reduced to half size and resized back up.
                x0 = resize(self.x_placeholder, (dim // 2, dim // 2))
                x0 = resize(x0, (dim, dim))
                x1 = resize(self.x_placeholder, (dim, dim))
                x = self._reparameterize(x0, x1)
            else:
                x = resize(self.x_placeholder, (dim, dim))
        Dx = discriminator(x)

        # Fake and real image mixing for WGAN-GP loss function
        interp = tf.random_uniform(shape=[tf.shape(Dz)[0], 1, 1, 1], minval=0., maxval=1.)
        x_hat = interp * x + (1 - interp) * Gz
        Dx_hat = discriminator(x_hat)

    # Loss function and scalar summaries
    with tf.variable_scope('Loss_Function'):
        # Wasserstein Distance
        wd = Dz - Dx

        # Gradient/Lipschitz Penalty
        grads = tf.gradients(Dx_hat, [x_hat])[0]
        slopes = tf.sqrt(tf.reduce_sum(tf.square(grads), [1, 2, 3]))
        if self.lipschitz_penalty:
            # One-sided: only slopes above w_gamma are penalized.
            gp = tf.square(tf.maximum((slopes - self.w_gamma) / self.w_gamma, 0))
        else:
            gp = tf.square((slopes - self.w_gamma) / self.w_gamma)
        gp_scaled = self.w_lambda * gp

        # Epsilon penalty keeps discriminator output from drifting too far away from zero
        epsilon_cost = self.epsilon * tf.square(Dx)

        # Cost and summary scalars
        g_cost = tf.reduce_mean(-Dz)
        d_cost = tf.reduce_mean(wd + gp_scaled + epsilon_cost)
        wd = tf.abs(tf.reduce_mean(wd))
        gp = tf.reduce_mean(gp)

        # Summaries
        wd_sum = tf.summary.scalar('Wasserstein_distance_{}x{}'.format(dim, dim), wd)
        gp_sum = tf.summary.scalar('gradient_penalty_{}x{}'.format(dim, dim), gp)

    # Collecting variables to be trained by optimizers
    g_vars, d_vars = [], []
    var_scopes = ['layer_{}'.format(i) for i in range(layers)]
    var_scopes.extend(['dense', 'rgb_layer_{}'.format(layers - 1), 'rgb_layer_{}'.format(layers - 2)])
    for scope in var_scopes:
        g_vars.extend(tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES,
            scope='Network/Generator/{}'.format(scope)))
        d_vars.extend(tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES,
            scope='Network/Discriminator/{}'.format(scope)))

    # Generate optimizer operations
    # if self.reset_optimizer is True then initialize a new optimizer for each layer
    with tf.variable_scope('Optimize'):
        if self.reset_optimizer:
            g_train = tf.train.AdamOptimizer(
                self.lr, self.beta1, self.beta2, name='G_optimizer_{}'.format(layers - 1)).minimize(
                g_cost, var_list=g_vars)
            d_train = tf.train.AdamOptimizer(
                self.lr, self.beta1, self.beta2, name='D_optimizer_{}'.format(layers - 1)).minimize(
                d_cost, var_list=d_vars)
        else:
            g_train = self.g_optimizer.minimize(g_cost, var_list=g_vars)
            d_train = self.d_optimizer.minimize(d_cost, var_list=d_vars)

    # Print variable names before running model
    print([var.name for var in g_vars])
    print([var.name for var in d_vars])

    # Generate preview images
    with tf.variable_scope('image_preview'):
        fake_imgs = tf.minimum(tf.maximum(Gz, -tf.ones_like(Gz)), tf.ones_like(Gz))
        real_imgs = x[:min(self.batch_size[layers - 1], 4), :, :, :]

        # Upsize images to normal visibility
        if dim < 256:
            fake_imgs = resize(fake_imgs, (256, 256))
            real_imgs = resize(real_imgs, (256, 256))

        # Concatenate images into one large image for preview, only used if 24 preview images are requested
        if self.big_image and self.n_examples == 24:
            # Four groups of six images, each joined along axis 1 and then stacked along axis 0.
            fake_img_list = tf.unstack(fake_imgs, num=24)
            fake_img_list = [tf.concat(fake_img_list[6 * i:6 * (i + 1)], 1) for i in range(4)]
            fake_imgs = tf.concat(fake_img_list, 0)
            fake_imgs = tf.expand_dims(fake_imgs, 0)

            real_img_list = tf.unstack(real_imgs, num=min(self.batch_size[layers - 1], 4))
            real_imgs = tf.concat(real_img_list, 1)
            real_imgs = tf.expand_dims(real_imgs, 0)

        # images summaries
        fake_img_sum = tf.summary.image('fake{}x{}'.format(dim, dim), fake_imgs, self.n_examples)
        real_img_sum = tf.summary.image('real{}x{}'.format(dim, dim), real_imgs, 4)

    return (dim, wd, gp, wd_sum, gp_sum, g_train, d_train,
            fake_img_sum, real_img_sum, Gz, discriminator)
# Summary adding function
def _add_summary(self, string, gs):
    """Write the serialized summary ``string`` to the file writer at step ``gs``."""
    writer = self.writer
    writer.add_summary(string, gs)
# Latent variable 'z' generator
def _z(self, batch_size):
return np.random.normal(0.0, 1.0, [batch_size, self.z_length])
# Main training function
def train(self):
    """Run the training loop until the last resolution has been fully trained.

    Each iteration trains the generator and the discriminator on one batch,
    logs scalar summaries every 20 global steps, and saves a checkpoint plus
    image previews every ``save_interval`` global steps.
    """
    prev_layer = None
    start_time = dt.datetime.now()
    total_imgs = self.sess.run(self.total_imgs)

    while total_imgs < (self.n_layers - 0.5) * self.n_imgs * 2:

        # Get current layer, global step, alpha and total number of images used so far
        layer, gs, img_step, alpha, total_imgs = self.sess.run([
            self.layer, self.global_step, self.img_step, self.alpha, self.total_imgs])
        layer = int(layer)

        # Global step interval to save model and generate image previews
        save_interval = max(1000, 10000 // 2 ** layer)

        # Get network operations and loss functions for current layer
        (dim, wd, gp, wd_sum, gp_sum, g_train, d_train,
         fake_img_sum, real_img_sum, Gz, discriminator) = self.networks[layer]

        # Get training data and latent variables to store in feed_dict.
        # FeedDict serves image batches through x_batch(batch_size, res).
        feed_dict = {
            self.x_placeholder: self.feed.x_batch(self.batch_size[layer], dim),
            self.z_placeholder: self._z(self.batch_size[layer])}

        # Reset start times if a new layer has begun training
        if layer != prev_layer:
            start_time = dt.datetime.now()

        # Here's where we actually train the model
        for _ in range(self.batch_repeats):
            self.sess.run(g_train, feed_dict)
            self.sess.run(d_train, feed_dict)

        # Get loss values and summaries
        wd_, gp_, wd_sum_str, gp_sum_str = self.sess.run([wd, gp, wd_sum, gp_sum], feed_dict)

        # Print current status, loss functions, etc.
        percent_done = np.round(img_step * 50 / self.n_imgs, 4)
        imgs_done = int(img_step)
        cur_layer_imgs = self.n_imgs * 2
        # The first resolution has no fade-in phase, so it covers half as many images.
        if dim == 4:
            percent_done = np.round((percent_done - 50) * 2, 4)
            imgs_done -= self.n_imgs
            cur_layer_imgs //= 2
        print('dimensions: {}x{} ---- {}% ---- images: {}/{} ---- alpha: {} ---- global step: {}'
              '\nWasserstein distance: {}\ngradient penalty: {}\n'.format(
            dim, dim, percent_done, imgs_done, cur_layer_imgs, alpha, gs, wd_, gp_))

        # Log scalar data every 20 global steps
        if gs % 20 == 0:
            self._add_summary(wd_sum_str, gs)
            self._add_summary(gp_sum_str, gs)

        # Operations to run every save interval
        if gs % save_interval == 0:

            # Do not save the model or generate images immediately after loading/preloading
            if self.start:
                self.start = False

            # Save the model and generate image previews
            else:
                print('saving and making images...\n')
                self.feed.save()
                self.saver.save(
                    self.sess, os.path.join(self.logdir, "model.ckpt"),
                    global_step=self.global_step)
                real_img_sum_str = self.sess.run(real_img_sum, feed_dict)
                img_preview_feed_dict = {
                    self.x_placeholder: feed_dict[self.x_placeholder][:4],
                    self.z_placeholder: self.z_fixed}
                fake_img_sum_str = self.sess.run(fake_img_sum, img_preview_feed_dict)
                self._add_summary(fake_img_sum_str, gs)
                self._add_summary(real_img_sum_str, gs)

        # Increment image count and global step variables
        img_count = self.batch_repeats * self.batch_size[layer]
        self.sess.run(self.global_step_op)
        # Keep the updated count so the loop condition sees it. With the stale
        # pre-increment value the loop ran once more after the limit was
        # crossed, and self.networks[layer] was indexed out of range.
        total_imgs = self.sess.run(self.img_step_op, {self.img_count_placeholder: img_count})

        # Calculate and print estimated time remaining
        prev_layer = layer
        avg_time = (dt.datetime.now() - start_time) / (imgs_done + self.batch_size[layer])
        steps_remaining = cur_layer_imgs - imgs_done
        time_remaining = avg_time * steps_remaining
        print('est. time remaining on current layer: {}'.format(time_remaining))
def get_cur_res(self):
cur_layer = int(self.sess.run(self.layer))
return 2 ** (2 + cur_layer)
# Function for generating images from a 1D or 2D array of latent vectors
def generate(self, z):
if len(z.shape) == 1:
z = np.expand_dims(z, 0)
cur_layer = int(self.sess.run(self.layer))
G = self.networks[cur_layer][9]
imgs = self.sess.run(G, {self.z_placeholder: z})
imgs = np.minimum(imgs, 1.0)
imgs = np.maximum(imgs, -1.0)
imgs = (imgs + 1) * 255 / 2
imgs = np.uint8(imgs)
if imgs.shape[0] == 1:
imgs = np.squeeze(imgs, 0)
return imgs
def transform(self, input_img, n_iter=100000):
    """Optimize an image so that the current discriminator scores it higher.

    Only the image variable is updated; the cost is the negated mean
    discriminator output on a randomly rolled and resized copy of it.

    Args:
        input_img: initial value of the image variable.
        n_iter: number of optimization steps.
    """
    # Remember what already exists so that only the variables created below
    # get initialized; re-running the global initializer here would wipe the
    # trained network weights.
    existing_names = {v.name for v in tf.global_variables()}

    with tf.variable_scope('transform'):
        global_step = tf.Variable(0, name='transform_global_step', trainable=False)
        transform_img = tf.Variable(input_img, name='transform_img', dtype=tf.float32)

    cur_layer = int(self.sess.run(self.layer))
    network = self.networks[cur_layer]
    # The network tuple starts with the image size and ends with the discriminator builder.
    dim, discriminator = network[0], network[10]

    with tf.variable_scope('Network', reuse=tf.AUTO_REUSE):
        with tf.variable_scope('resize'):
            # Roll the image by a random offset in [-10, 10) along axes 1 and 2.
            jitter = tf.random_uniform([2], -10, 10, tf.int32)
            img = tf.manip.roll(transform_img, jitter, [1, 2])
            img = resize(img, (dim, dim))
        Dt = discriminator(img)

    t_cost = tf.reduce_mean(-Dt)
    tc_sum = tf.summary.scalar('transform_cost_{}x{}'.format(dim, dim), t_cost)

    # Pass the variable itself: a scope-name lookup stops matching once
    # TensorFlow uniquifies the scope on a second call.
    t_train = tf.train.AdamOptimizer(0.0001).minimize(
        t_cost, var_list=[transform_img], global_step=global_step)
    transform_img_sum = tf.summary.image('transform', transform_img)

    new_vars = [v for v in tf.global_variables() if v.name not in existing_names]
    self.sess.run(tf.variables_initializer(new_vars))

    for i in range(n_iter):
        gs, t_cost_, tc_sum_str, _ = self.sess.run([global_step, t_cost, tc_sum, t_train])
        print('Global step: {}, cost: {}\n\n'.format(gs, t_cost_))
        if i % 20 == 0:
            self._add_summary(tc_sum_str, gs)
        if i % 1000 == 0:
            img_sum_str = self.sess.run(transform_img_sum)
            self._add_summary(img_sum_str, gs)
if __name__ == '__main__':
    # Alternative run kept from the original script:
    #   ProGAN(logdir='logdir_v3', imgdir='img_arrays_botanical', reset_optimizer=True)
    progan = ProGAN(logdir='logdir_v2', imgdir='img_arrays')
    progan.train()
================================================
FILE: progan_v16.py
================================================
import datetime as dt
import os
import numpy as np
# Operations used in building the network. Many are not used in the current model
from ops import *
# FeedDict object used to continuously provide new training data
from feed_dict import FeedDict
# TODO: add argparser and flags
class ProGAN:
def __init__(self,
             logdir,  # directory of stored models
             imgdir,  # directory of images for FeedDict
             learning_rate=0.001,  # Adam optimizer learning rate
             beta1=0,  # Adam optimizer beta1
             beta2=0.99,  # Adam optimizer beta2
             w_lambda=10.0,  # WGAN-GP/LP lambda
             w_gamma=1.0,  # WGAN-GP/LP gamma
             epsilon=0.001,  # weight of the squared-D(x) drift penalty
             z_length=512,  # latent variable size
             n_imgs=800000,  # number of images to show in each growth step
             batch_repeats=1,  # number of times to repeat minibatch
             n_examples=24,  # number of example images to generate
             lipschitz_penalty=True,  # if True, use WGAN-LP instead of WGAN-GP
             big_image=True,  # Generate a single large preview image, only works if n_examples = 24
             reset_optimizer=True,  # reset optimizer variables with each new layer
             batch_sizes=None,  # batch size per layer; defaults are used when None
             channels=None,  # channel count per layer; defaults are used when None
             ):
    """Build the graph for every resolution, open a session and restore a checkpoint.

    A checkpoint found in ``logdir`` is restored; if restoring fails the error
    is reported and the freshly initialized weights are kept.
    """
    # Fall back to the default channel counts and batch sizes when none are given
    self.channels = channels if channels else [512, 512, 512, 512, 256, 128, 64, 32, 16, 16]
    self.batch_sizes = batch_sizes if batch_sizes else [16, 16, 16, 16, 16, 16, 12, 4, 3]

    self.z_length = z_length
    self.n_examples = n_examples
    self.batch_repeats = batch_repeats if batch_repeats else 1
    self.n_imgs = n_imgs
    self.logdir = logdir
    self.big_image = big_image
    self.w_lambda = w_lambda
    self.w_gamma = w_gamma
    self.epsilon = epsilon
    self.reset_optimizer = reset_optimizer
    self.lipschitz_penalty = lipschitz_penalty

    # Initialize FeedDict
    self.feed = FeedDict.load(logdir, imgdir=imgdir, z_length=z_length, n_examples=n_examples)
    self.n_layers = self.feed.n_sizes
    self.max_imgs = (self.n_layers - 0.5) * self.n_imgs * 2

    # Initialize placeholders
    self.x_placeholder = tf.placeholder(tf.uint8, [None, 3, None, None])
    self.z_placeholder = tf.placeholder(tf.float32, [None, self.z_length])

    # Global step
    with tf.variable_scope('global_step'):
        self.global_step = tf.Variable(0, name='global_step', trainable=False, dtype=tf.int32)

    # Non-trainable variables for counting to next layer and incrementing value of alpha
    with tf.variable_scope('image_count'):
        self.total_imgs = tf.Variable(0, name='total_images', trainable=False, dtype=tf.int32)
        img_offset = tf.add(self.total_imgs, self.n_imgs)
        imgs_per_layer = self.n_imgs * 2
        self.img_step = tf.mod(img_offset, imgs_per_layer)
        # Clamp so the layer index never runs past the last network.
        self.layer = tf.minimum(tf.floor_div(img_offset, imgs_per_layer), self.n_layers - 1)
        fade_in = tf.to_float(self.img_step) / float(self.n_imgs)
        self.alpha = tf.minimum(1.0, tf.maximum(0.0, fade_in))

    # Initialize optimizer as member variable if not reset_optimizer, otherwise generate new
    # optimizer for each layer
    if self.reset_optimizer:
        self.lr = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
    else:
        self.g_optimizer = tf.train.AdamOptimizer(learning_rate, beta1, beta2)
        self.d_optimizer = tf.train.AdamOptimizer(learning_rate, beta1, beta2)

    self.networks = [self.create_network(i + 1) for i in range(self.n_layers)]

    # Initialize Session, FileWriter and Saver
    self.sess = tf.Session()
    self.sess.run(tf.global_variables_initializer())
    self.writer = tf.summary.FileWriter(self.logdir, graph=self.sess.graph)
    self.saver = tf.train.Saver()

    # Look in logdir to see if a saved model already exists. If so, load it.
    # A missing checkpoint is the normal first-run case; a checkpoint that
    # exists but cannot be restored is reported instead of silently ignored.
    checkpoint = tf.train.latest_checkpoint(self.logdir)
    if checkpoint is not None:
        try:
            self.saver.restore(self.sess, checkpoint)
            print('Restored model -----------\n')
        except Exception as err:
            print('Could not restore checkpoint {}: {}\n'.format(checkpoint, err))
# Function for fading input of current layer into previous layer based on current value of alpha
def reparameterize(self, x0, x1):
    """Blend ``x0`` and ``x1`` as ``(1 - alpha) * x0 + alpha * x1``."""
    old_weight = tf.subtract(1.0, self.alpha)
    old_part = tf.scalar_mul(old_weight, x0)
    new_part = tf.scalar_mul(self.alpha, x1)
    return tf.add(old_part, new_part)
# Build a generator for n layers
def generator(self, z, n_layers):
    """Build a generator with ``n_layers`` layer blocks on top of latent ``z``.

    For ``n_layers > 1`` the result blends, by ``alpha``, the upscaled RGB
    output taken before the newest block with the RGB output after it.
    """
    with tf.variable_scope('Generator'):
        with tf.variable_scope('latent_vector'):
            # Append two unit axes to the latent vectors.
            z = tf.expand_dims(z, 2)
            g1 = tf.expand_dims(z, 3)
        for i in range(n_layers):
            with tf.variable_scope('layer_{}'.format(i)):
                # Keep the input of the newest block for the fade-in branch.
                if i == n_layers - 1:
                    g0 = g1
                with tf.variable_scope('1'):
                    if i == 0:
                        # The first block expands the latent to a 4x4 map with a transposed convolution.
                        g1 = g_conv_layer(g1, self.channels[i],
                                          filter_size=4, padding='VALID', mode='transpose',
                                          output_shape=[tf.shape(g1)[0], self.channels[i], 4, 4])
                    else:
                        g1 = g_conv_layer(g1, self.channels[i], mode='upscale')
                with tf.variable_scope('2'):
                    g1 = g_conv_layer(g1, self.channels[i])
        with tf.variable_scope('rgb_layer_{}'.format(n_layers - 1)):
            g1 = conv(g1, 3, filter_size=1)
        if n_layers > 1:
            with tf.variable_scope('rgb_layer_{}'.format(n_layers - 2)):
                g0 = conv(g0, 3, filter_size=1)
                # Bring the older RGB output up to the new resolution before blending.
                g0 = upscale(g0)
                g = self.reparameterize(g0, g1)
        else:
            g = g1
    return g
# Build a discriminator n layers
def discriminator(self, x, n_layers):
    """Build a discriminator with ``n_layers`` layer blocks scoring images ``x``.

    For ``n_layers > 1`` the output of the newest block is blended, by
    ``alpha``, with a branch that downscales ``x`` and applies the previous
    from-RGB layer.
    """
    with tf.variable_scope('Discriminator'):
        if n_layers > 1:
            # Fade-in branch: downscale the image, then apply the older from-RGB layer.
            with tf.variable_scope('rgb_layer_{}'.format(n_layers - 2)):
                d0 = downscale(x)
                d0 = d_conv_layer(d0, self.channels[n_layers - 1], filter_size=1)
        with tf.variable_scope('rgb_layer_{}'.format(n_layers - 1)):
            d1 = d_conv_layer(x, self.channels[n_layers], filter_size=1)
        # Blocks run from the newest (highest index) down to block 0.
        for i in reversed(range(n_layers)):
            with tf.variable_scope('layer_{}'.format(i)):
                if i == 0:
                    d1 = minibatch_stddev(d1)
                with tf.variable_scope('1'):
                    d1 = d_conv_layer(d1, self.channels[i])
                with tf.variable_scope('2'):
                    if i == 0:
                        d1 = d_conv_layer(d1, self.channels[0],
                                          filter_size=4, padding='VALID')
                    else:
                        d1 = d_conv_layer(d1, self.channels[i], mode='downscale')
                # After the newest block, blend with the fade-in branch by alpha.
                if i == n_layers - 1 and n_layers > 1:
                    d1 = self.reparameterize(d0, d1)
        with tf.variable_scope('dense'):
            d = tf.reshape(d1, [-1, self.channels[0]])
            d = dense(d, 1)
    return d
# Function for creating network layout at each layer
def create_network(self, n_layers):
    """Build the training graph for a network that is ``n_layers`` blocks deep.

    Connects the generator and discriminator, defines the losses (critic
    difference, gradient penalty and an epsilon penalty on ``Dx``), creates
    the optimizer steps for the variables that belong to this depth, and
    builds the preview image summaries.

    Args:
        n_layers: number of generator/discriminator blocks; the image side
            length used here is ``2 ** (n_layers + 1)``.

    Returns:
        dict with the keys ``wd``, ``gp``, ``wd_sum``, ``gp_sum``,
        ``g_train``, ``d_train``, ``fake_img_sum``, ``real_img_sum`` and
        ``Gz``.
    """
    # NOTE(review): the nesting of the scopes that follow the 'Network'
    # block was reconstructed; confirm it against the checked-in file,
    # since it affects op/summary name prefixes.

    # image dimensions
    dim = 2 ** (n_layers + 1)

    # Build the current network
    with tf.variable_scope('Network', reuse=tf.AUTO_REUSE):
        Gz = self.generator(self.z_placeholder, n_layers)
        Dz = self.discriminator(Gz, n_layers)

        # Mix different resolutions of input images according to value of alpha
        with tf.variable_scope('training_images'):
            x = scale_uint8(self.x_placeholder)
            if n_layers > 1:
                x0 = upscale(downscale(x))
                x1 = x
                x = self.reparameterize(x0, x1)

        Dx = self.discriminator(x, n_layers)

        # Fake and real image mixing for WGAN-GP loss function
        # (one random weight per example, broadcast over the other three axes)
        interp = tf.random_uniform(shape=[tf.shape(Dz)[0], 1, 1, 1], minval=0.0, maxval=1.0)
        x_hat = interp * x + (1 - interp) * Gz
        Dx_hat = self.discriminator(x_hat, n_layers)

    # Loss function and scalar summaries
    with tf.variable_scope('Loss_Function'):

        # Wasserstein Distance
        wd = Dz - Dx

        # Gradient/Lipschitz Penalty
        grads = tf.gradients(Dx_hat, [x_hat])[0]
        slopes = tf.sqrt(tf.reduce_sum(tf.square(grads), [1, 2, 3]))
        if self.lipschitz_penalty:
            # One-sided variant: only slopes above w_gamma are penalized
            gp = tf.square(tf.maximum((slopes - self.w_gamma) / self.w_gamma, 0))
        else:
            gp = tf.square((slopes - self.w_gamma) / self.w_gamma)
        gp_scaled = self.w_lambda * gp

        # Epsilon penalty keeps discriminator output from drifting too far away from zero
        epsilon_cost = self.epsilon * tf.square(Dx)

        # Cost and summary scalars
        g_cost = tf.reduce_mean(-Dz)
        d_cost = tf.reduce_mean(wd + gp_scaled + epsilon_cost)
        # From here on wd and gp are the scalar values that get reported
        wd = tf.abs(tf.reduce_mean(wd))
        gp = tf.reduce_mean(gp)

        # Summaries
        wd_sum = tf.summary.scalar('Wasserstein_distance_{}_({}x{})'.format(
            n_layers - 1, dim, dim), wd)
        gp_sum = tf.summary.scalar('gradient_penalty_{}_({}x{})'.format(
            n_layers - 1, dim, dim), gp)

    # Collecting variables to be trained by optimizers
    g_vars, d_vars = [], []
    var_scopes = ['layer_{}'.format(i) for i in range(n_layers)]
    # For n_layers == 1 this adds 'rgb_layer_-1'; presumably no variable
    # lives under that scope, so it contributes nothing -- TODO confirm
    var_scopes.extend([
        'dense',
        'rgb_layer_{}'.format(n_layers - 2),
        'rgb_layer_{}'.format(n_layers - 1)
    ])
    for scope in var_scopes:
        g_vars.extend(tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope='Network/Generator/{}'.format(scope)
        ))
        d_vars.extend(tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES, scope='Network/Discriminator/{}'.format(scope)
        ))

    # Generate optimizer operations
    # if self.reset_optimizer is True then initialize a new optimizer for each layer
    with tf.variable_scope('Optimize'):
        if self.reset_optimizer:
            g_train = tf.train.AdamOptimizer(
                self.lr, self.beta1, self.beta2, name='G_optimizer_{}'.format(n_layers - 1)
            ).minimize(
                g_cost, var_list=g_vars)
            d_train = tf.train.AdamOptimizer(
                self.lr, self.beta1, self.beta2, name='D_optimizer_{}'.format(n_layers - 1)
            ).minimize(
                d_cost, var_list=d_vars, global_step=self.global_step)
        else:
            g_train = self.g_optimizer.minimize(g_cost, var_list=g_vars)
            d_train = self.d_optimizer.minimize(d_cost, var_list=d_vars, global_step=self.global_step)

        # Increment image count
        # (grouped with d_train, so only discriminator steps advance self.total_imgs)
        n_imgs = tf.shape(x)[0]
        new_image_count = tf.add(self.total_imgs, n_imgs)
        img_step_op = tf.assign(self.total_imgs, new_image_count)
        d_train = tf.group(d_train, img_step_op)

    # Print variable names before running model
    print('\nGenerator variables for layer {} ({} x {}):'.format(n_layers - 1, dim, dim))
    print([var.name for var in g_vars])
    print('\nDiscriminator variables for layer {} ({} x {}):'.format(n_layers - 1, dim, dim))
    print([var.name for var in d_vars])

    # Generate preview images
    with tf.variable_scope('image_preview'):
        n_real_imgs = min(self.batch_sizes[n_layers - 1], 4)
        fake_imgs = tensor_to_imgs(Gz)
        real_imgs = tensor_to_imgs(x[:n_real_imgs])

        # Upsize images to normal visibility
        if dim < 256:
            fake_imgs = resize_images(fake_imgs, (256, 256))
            real_imgs = resize_images(real_imgs, (256, 256))

        # Concatenate images into one large image for preview, only used if 24 preview images are requested
        # NOTE(review): assumes the real-image concatenation also belongs
        # to this branch -- confirm against the checked-in file
        if self.big_image and self.n_examples == 24:
            # 24 fake images are arranged as 4 groups of 6
            fake_img_list = tf.unstack(fake_imgs, num=24)
            fake_img_list = [tf.concat(fake_img_list[6 * i:6 * (i + 1)], 1) for i in range(4)]
            fake_imgs = tf.concat(fake_img_list, 0)
            fake_imgs = tf.expand_dims(fake_imgs, 0)

            real_img_list = tf.unstack(real_imgs, num=n_real_imgs)
            real_imgs = tf.concat(real_img_list, 1)
            real_imgs = tf.expand_dims(real_imgs, 0)

        # images summaries
        fake_img_sum = tf.summary.image('fake{}x{}'.format(dim, dim), fake_imgs, self.n_examples)
        real_img_sum = tf.summary.image('real{}x{}'.format(dim, dim), real_imgs, 4)

    return dict(
        wd=wd, gp=gp, wd_sum=wd_sum, gp_sum=gp_sum, g_train=g_train, d_train=d_train,
        fake_img_sum=fake_img_sum, real_img_sum=real_img_sum, Gz=Gz
    )
# Get current layer, global step, alpha and total number of images used so far
def get_global_vars(self):
    """Read the training counters from the session in a single call.

    Returns:
        Tuple ``(global_step, layer, img_step, alpha, total_imgs)``. While
        the layer is 0, ``self.n_imgs`` is subtracted from ``img_step``
        before it is returned.
    """
    fetches = [
        self.global_step,
        self.layer,
        self.img_step,
        self.alpha,
        self.total_imgs,
    ]
    gs, layer, img_step, alpha, total_imgs = self.sess.run(fetches)

    if layer == 0:
        img_step -= self.n_imgs

    return gs, layer, img_step, alpha, total_imgs
def get_layer_ops(self, layer):
    """Collect the per-layer settings and graph operations used by ``train``.

    Args:
        layer: index into ``self.batch_sizes`` and ``self.networks``.

    Returns:
        Tuple ``(dim, batch_size, n_imgs, g_train, d_train, scalar_sum_ops,
        img_sum_ops)`` where ``dim`` is ``2 ** (layer + 2)``, ``n_imgs`` is
        ``self.n_imgs`` (doubled for every layer above 0), and the two
        trailing entries are lists of operations looked up by name. Names
        missing from the layer's dict come back as ``None``.
    """
    side = 2 ** (layer + 2)
    layer_batch = self.batch_sizes[layer]
    layer_imgs = self.n_imgs * 2 if layer > 0 else self.n_imgs
    ops = self.networks[layer]

    def pick(*names):
        # Look up several operations at once, preserving the given order.
        return [ops.get(name) for name in names]

    return (
        side,
        layer_batch,
        layer_imgs,
        ops.get('g_train'),
        ops.get('d_train'),
        pick('wd', 'gp', 'wd_sum', 'gp_sum'),
        pick('fake_img_sum', 'real_img_sum'),
    )
# Main training function
def train(self, save_interval=80000):
    """Run the training loop until ``self.max_imgs`` images have been used.

    Each iteration reads the global counters, switches to the operations of
    the current layer when the layer changed, draws one batch, and runs the
    discriminator and generator steps ``self.batch_repeats`` times on it.
    Loss values are printed and logged whenever the global step is a
    multiple of 20; a checkpoint and preview images are written each time
    the total image count passes another multiple of ``save_interval``.

    Args:
        save_interval: number of images between checkpoints/previews.
    """

    def get_loop_progress(layer, img_step):
        # Fraction of the current layer that is complete. Layers above 0 run
        # for twice self.n_imgs images (see get_layer_ops), hence the halving.
        percent_done = img_step / self.n_imgs
        if layer > 0:
            percent_done /= 2
        return dt.datetime.now(), percent_done

    gs, prev_layer, img_step, alpha, total_imgs = self.get_global_vars()
    start_time, start_percent_done = get_loop_progress(prev_layer, img_step)
    (dim, batch_size, n_imgs, g_train, d_train,
     scalar_sum_ops, img_sum_ops) = self.get_layer_ops(prev_layer)

    # Next image count at which a checkpoint is due; rounding up from the
    # current count keeps the schedule aligned when training is resumed.
    save_step = (total_imgs // save_interval + 1) * save_interval

    while total_imgs < self.max_imgs:
        gs, layer, img_step, alpha, total_imgs = self.get_global_vars()

        # Get network operations and loss functions for current layer
        if layer != prev_layer:
            # Restart the ETA baseline using the NEW layer's scaling, so it
            # is comparable with the progress values computed further down.
            start_time, start_percent_done = get_loop_progress(layer, img_step)
            (dim, batch_size, n_imgs, g_train, d_train,
             scalar_sum_ops, img_sum_ops) = self.get_layer_ops(layer)

        # Get training data and latent variables to store in feed_dict
        feed_dict = {
            self.x_placeholder: self.feed.x_batch(batch_size, dim),
            self.z_placeholder: self.feed.z_batch(batch_size)
        }

        # Here's where we actually train the model
        for _ in range(self.batch_repeats):
            self.sess.run(d_train, feed_dict)
            self.sess.run(g_train, feed_dict)

        if gs % 20 == 0:
            # Get loss values and summaries
            wd_value, gp_value, wd_sum_str, gp_sum_str = self.sess.run(scalar_sum_ops, feed_dict)

            # Print current status, loss functions, etc.
            now, percent_done = get_loop_progress(layer, img_step)
            print(
                'dimensions: ({} x {}) ---- {}% ---- images: {}/{} ---- alpha: {} ---- global step: {}'
                '\nWasserstein distance: {}\ngradient penalty: {}'.format(
                    dim, dim, np.round(percent_done * 100, 4), img_step, n_imgs,
                    np.round(alpha, 4), gs, wd_value, gp_value
                ))

            # Calculate and print estimated time remaining:
            # remaining work (1 - p) divided by the observed rate
            # (p - p_start) / elapsed. The small constant avoids a division
            # by zero before any progress has been made.
            elapsed = now - start_time
            progressed = percent_done - start_percent_done
            time_remaining = elapsed * ((1 - percent_done) / (progressed + 1e-8))
            print('est. time remaining on layer {}: {}\n'.format(layer, time_remaining))

            # Log scalar data every 20 global steps
            self.writer.add_summary(wd_sum_str, gs)
            self.writer.add_summary(gp_sum_str, gs)

        # Operations to run every save interval
        if total_imgs > save_step:
            save_step += save_interval

            # Save the model and generate image previews
            print('\nsaving and making images...\n')
            self.saver.save(
                self.sess, os.path.join(self.logdir, "model.ckpt"),
                global_step=self.global_step
            )
            self.feed.save()

            # Previews use the fixed latent batch and the first four real
            # images of the batch that was just trained on.
            img_preview_feed_dict = {
                self.x_placeholder: feed_dict[self.x_placeholder][:4],
                self.z_placeholder: self.feed.z_fixed
            }
            fake_img_sum_str, real_img_sum_str = self.sess.run(
                img_sum_ops, img_preview_feed_dict
            )
            self.writer.add_summary(fake_img_sum_str, gs)
            self.writer.add_summary(real_img_sum_str, gs)

        prev_layer = layer
def get_cur_res(self):
    """Return the image side length of the current layer, ``2 ** (layer + 2)``."""
    layer_index = self.sess.run(self.layer)
    exponent = 2 + layer_index
    return 2 ** exponent
def generate(self, z):
    """Run the generator of the current layer on latent input ``z``.

    Args:
        z: numpy array holding either one latent vector (1-D) or a batch of
            latent vectors (one per row).

    Returns:
        The evaluated generator output (the ``'Gz'`` tensor of the current
        layer's network). For a 1-D ``z`` the leading batch axis is removed
        again, so a single result is returned instead of a batch of one.
    """
    solo = z.ndim == 1
    if solo:
        z = np.expand_dims(z, 0)

    cur_layer = int(self.sess.run(self.layer))
    # Each entry of self.networks is the dict built by create_network, so
    # the generator output has to be looked up by name, not by position.
    gz = self.networks[cur_layer]['Gz']
    imgs = self.sess.run(gz, {self.z_placeholder: z})

    if solo:
        imgs = np.squeeze(imgs, 0)
    return imgs
if __name__ == '__main__':
    # Settings of earlier runs, kept for reference:
    # progan = ProGAN(logdir='logdir_v5', imgdir='memmaps')
    # progan = ProGAN(logdir='logdir_v6', imgdir='memmaps', batch_repeats=4)
    # progan = ProGAN(logdir='logdir_v9', imgdir='memmaps', batch_repeats=4, batch_sizes=[128, 128, 128, 64, 32, 16, 12, 8, 4])
    run_settings = dict(logdir='logdir_v8', imgdir='memmaps', batch_repeats=4)
    progan = ProGAN(**run_settings)
    progan.train()
================================================
FILE: scripts/downloader.py
================================================
import os
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Interactive scraper: walks the listing pages of a subreddit on
# old.reddit.com and saves every linked .jpg into a local folder.
subreddit = input('Enter subreddit name: ')
save_dir = input('Enter name of folder to save images in: ')
os.makedirs(save_dir, exist_ok=True)

pages = 100
img_n = 0

browser = webdriver.Firefox()
try:
    browser.get('https://old.reddit.com/r/{}'.format(subreddit))

    for i in range(pages):
        # Click every expando button found on the page before collecting links.
        icons = WebDriverWait(browser, 300).until(
            EC.presence_of_all_elements_located(
                (By.CLASS_NAME, "expando-button")
            )
        )
        for icon in icons:
            icon.click()

        anchors = WebDriverWait(browser, 300).until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, "may-blank"))
        )
        # Read each href once; elements without one yield None and are skipped.
        hrefs = (a.get_attribute('href') for a in anchors)
        links = list({href for href in hrefs if href and href.endswith('.jpg')})

        for link in links:
            try:
                # The timeout keeps one stalled server from hanging the run;
                # the status check avoids saving an error page as an image.
                image = requests.get(link, timeout=30)
                image.raise_for_status()
            except requests.RequestException as err:
                print('skipping {}: {}'.format(link, err))
                continue
            with open(os.path.join(save_dir, 'img_{}.jpg'.format(img_n)), 'wb') as f:
                f.write(image.content)
            img_n += 1

        if i != pages - 1:
            next_button = WebDriverWait(browser, 300).until(
                EC.presence_of_element_located((By.CLASS_NAME, "next-button"))
            )
            next_button.click()

        print('page: {}, images: {}'.format(i, len(links)))
finally:
    # Always shut the browser down, including after a timeout or Ctrl-C.
    browser.quit()
================================================
FILE: scripts/image_reshape.py
================================================
import os
import numpy as np
from PIL import Image
def generate_square_crops(imgdir, savedir, crops_per_img=10, max_size=1024, filter=Image.BICUBIC):
    """Cut square crops (plus mirrored copies) out of every large enough image.

    Each image in ``imgdir`` whose width and height are both at least
    ``max_size`` is scaled so that its shorter side equals ``max_size``. A
    ``max_size`` x ``max_size`` window is then slid along the longer side and
    every crop is saved, together with its left-right mirror, as a JPEG in
    ``<savedir>/_temp``. Files that cannot be opened or processed are
    reported and skipped.

    Args:
        imgdir: directory containing the source images.
        savedir: parent directory; crops go into its ``_temp`` subfolder.
        crops_per_img: upper bound on crops taken per image; values below 1
            produce no crops.
        max_size: side length of the saved crops, in pixels.
        filter: PIL resampling filter used when scaling.
    """
    img_files = [os.path.join(imgdir, f) for f in os.listdir(imgdir)]
    savedir = os.path.join(savedir, '_temp')
    os.makedirs(savedir, exist_ok=True)

    for i, f in enumerate(img_files):
        # Opening is inside the try block as well, so a stray non-image file
        # or a sub-directory does not abort the whole run.
        try:
            with Image.open(f) as img:
                width, height = img.size
                if width < max_size or height < max_size:
                    continue

                landscape = width > height
                long_side, short_side = (width, height) if landscape else (height, width)
                scaled_long = int(long_side * (max_size / short_side))
                # Number of distinct window positions along the longer side.
                offset = int(max_size * (long_side / short_side - 1) + 1)
                new_size = (scaled_long, max_size) if landscape else (max_size, scaled_long)

                n_crops = min(offset, crops_per_img)
                if n_crops < 1:
                    continue
                window_slide_len = offset / n_crops

                img = img.convert('RGB')
                img = img.resize(new_size, filter)

                for j in range(n_crops):
                    shift = int(j * window_slide_len)
                    if landscape:
                        window = (shift, 0, max_size + shift, max_size)
                    else:
                        window = (0, shift, max_size, max_size + shift)

                    cropped_img = img.crop(window)
                    mirror_img = cropped_img.transpose(Image.FLIP_LEFT_RIGHT)

                    path = os.path.join(savedir, 'img_{}_{}.jpg'.format(i, j))
                    mirror_path = os.path.join(savedir, 'img_{}_{}_mirror.jpg'.format(i, j))
                    cropped_img.save(path, "JPEG")
                    mirror_img.save(mirror_path, "JPEG")

                print('Processed {}\n'.format(f))
        except OSError as err:
            print('Skipped {} ({})\n'.format(f, err))
            continue
def resize(savedir, NCHW=True, min_size=4, max_size=1024, max_mem=0.8,
           use_uint8=True, filter=Image.BICUBIC):
    """Pack the crops in ``<savedir>/_temp`` into ``.npy`` arrays per resolution.

    For every power-of-two size from ``min_size`` to ``max_size`` the images
    are resized to ``size`` x ``size`` and written in shuffled order to
    ``<savedir>/memmaps/<size>_<n>.npy``. A new file is started whenever the
    current batch reaches the image count allowed by ``max_mem``.

    Args:
        savedir: directory holding the ``_temp`` input folder; the
            ``memmaps`` output folder is created inside it.
        NCHW: store images channel-first (``True``) or channel-last.
        min_size: smallest resolution to produce.
        max_size: largest resolution to produce.
        max_mem: size budget per output array, in units of 1e9 bytes.
        use_uint8: only changes the bytes-per-pixel figure used for the
            budget (3 vs. 12); the arrays themselves are always uint8.
        filter: PIL resampling filter used when resizing.
    """
    resized_img_dir = os.path.join(savedir, '_temp')
    img_files = [os.path.join(resized_img_dir, f) for f in os.listdir(resized_img_dir)]
    np.random.shuffle(img_files)

    savedir = os.path.join(savedir, 'memmaps')
    os.makedirs(savedir, exist_ok=True)

    sizes = [
        2 ** i for i in range(
            int(np.log2(min_size)),
            int(np.log2(max_size)) + 1
        )]

    pixel_bytes = 3 if use_uint8 else 12
    max_bytes = max_mem * 1e9

    for s in sizes:
        # Always allow at least one image per file, even when the budget is
        # smaller than a single image.
        max_imgs = max(1, int(max_bytes / (pixel_bytes * s ** 2)))
        batch_shape = (max_imgs, 3, s, s) if NCHW else (max_imgs, s, s, 3)
        batch = np.zeros(batch_shape, np.uint8)

        img_count = 0
        batch_count = 0

        for f in img_files:
            with Image.open(f) as img:
                # The batch layout needs exactly three channels.
                if img.mode != 'RGB':
                    img = img.convert('RGB')
                # Resize whenever EITHER side differs from the target.
                if img.size != (s, s):
                    img = img.resize((s, s), filter)
                img = np.asarray(img, np.uint8)

            if NCHW:
                img = np.transpose(img, (2, 0, 1))
            batch[img_count] = img

            if img_count < max_imgs - 1:
                img_count += 1
            else:
                # Batch is full: write it out and start the next one.
                path = os.path.join(savedir, '{}_{}.npy'.format(s, batch_count))
                np.save(path, batch)
                print('Saved {}'.format(path))
                img_count = 0
                batch_count += 1

        # Write the partially filled last batch, if there is one.
        if img_count != 0:
            path = os.path.join(savedir, '{}_{}.npy'.format(s, batch_count))
            np.save(path, batch[:img_count])
            print('Saved {}'.format(path))
if __name__ == '__main__':
    # Both paths are always asked for, although only the second one is used
    # while the crop generation step below stays disabled.
    imgdir, savedir = input('Image directory: '), input('Memmap directory: ')
    # generate_square_crops(imgdir, savedir)
    resize(savedir)
gitextract_z63xd0wk/
├── README.md
├── feed_dict.py
├── make_video.py
├── ops.py
├── progan_v15.py
├── progan_v16.py
└── scripts/
├── downloader.py
└── image_reshape.py
SYMBOL INDEX (46 symbols across 6 files)
FILE: feed_dict.py
class FeedDict (line 14) | class FeedDict:
method __init__ (line 18) | def __init__(self, logdir, imgdir, z_length, n_examples, shuffle=True,...
method n_sizes (line 51) | def n_sizes(self): return len(self.sizes)
method __change_res (line 53) | def __change_res(self, res):
method __change_array (line 58) | def __change_array(self):
method z_batch (line 68) | def z_batch(self, batch_size, random_state=None):
method x_batch (line 73) | def x_batch(self, batch_size, res):
method load (line 94) | def load(cls, logdir, **kwargs):
method save (line 104) | def save(self):
FILE: make_video.py
function get_z_from_audio (line 10) | def get_z_from_audio(audio, z_length, n_bins=60, hop_length=512, random_...
function make_video (line 30) | def make_video(audio, filename, progan, n_bins=60, random_state=0, imgs_...
FILE: ops.py
function conv (line 8) | def conv(input, out_channels, filter_size=3, k=1, padding='SAME', mode=N...
function dense (line 52) | def dense(input, output_size):
function leaky_relu (line 60) | def leaky_relu(input, alpha=0.2):
function pixelwise_norm (line 64) | def pixelwise_norm(input):
function g_conv_layer (line 69) | def g_conv_layer(input, out_channels, **kwargs):
function d_conv_layer (line 73) | def d_conv_layer(input, out_channels, **kwargs):
function minibatch_stddev (line 77) | def minibatch_stddev(input):
function upscale (line 90) | def upscale(input):
function downscale (line 98) | def downscale(input):
function resize_images (line 103) | def resize_images(input, dims=None):
function scale_uint8 (line 109) | def scale_uint8(input):
function tensor_to_imgs (line 114) | def tensor_to_imgs(input, switch_dims=True):
FILE: progan_v15.py
class ProGAN (line 15) | class ProGAN:
method __init__ (line 16) | def __init__(self,
method _reparameterize (line 108) | def _reparameterize(self, x0, x1):
method _create_network (line 115) | def _create_network(self, layers):
method _add_summary (line 287) | def _add_summary(self, string, gs):
method _z (line 291) | def _z(self, batch_size):
method train (line 295) | def train(self):
method get_cur_res (line 381) | def get_cur_res(self):
method generate (line 386) | def generate(self, z):
method transform (line 404) | def transform(self, input_img, n_iter=100000):
FILE: progan_v16.py
class ProGAN (line 15) | class ProGAN:
method __init__ (line 16) | def __init__(self,
method reparameterize (line 104) | def reparameterize(self, x0, x1):
method generator (line 112) | def generator(self, z, n_layers):
method discriminator (line 151) | def discriminator(self, x, n_layers):
method create_network (line 189) | def create_network(self, n_layers):
method get_global_vars (line 325) | def get_global_vars(self):
method get_layer_ops (line 333) | def get_layer_ops(self, layer):
method train (line 350) | def train(self, save_interval=80000):
method get_cur_res (line 432) | def get_cur_res(self):
method generate (line 437) | def generate(self, z):
FILE: scripts/image_reshape.py
function generate_square_crops (line 6) | def generate_square_crops(imgdir, savedir, crops_per_img=10, max_size=10...
function resize (line 56) | def resize(savedir, NCHW=True, min_size=4, max_size=1024, max_mem=0.8,
Condensed preview — 8 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (56K chars).
[
{
"path": "README.md",
"chars": 391,
"preview": "# ProGAN\n\nImplementation of Progressive Generative Adversarial Network based on research done by Tero Karras\n\n\nThe model"
},
{
"path": "feed_dict.py",
"chars": 3378,
"preview": "import os\nimport pickle\nimport numpy as np\nfrom itertools import cycle\n\n''' \nFeedDict handles several numpy mem_map arra"
},
{
"path": "make_video.py",
"chars": 2131,
"preview": "from progan_v15 import ProGAN\n\nimport librosa\nimport numpy as np\nfrom moviepy.video.VideoClip import VideoClip\nfrom movi"
},
{
"path": "ops.py",
"chars": 4087,
"preview": "import tensorflow as tf\n\n\nweight_init = tf.random_normal_initializer()\nbias_init = tf.constant_initializer(0)\n\n\ndef conv"
},
{
"path": "progan_v15.py",
"chars": 20481,
"preview": "import os\nimport datetime as dt\n\n# Operations used in building the network. Many are not used in the current model\nfrom "
},
{
"path": "progan_v16.py",
"chars": 18999,
"preview": "import datetime as dt\nimport os\n\nimport numpy as np\n\n# Operations used in building the network. Many are not used in the"
},
{
"path": "scripts/downloader.py",
"chars": 1396,
"preview": "import os\nimport requests\n\nfrom selenium import webdriver\nfrom selenium.webdriver.common.by import By\nfrom selenium.webd"
},
{
"path": "scripts/image_reshape.py",
"chars": 3815,
"preview": "import os\nimport numpy as np\nfrom PIL import Image\n\n\ndef generate_square_crops(imgdir, savedir, crops_per_img=10, max_si"
}
]
About this extraction
This page contains the full source code of the perplexingpegasus/ProGAN GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 8 files (53.4 KB), approximately 13.4k tokens, and a symbol index with 46 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.