Repository: perplexingpegasus/ProGAN Branch: master Commit: 3fda528bfc9d Files: 8 Total size: 53.4 KB Directory structure: gitextract_z63xd0wk/ ├── README.md ├── feed_dict.py ├── make_video.py ├── ops.py ├── progan_v15.py ├── progan_v16.py └── scripts/ ├── downloader.py └── image_reshape.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: README.md ================================================ # ProGAN Implementation of Progressive Generative Adversarial Network based on research done by Tero Karras The model was trained on landscape images collected from Reddit. http://research.nvidia.com/sites/default/files/pubs/2017-10_Progressive-Growing-of/karras2018iclr-paper.pdf ![generated images](https://github.com/perplexingpegasus/ProGAN/blob/master/example_images.png?raw=true) ================================================ FILE: feed_dict.py ================================================ import os import pickle import numpy as np from itertools import cycle ''' FeedDict handles several numpy mem_map arrays of image data saved within the directory. The arrays should be named in the format "n1_n2.npy" where n1 x n1 is the resolution of the image data in the array, and n2 is its number used for indexing purposes. Data should be of type np.float32 and scaled between -1.0 and 1.0. In order to avoid loading unnecessary data into memory, only one mem_map is loaded at a time. ''' class FeedDict: pickle_filename = 'fd_log.pkl' def __init__(self, logdir, imgdir, z_length, n_examples, shuffle=True, min_size=4, max_size=1024): self.logdir = logdir self.shuffle = shuffle self.z_length = z_length self.sizes = [2 ** i for i in range( int(np.log2(min_size)), int(np.log2(max_size)) + 1 )] files = os.listdir(imgdir) self.arrays = dict() for s in [2 ** i for i in range(2, 11)]: path_list = [] for f in files: if f.startswith('{}_'.format(s)): path_list.append(os.path.join(imgdir, f)) if shuffle: np.random.shuffle(path_list) self.arrays.update({s: cycle(path_list)}) self.z_fixed = self.z_batch(n_examples, z_length) self.cur_res = None self.cur_path = None self.cur_array = None self.cur_array_len = 0 self.idx = 0 @property def n_sizes(self): return len(self.sizes) def __change_res(self, res): assert res in self.arrays.keys() self.cur_res = res self.__change_array() def __change_array(self): new_path = next(self.arrays[self.cur_res]) print('Loaded new memmap array: {}'.format(new_path)) if new_path != self.cur_path: self.cur_path = new_path self.cur_array = np.load(new_path) self.cur_array_len = self.cur_array.shape[0] if self.shuffle: np.random.shuffle(self.cur_array) self.idx = 0 def z_batch(self, batch_size, random_state=None): if random_state is not None: np.random.seed(random_state) return np.random.normal(0.0, 1.0, size=[batch_size, self.z_length]) def x_batch(self, batch_size, res): if res != self.cur_res: self.__change_res(res) remaining = self.cur_array_len - self.idx start = self.idx if remaining >= batch_size: stop = start + batch_size batch = self.cur_array[start:stop] else: stop = batch_size - remaining batch = self.cur_array[start:] self.__change_array() batch = np.concatenate((batch, self.cur_array[:stop])) self.idx = stop return batch @classmethod def load(cls, logdir, **kwargs): path = os.path.join(logdir, cls.pickle_filename) if os.path.exists(path): with open(path, 'rb') as f: fd = pickle.load(f) if type(fd) == cls: print('Restored feed_dict -------\n') return fd return cls(logdir, **kwargs) def save(self): path = os.path.join(self.logdir, self.pickle_filename) with open(path, 'wb') as f: pickle.dump(self, f, pickle.HIGHEST_PROTOCOL) ================================================ FILE: make_video.py ================================================ from progan_v15 import ProGAN import librosa import numpy as np from moviepy.video.VideoClip import VideoClip from moviepy.editor import AudioFileClip from sklearn.preprocessing import StandardScaler def get_z_from_audio(audio, z_length, n_bins=60, hop_length=512, random_state=50): np.random.seed(random_state) if type(audio) == str: audio, sr = librosa.load(audio) y = librosa.core.cqt(audio, n_bins=n_bins, hop_length=hop_length) mag, phase = librosa.core.magphase(y) mag = mag.T mag = StandardScaler().fit_transform(mag) s0, s1 = mag.shape static = np.random.normal(size=[z_length - s1]) static = np.tile(static, (s0, 1)) z = np.concatenate((mag, static), 1) z = z.T np.random.shuffle(z) z = z.T return z def make_video(audio, filename, progan, n_bins=60, random_state=0, imgs_per_batch=20): y, sr = librosa.load(audio) song_length = len(y) / sr z_audio = get_z_from_audio(y, z_length=progan.z_length, n_bins=n_bins, random_state=random_state) fps = z_audio.shape[0] / song_length res = progan.get_cur_res() shape = (res, res * 16 // 9, 3) imgs = np.zeros(shape=[imgs_per_batch, *shape], dtype=np.float32) def make_frame(t): global imgs cur_frame_idx = int(t * fps) if cur_frame_idx >= len(z_audio): return np.zeros(shape=shape, dtype=np.uint8) if cur_frame_idx % imgs_per_batch == 0: imgs = progan.generate(z_audio[cur_frame_idx:cur_frame_idx + imgs_per_batch]) imgs = imgs[:, :, :res * 8 // 9, :] imgs_rev = np.flip(imgs, 2) imgs = np.concatenate((imgs, imgs_rev), 2) return imgs[cur_frame_idx % imgs_per_batch] video_clip = VideoClip(make_frame=make_frame, duration=song_length) audio_clip = AudioFileClip(audio) video_clip = video_clip.set_audio(audio_clip) video_clip.write_videofile(filename, fps=fps) if __name__ == '__main__': progan = ProGAN( logdir='logdir_v2', imgdir='img_arrays', ) make_video('videos\\eco_zones.mp3', 'eco_zones.mp4', progan, random_state=768) ================================================ FILE: ops.py ================================================ import tensorflow as tf weight_init = tf.random_normal_initializer() bias_init = tf.constant_initializer(0) def conv(input, out_channels, filter_size=3, k=1, padding='SAME', mode=None, output_shape=None): in_shape = tf.shape(input) input_channels = int(input.get_shape()[1]) if mode == 'upscale' or mode == 'transpose': filter_shape = [filter_size, filter_size, out_channels, input_channels] else: filter_shape = [filter_size, filter_size, input_channels, out_channels] filter = tf.get_variable('filter', filter_shape, initializer=weight_init) fan_in = float(filter_size ** 2 * input_channels) filter = filter * tf.sqrt(2.0 / fan_in) b = tf.get_variable('bias', [1, out_channels, 1, 1], initializer=bias_init) if mode == 'upscale': filter = tf.pad(filter, [[1, 1], [1, 1], [0, 0], [0, 0]], mode='CONSTANT') filter = tf.add_n([filter[1:, 1:], filter[:-1, 1:], filter[1:, :-1], filter[:-1, :-1]]) output_shape = [in_shape[0], out_channels, in_shape[2] * 2, in_shape[3] * 2] output = tf.nn.conv2d_transpose(input, filter, output_shape, [1, 1, 2, 2], padding=padding, data_format='NCHW') elif mode == 'downscale': filter = tf.pad(filter, [[1, 1], [1, 1], [0, 0], [0, 0]], mode='CONSTANT') filter = tf.add_n([filter[1:, 1:], filter[:-1, 1:], filter[1:, :-1], filter[:-1, :-1]]) filter *= 0.25 output = tf.nn.conv2d(input, filter, [1, 1, 2, 2], padding=padding, data_format='NCHW') elif mode == 'transpose': output = tf.nn.conv2d_transpose(input, filter, output_shape, [1, 1, k, k], padding=padding, data_format='NCHW') else: output = tf.nn.conv2d(input, filter, [1, 1, k, k], padding=padding, data_format='NCHW') output += b if out_channels == 1: output = tf.squeeze(output, 3) return output def dense(input, output_size): fan_in = int(input.get_shape()[1]) W = tf.get_variable('W', [fan_in, output_size], initializer=weight_init) W = W * tf.sqrt(2.0 / float(fan_in)) b = tf.get_variable('b', [1, output_size, 1, 1], initializer=bias_init) return tf.matmul(input, W) + b def leaky_relu(input, alpha=0.2): return tf.nn.leaky_relu(input, alpha=alpha) def pixelwise_norm(input): pixel_var = tf.reduce_mean(tf.square(input), 1, keepdims=True) return input / tf.sqrt(pixel_var + 1e-8) def g_conv_layer(input, out_channels, **kwargs): return pixelwise_norm(leaky_relu(conv(input, out_channels, **kwargs))) def d_conv_layer(input, out_channels, **kwargs): return leaky_relu(conv(input, out_channels, **kwargs)) def minibatch_stddev(input): shape = tf.shape(input) group_size = tf.minimum(4, shape[0]) x = tf.reshape(input, [group_size, -1, shape[1], shape[2], shape[3]]) mu = tf.reduce_mean(x, axis=0, keepdims=True) sigma = tf.sqrt(tf.reduce_mean(tf.square(x - mu), axis=0) + 1e-8) sigma_avg = tf.reduce_mean(sigma, axis=[1, 2, 3], keepdims=True) sigma_avg = tf.tile(sigma_avg, [group_size, 1, shape[2], shape[3]]) return tf.concat((input, sigma_avg), axis=1) def upscale(input): shape = tf.shape(input) channels = input.get_shape()[1] output = tf.reshape(input, [-1, channels, shape[2], 1, shape[3], 1]) output = tf.tile(output, [1, 1, 1, 2, 1, 2]) return tf.reshape(output, [-1, channels, shape[2] * 2, shape[3] * 2]) def downscale(input): return tf.nn.avg_pool(input, ksize=[1, 1, 2, 2], strides=[1, 1, 2, 2], padding='SAME', data_format='NCHW') def resize_images(input, dims=None): if dims is None: dims = tf.shape(input)[2] * 2, tf.shape(input)[3] * 2 return tf.image.resize_nearest_neighbor(input, dims) def scale_uint8(input): input = tf.to_float(input) return (input / 127.5) - 1 def tensor_to_imgs(input, switch_dims=True): if switch_dims: input = tf.transpose(input, (0, 2, 3, 1)) imgs = tf.minimum(tf.maximum(input, -tf.ones_like(input)), tf.ones_like(input)) imgs = (imgs + 1) * 127.5 return tf.cast(imgs, tf.uint8) ================================================ FILE: progan_v15.py ================================================ import os import datetime as dt # Operations used in building the network. Many are not used in the current model from ops import * # FeedDict object used to continuously provide new training data from feed_dict import FeedDict # TODO: add argparser and flags # TODO: refactor training function # TODO: train next version of model using reset_optimizer=True class ProGAN: def __init__(self, logdir, # directory of stored models imgdir, # directory of images for FeedDict learning_rate=0.001, # Adam optimizer learning rate beta1=0, # Adam optimizer beta1 beta2=0.99, # Adam optimizer beta2 w_lambda=10.0, # WGAN-GP/LP lambda w_gamma=1.0, # WGAN-GP/LP gamma epsilon=0.001, # WGAN-GP/LP lambda z_length=512, # latent variable size n_imgs=800000, # number of images to show in each growth step batch_repeats=1, # number of times to repeat minibatch n_examples=24, # number of example images to generate lipschitz_penalty=True, # if True, use WGAN-LP instead of WGAN-GP big_image=True, # Generate a single large preview image, only works if n_examples = 24 scaling_factor=None, # factor to scale down number of trainable parameters reset_optimizer=False, # reset optimizer variables with each new layer ): # Scale down the number of factors if scaling_factor is provided self.channels = [512, 512, 512, 512, 256, 128, 64, 32, 16, 8] if scaling_factor: assert scaling_factor > 1 self.channels = [max(4, c // scaling_factor) for c in self.channels] self.batch_size = [16, 16, 16, 16, 16, 16, 8, 4, 3] self.z_length = z_length self.n_examples = n_examples self.batch_repeats = batch_repeats if batch_repeats else 1 self.n_imgs = n_imgs self.logdir = logdir self.big_image = big_image self.w_lambda = w_lambda self.w_gamma = w_gamma self.epsilon = epsilon self.reset_optimizer=reset_optimizer self.lipschitz_penalty = lipschitz_penalty self.start = True # Generate fized latent variables for image previews np.random.seed(0) self.z_fixed = np.random.normal(size=[self.n_examples, self.z_length]) # Initialize placeholders self.x_placeholder = tf.placeholder(tf.float32, [None, None, None, 3]) self.z_placeholder = tf.placeholder(tf.float32, [None, self.z_length]) # Global step with tf.variable_scope('global_step'): self.global_step = tf.Variable(0, name='global_step', trainable=False) self.global_step_op = tf.assign(self.global_step, tf.add(self.global_step, 1)) # Non-trainable variables for counting to next layer and incrementing value of alpha with tf.variable_scope('image_count'): self.total_imgs = tf.Variable(0.0, name='image_step', trainable=False) self.img_count_placeholder = tf.placeholder(tf.float32) self.img_step_op = tf.assign(self.total_imgs, tf.add(self.total_imgs, self.img_count_placeholder)) self.img_step = tf.mod(tf.add(self.total_imgs, self.n_imgs), self.n_imgs * 2) self.alpha = tf.minimum(1.0, tf.div(self.img_step, self.n_imgs)) self.layer = tf.floor_div(tf.add(self.total_imgs, self.n_imgs), self.n_imgs * 2) # Initialize optimizer as member variable if not rest_optimizer, otherwise generate new # optimizer for each layer if self.reset_optimizer: self.lr = learning_rate self.beta1 = beta1 self.beta2 = beta2 else: self.g_optimizer = tf.train.AdamOptimizer(learning_rate, beta1, beta2) self.d_optimizer = tf.train.AdamOptimizer(learning_rate, beta1, beta2) # Initialize FeedDict self.feed = FeedDict.load(imgdir, logdir) self.n_layers = int(np.log2(1024)) - 1 self.networks = [self._create_network(i + 1) for i in range(self.n_layers)] # Initialize Session, FileWriter and Saver self.sess = tf.Session() self.sess.run(tf.global_variables_initializer()) self.writer = tf.summary.FileWriter(self.logdir, graph=self.sess.graph) self.saver = tf.train.Saver() # Look in logdir to see if a saved model already exists. If so, load it try: self.saver.restore(self.sess, tf.train.latest_checkpoint(self.logdir)) print('Restored ----------------\n') except Exception: pass # Function for fading input of current layer into previous layer based on current value of alpha def _reparameterize(self, x0, x1): return tf.add( tf.scalar_mul(tf.subtract(1.0, self.alpha), x0), tf.scalar_mul(self.alpha, x1) ) # Function for creating network layout at each layer def _create_network(self, layers): # Build the generator for this layer def generator(z): with tf.variable_scope('Generator'): with tf.variable_scope('latent_vector'): z = tf.expand_dims(z, 1) g1 = tf.expand_dims(z, 2) for i in range(layers): with tf.variable_scope('layer_{}'.format(i)): if i > 0: g1 = resize(g1) if i == layers - 1 and layers > 1: g0 = g1 with tf.variable_scope('1'): if i == 0: g1 = pixelwise_norm(leaky_relu(conv2d_transpose( g1, [tf.shape(g1)[0], 4, 4, self.channels[0]]))) else: g1 = pixelwise_norm(leaky_relu(conv2d(g1, self.channels[i]))) with tf.variable_scope('2'): g1 = pixelwise_norm(leaky_relu(conv2d(g1, self.channels[i]))) with tf.variable_scope('rgb_layer_{}'.format(layers - 1)): g1 = conv2d(g1, 3, 1, weight_norm=False) if layers > 1: with tf.variable_scope('rgb_layer_{}'.format(layers - 2)): g0 = conv2d(g0, 3, 1, weight_norm=False) g = self._reparameterize(g0, g1) else: g = g1 return g # Build the discriminator for this layer def discriminator(x): with tf.variable_scope('Discriminator'): if layers > 1: with tf.variable_scope('rgb_layer_{}'.format(layers - 2)): d0 = avg_pool(x) d0 = leaky_relu(conv2d(d0, self.channels[layers - 1], 1)) with tf.variable_scope('rgb_layer_{}'.format(layers - 1)): d1 = leaky_relu(conv2d(x, self.channels[layers], 1)) for i in reversed(range(layers)): with tf.variable_scope('layer_{}'.format(i)): if i == 0: d1 = minibatch_stddev(d1) with tf.variable_scope('1'): d1 = leaky_relu(conv2d(d1, self.channels[i])) with tf.variable_scope('2'): if i == 0: d1 = leaky_relu(conv2d(d1, self.channels[i], 4, padding='VALID')) else: d1 = leaky_relu(conv2d(d1, self.channels[i])) if i != 0: d1 = avg_pool(d1) if i == layers - 1 and layers > 1: d1 = self._reparameterize(d0, d1) with tf.variable_scope('dense'): d = tf.reshape(d1, [-1, self.channels[0]]) d = dense_layer(d, 1) return d # image dimensions dim = 2 ** (layers + 1) # Build the current network with tf.variable_scope('Network', reuse=tf.AUTO_REUSE): Gz = generator(self.z_placeholder) Dz = discriminator(Gz) # Mix different resolutions of input images according to value of alpha with tf.variable_scope('reshape'): if layers > 1: x0 = resize(self.x_placeholder, (dim // 2, dim // 2)) x0 = resize(x0, (dim, dim)) x1 = resize(self.x_placeholder, (dim, dim)) x = self._reparameterize(x0, x1) else: x = resize(self.x_placeholder, (dim, dim)) Dx = discriminator(x) # Fake and real image mixing for WGAN-GP loss function interp = tf.random_uniform(shape=[tf.shape(Dz)[0], 1, 1, 1], minval=0., maxval=1.) x_hat = interp * x + (1 - interp) * Gz Dx_hat = discriminator(x_hat) # Loss function and scalar summaries with tf.variable_scope('Loss_Function'): # Wasserstein Distance wd = Dz - Dx # Gradient/Lipschitz Penalty grads = tf.gradients(Dx_hat, [x_hat])[0] slopes = tf.sqrt(tf.reduce_sum(tf.square(grads), [1, 2, 3])) if self.lipschitz_penalty: gp = tf.square(tf.maximum((slopes - self.w_gamma) / self.w_gamma, 0)) else: gp = tf.square((slopes - self.w_gamma) / self.w_gamma) gp_scaled = self.w_lambda * gp # Epsilon penalty keeps discriminator output for drifting too far away from zero epsilon_cost = self.epsilon * tf.square(Dx) # Cost and summary scalars g_cost = tf.reduce_mean(-Dz) d_cost = tf.reduce_mean(wd + gp_scaled + epsilon_cost) wd = tf.abs(tf.reduce_mean(wd)) gp = tf.reduce_mean(gp) # Summaries wd_sum = tf.summary.scalar('Wasserstein_distance_{}x{}'.format(dim, dim), wd) gp_sum = tf.summary.scalar('gradient_penalty_{}x{}'.format(dim, dim), gp) # Collecting variables to be trained by optimizers g_vars, d_vars = [], [] var_scopes = ['layer_{}'.format(i) for i in range(layers)] var_scopes.extend(['dense', 'rgb_layer_{}'.format(layers - 1), 'rgb_layer_{}'.format(layers - 2)]) for scope in var_scopes: g_vars.extend(tf.get_collection( tf.GraphKeys.GLOBAL_VARIABLES, scope='Network/Generator/{}'.format(scope))) d_vars.extend(tf.get_collection( tf.GraphKeys.GLOBAL_VARIABLES, scope='Network/Discriminator/{}'.format(scope))) # Generate optimizer operations # if self.reset_optimizer is True then initialize a new optimizer for each layer with tf.variable_scope('Optimize'): if self.reset_optimizer: g_train = tf.train.AdamOptimizer( self.lr, self.beta1, self.beta2, name='G_optimizer_{}'.format(layers - 1)).minimize( g_cost, var_list=g_vars) d_train = tf.train.AdamOptimizer( self.lr, self.beta1, self.beta2, name='D_optimizer_{}'.format(layers - 1)).minimize( d_cost, var_list=d_vars) else: g_train = self.g_optimizer.minimize(g_cost, var_list=g_vars) d_train = self.d_optimizer.minimize(d_cost, var_list=d_vars) # Print variable names to before running model print([var.name for var in g_vars]) print([var.name for var in d_vars]) # Generate preview images with tf.variable_scope('image_preview'): fake_imgs = tf.minimum(tf.maximum(Gz, -tf.ones_like(Gz)), tf.ones_like(Gz)) real_imgs = x[:min(self.batch_size[layers - 1], 4), :, :, :] # Upsize images to normal visibility if dim < 256: fake_imgs = resize(fake_imgs, (256, 256)) real_imgs = resize(real_imgs, (256, 256)) # Concatenate images into one large image for preview, only used if 24 preview images are requested if self.big_image and self.n_examples == 24: fake_img_list = tf.unstack(fake_imgs, num=24) fake_img_list = [tf.concat(fake_img_list[6 * i:6 * (i + 1)], 1) for i in range(4)] fake_imgs = tf.concat(fake_img_list, 0) fake_imgs = tf.expand_dims(fake_imgs, 0) real_img_list = tf.unstack(real_imgs, num=min(self.batch_size[layers - 1], 4)) real_imgs = tf.concat(real_img_list, 1) real_imgs = tf.expand_dims(real_imgs, 0) # images summaries fake_img_sum = tf.summary.image('fake{}x{}'.format(dim, dim), fake_imgs, self.n_examples) real_img_sum = tf.summary.image('real{}x{}'.format(dim, dim), real_imgs, 4) return (dim, wd, gp, wd_sum, gp_sum, g_train, d_train, fake_img_sum, real_img_sum, Gz, discriminator) # Summary adding function def _add_summary(self, string, gs): self.writer.add_summary(string, gs) # Latent variable 'z' generator def _z(self, batch_size): return np.random.normal(0.0, 1.0, [batch_size, self.z_length]) # Main training function def train(self): prev_layer = None start_time = dt.datetime.now() total_imgs = self.sess.run(self.total_imgs) while total_imgs < (self.n_layers - 0.5) * self.n_imgs * 2: # Get current layer, global step, alpha and total number of images used so far layer, gs, img_step, alpha, total_imgs = self.sess.run([ self.layer, self.global_step, self.img_step, self.alpha, self.total_imgs]) layer = int(layer) # Global step interval to save model and generate image previews save_interval = max(1000, 10000 // 2 ** layer) # Get network operations and loss functions for current layer (dim, wd, gp, wd_sum, gp_sum, g_train, d_train, fake_img_sum, real_img_sum, Gz, discriminator) = self.networks[layer] # Get training data and latent variables to store in feed_dict feed_dict = {self.x_placeholder: self.feed.next_batch(self.batch_size[layer], dim), self.z_placeholder: self._z(self.batch_size[layer])} # Reset start times if a new layer has begun training if layer != prev_layer: start_time = dt.datetime.now() # Here's where we actually train the model for _ in range(self.batch_repeats): self.sess.run(g_train, feed_dict) self.sess.run(d_train, feed_dict) # Get loss values and summaries wd_, gp_, wd_sum_str, gp_sum_str = self.sess.run([wd, gp, wd_sum, gp_sum], feed_dict) # Print current status, loss functions, etc. percent_done = np.round(img_step * 50 / self.n_imgs, 4) imgs_done = int(img_step) cur_layer_imgs = self.n_imgs * 2 if dim == 4: percent_done = np.round((percent_done - 50) * 2, 4) imgs_done -= self.n_imgs cur_layer_imgs //= 2 print('dimensions: {}x{} ---- {}% ---- images: {}/{} ---- alpha: {} ---- global step: {}' '\nWasserstein distance: {}\ngradient penalty: {}\n'.format( dim, dim, percent_done, imgs_done, cur_layer_imgs, alpha, gs, wd_, gp_)) # Log scalar data every 20 global steps if gs % 20 == 0: self._add_summary(wd_sum_str, gs) self._add_summary(gp_sum_str, gs) # Operations to run every save interval if gs % save_interval == 0: # Do not save the model or generate images immediately after loading/preloading if self.start: self.start = False # Save the model and generate image previews else: print('saving and making images...\n') self.feed.save() self.saver.save( self.sess, os.path.join(self.logdir, "model.ckpt"), global_step=self.global_step) real_img_sum_str = self.sess.run(real_img_sum, feed_dict) img_preview_feed_dict = { self.x_placeholder: feed_dict[self.x_placeholder][:4], self.z_placeholder: self.z_fixed} fake_img_sum_str = self.sess.run(fake_img_sum, img_preview_feed_dict) self._add_summary(fake_img_sum_str, gs) self._add_summary(real_img_sum_str, gs) # Increment image count and global step variables img_count = self.batch_repeats * self.batch_size[layer] self.sess.run(self.global_step_op) self.sess.run(self.img_step_op, {self.img_count_placeholder: img_count}) # Calculate and print estimated time remaining prev_layer = layer avg_time = (dt.datetime.now() - start_time) / (imgs_done + self.batch_size[layer]) steps_remaining = cur_layer_imgs - imgs_done time_reamining = avg_time * steps_remaining print('est. time remaining on current layer: {}'.format(time_reamining)) def get_cur_res(self): cur_layer = int(self.sess.run(self.layer)) return 2 ** (2 + cur_layer) # Function for generating images from a 1D or 2D array of latent vectors def generate(self, z): if len(z.shape) == 1: z = np.expand_dims(z, 0) cur_layer = int(self.sess.run(self.layer)) G = self.networks[cur_layer][9] imgs = self.sess.run(G, {self.z_placeholder: z}) imgs = np.minimum(imgs, 1.0) imgs = np.maximum(imgs, -1.0) imgs = (imgs + 1) * 255 / 2 imgs = np.uint8(imgs) if imgs.shape[0] == 1: imgs = np.squeeze(imgs, 0) return imgs def transform(self, input_img, n_iter=100000): with tf.variable_scope('transform'): global_step = tf.Variable(0, name='transform_global_step', trainable=False) transform_img = tf.Variable(input_img, name='transform_img', dtype=tf.float32) cur_layer = int(self.sess.run(self.layer)) (dim, wd, gp, wd_sum, gp_sum, g_train, d_train, ake_img_sum, real_img_sum, Gz, discriminator) = self.networks[cur_layer] with tf.variable_scope('Network', reuse=tf.AUTO_REUSE): with tf.variable_scope('resize'): jitter = tf.random_uniform([2], -10, 10, tf.int32) img = tf.manip.roll(transform_img, jitter, [1, 2]) img = resize(img, (dim, dim)) Dt = discriminator(img) t_cost = tf.reduce_mean(-Dt) tc_sum = tf.summary.scalar('transform_cost_{}x{}'.format(dim, dim), t_cost) t_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='transform/transform_img') t_train = tf.train.AdamOptimizer(0.0001).minimize( t_cost, var_list=t_vars, global_step=global_step) transform_img_sum = tf.summary.image('transform', transform_img) self.sess.run(tf.global_variables_initializer()) for i in range(n_iter): gs, t_cost_, tc_sum_str, _ = self.sess.run([global_step, t_cost, tc_sum, t_train]) print('Global step: {}, cost: {}\n\n'.format(gs, t_cost_)) if i % 20 == 0: self._add_summary(tc_sum_str, gs) if i % 1000 == 0: img_sum_str = self.sess.run(transform_img_sum) self._add_summary(img_sum_str, gs) if __name__ == '__main__': progan = ProGAN( logdir='logdir_v2', imgdir='img_arrays', ) # progan = ProGAN( # logdir='logdir_v3', # imgdir='img_arrays_botanical', # reset_optimizer=True # ) progan.train() ================================================ FILE: progan_v16.py ================================================ import datetime as dt import os import numpy as np # Operations used in building the network. Many are not used in the current model from ops import * # FeedDict object used to continuously provide new training data from feed_dict import FeedDict # TODO: add argparser and flags class ProGAN: def __init__(self, logdir, # directory of stored models imgdir, # directory of images for FeedDict learning_rate=0.001, # Adam optimizer learning rate beta1=0, # Adam optimizer beta1 beta2=0.99, # Adam optimizer beta2 w_lambda=10.0, # WGAN-GP/LP lambda w_gamma=1.0, # WGAN-GP/LP gamma epsilon=0.001, # WGAN-GP/LP lambda z_length=512, # latent variable size n_imgs=800000, # number of images to show in each growth step batch_repeats=1, # number of times to repeat minibatch n_examples=24, # number of example images to generate lipschitz_penalty=True, # if True, use WGAN-LP instead of WGAN-GP big_image=True, # Generate a single large preview image, only works if n_examples = 24 reset_optimizer=True, # reset optimizer variables with each new layer batch_sizes=None, channels=None, ): # Scale down the number of factors if scaling_factor is provided self.channels = channels if channels else [512, 512, 512, 512, 256, 128, 64, 32, 16, 16] self.batch_sizes = batch_sizes if batch_sizes else [16, 16, 16, 16, 16, 16, 12, 4, 3] self.z_length = z_length self.n_examples = n_examples self.batch_repeats = batch_repeats if batch_repeats else 1 self.n_imgs = n_imgs self.logdir = logdir self.big_image = big_image self.w_lambda = w_lambda self.w_gamma = w_gamma self.epsilon = epsilon self.reset_optimizer=reset_optimizer self.lipschitz_penalty = lipschitz_penalty # Initialize FeedDict self.feed = FeedDict.load(logdir, imgdir=imgdir, z_length=z_length, n_examples=n_examples) self.n_layers = self.feed.n_sizes self.max_imgs = (self.n_layers - 0.5) * self.n_imgs * 2 # Initialize placeholders self.x_placeholder = tf.placeholder(tf.uint8, [None, 3, None, None]) self.z_placeholder = tf.placeholder(tf.float32, [None, self.z_length]) # Global step with tf.variable_scope('global_step'): self.global_step = tf.Variable(0, name='global_step', trainable=False, dtype=tf.int32) # Non-trainable variables for counting to next layer and incrementing value of alpha with tf.variable_scope('image_count'): self.total_imgs = tf.Variable(0, name='total_images', trainable=False, dtype=tf.int32) img_offset = tf.add(self.total_imgs, self.n_imgs) imgs_per_layer = self.n_imgs * 2 self.img_step = tf.mod(img_offset, imgs_per_layer) self.layer = tf.minimum(tf.floor_div(img_offset, imgs_per_layer), self.n_layers - 1) fade_in = tf.to_float(self.img_step) / float(self.n_imgs) self.alpha = tf.minimum(1.0, tf.maximum(0.0, fade_in)) # Initialize optimizer as member variable if not rest_optimizer, otherwise generate new # optimizer for each layer if self.reset_optimizer: self.lr = learning_rate self.beta1 = beta1 self.beta2 = beta2 else: self.g_optimizer = tf.train.AdamOptimizer(learning_rate, beta1, beta2) self.d_optimizer = tf.train.AdamOptimizer(learning_rate, beta1, beta2) self.networks = [self.create_network(i + 1) for i in range(self.n_layers)] # Initialize Session, FileWriter and Saver self.sess = tf.Session() self.sess.run(tf.global_variables_initializer()) self.writer = tf.summary.FileWriter(self.logdir, graph=self.sess.graph) self.saver = tf.train.Saver() # Look in logdir to see if a saved model already exists. If so, load it try: self.saver.restore(self.sess, tf.train.latest_checkpoint(self.logdir)) print('Restored model -----------\n') except Exception: pass # Function for fading input of current layer into previous layer based on current value of alpha def reparameterize(self, x0, x1): return tf.add( tf.scalar_mul(tf.subtract(1.0, self.alpha), x0), tf.scalar_mul(self.alpha, x1) ) # Build a generator for n layers def generator(self, z, n_layers): with tf.variable_scope('Generator'): with tf.variable_scope('latent_vector'): z = tf.expand_dims(z, 2) g1 = tf.expand_dims(z, 3) for i in range(n_layers): with tf.variable_scope('layer_{}'.format(i)): if i == n_layers - 1: g0 = g1 with tf.variable_scope('1'): if i == 0: g1 = g_conv_layer(g1, self.channels[i], filter_size=4, padding='VALID', mode='transpose', output_shape=[tf.shape(g1)[0], self.channels[i], 4, 4]) else: g1 = g_conv_layer(g1, self.channels[i], mode='upscale') with tf.variable_scope('2'): g1 = g_conv_layer(g1, self.channels[i]) with tf.variable_scope('rgb_layer_{}'.format(n_layers - 1)): g1 = conv(g1, 3, filter_size=1) if n_layers > 1: with tf.variable_scope('rgb_layer_{}'.format(n_layers - 2)): g0 = conv(g0, 3, filter_size=1) g0 = upscale(g0) g = self.reparameterize(g0, g1) else: g = g1 return g # Build a discriminator n layers def discriminator(self, x, n_layers): with tf.variable_scope('Discriminator'): if n_layers > 1: with tf.variable_scope('rgb_layer_{}'.format(n_layers - 2)): d0 = downscale(x) d0 = d_conv_layer(d0, self.channels[n_layers - 1], filter_size=1) with tf.variable_scope('rgb_layer_{}'.format(n_layers - 1)): d1 = d_conv_layer(x, self.channels[n_layers], filter_size=1) for i in reversed(range(n_layers)): with tf.variable_scope('layer_{}'.format(i)): if i == 0: d1 = minibatch_stddev(d1) with tf.variable_scope('1'): d1 = d_conv_layer(d1, self.channels[i]) with tf.variable_scope('2'): if i == 0: d1 = d_conv_layer(d1, self.channels[0], filter_size=4, padding='VALID') else: d1 = d_conv_layer(d1, self.channels[i], mode='downscale') if i == n_layers - 1 and n_layers > 1: d1 = self.reparameterize(d0, d1) with tf.variable_scope('dense'): d = tf.reshape(d1, [-1, self.channels[0]]) d = dense(d, 1) return d # Function for creating network layout at each layer def create_network(self, n_layers): # image dimensions dim = 2 ** (n_layers + 1) # Build the current network with tf.variable_scope('Network', reuse=tf.AUTO_REUSE): Gz = self.generator(self.z_placeholder, n_layers) Dz = self.discriminator(Gz, n_layers) # Mix different resolutions of input images according to value of alpha with tf.variable_scope('training_images'): x = scale_uint8(self.x_placeholder) if n_layers > 1: x0 = upscale(downscale(x)) x1 = x x = self.reparameterize(x0, x1) Dx = self.discriminator(x, n_layers) # Fake and real image mixing for WGAN-GP loss function interp = tf.random_uniform(shape=[tf.shape(Dz)[0], 1, 1, 1], minval=0.0, maxval=1.0) x_hat = interp * x + (1 - interp) * Gz Dx_hat = self.discriminator(x_hat, n_layers) # Loss function and scalar summaries with tf.variable_scope('Loss_Function'): # Wasserstein Distance wd = Dz - Dx # Gradient/Lipschitz Penalty grads = tf.gradients(Dx_hat, [x_hat])[0] slopes = tf.sqrt(tf.reduce_sum(tf.square(grads), [1, 2, 3])) if self.lipschitz_penalty: gp = tf.square(tf.maximum((slopes - self.w_gamma) / self.w_gamma, 0)) else: gp = tf.square((slopes - self.w_gamma) / self.w_gamma) gp_scaled = self.w_lambda * gp # Epsilon penalty keeps discriminator output for drifting too far away from zero epsilon_cost = self.epsilon * tf.square(Dx) # Cost and summary scalars g_cost = tf.reduce_mean(-Dz) d_cost = tf.reduce_mean(wd + gp_scaled + epsilon_cost) wd = tf.abs(tf.reduce_mean(wd)) gp = tf.reduce_mean(gp) # Summaries wd_sum = tf.summary.scalar('Wasserstein_distance_{}_({}x{})'.format( n_layers - 1, dim, dim), wd) gp_sum = tf.summary.scalar('gradient_penalty_{}_({}x{})'.format( n_layers - 1, dim, dim), gp) # Collecting variables to be trained by optimizers g_vars, d_vars = [], [] var_scopes = ['layer_{}'.format(i) for i in range(n_layers)] var_scopes.extend([ 'dense', 'rgb_layer_{}'.format(n_layers - 2), 'rgb_layer_{}'.format(n_layers - 1) ]) for scope in var_scopes: g_vars.extend(tf.get_collection( tf.GraphKeys.GLOBAL_VARIABLES, scope='Network/Generator/{}'.format(scope) )) d_vars.extend(tf.get_collection( tf.GraphKeys.GLOBAL_VARIABLES, scope='Network/Discriminator/{}'.format(scope) )) # Generate optimizer operations # if self.reset_optimizer is True then initialize a new optimizer for each layer with tf.variable_scope('Optimize'): if self.reset_optimizer: g_train = tf.train.AdamOptimizer( self.lr, self.beta1, self.beta2, name='G_optimizer_{}'.format(n_layers - 1) ).minimize( g_cost, var_list=g_vars) d_train = tf.train.AdamOptimizer( self.lr, self.beta1, self.beta2, name='D_optimizer_{}'.format(n_layers - 1) ).minimize( d_cost, var_list=d_vars, global_step=self.global_step) else: g_train = self.g_optimizer.minimize(g_cost, var_list=g_vars) d_train = self.d_optimizer.minimize(d_cost, var_list=d_vars, global_step=self.global_step) # Increment image count n_imgs = tf.shape(x)[0] new_image_count = tf.add(self.total_imgs, n_imgs) img_step_op = tf.assign(self.total_imgs, new_image_count) d_train = tf.group(d_train, img_step_op) # Print variable names to before running model print('\nGenerator variables for layer {} ({} x {}):'.format(n_layers - 1, dim, dim)) print([var.name for var in g_vars]) print('\nDiscriminator variables for layer {} ({} x {}):'.format(n_layers - 1, dim, dim)) print([var.name for var in d_vars]) # Generate preview images with tf.variable_scope('image_preview'): n_real_imgs = min(self.batch_sizes[n_layers - 1], 4) fake_imgs = tensor_to_imgs(Gz) real_imgs = tensor_to_imgs(x[:n_real_imgs]) # Upsize images to normal visibility if dim < 256: fake_imgs = resize_images(fake_imgs, (256, 256)) real_imgs = resize_images(real_imgs, (256, 256)) # Concatenate images into one large image for preview, only used if 24 preview images are requested if self.big_image and self.n_examples == 24: fake_img_list = tf.unstack(fake_imgs, num=24) fake_img_list = [tf.concat(fake_img_list[6 * i:6 * (i + 1)], 1) for i in range(4)] fake_imgs = tf.concat(fake_img_list, 0) fake_imgs = tf.expand_dims(fake_imgs, 0) real_img_list = tf.unstack(real_imgs, num=n_real_imgs) real_imgs = tf.concat(real_img_list, 1) real_imgs = tf.expand_dims(real_imgs, 0) # images summaries fake_img_sum = tf.summary.image('fake{}x{}'.format(dim, dim), fake_imgs, self.n_examples) real_img_sum = tf.summary.image('real{}x{}'.format(dim, dim), real_imgs, 4) return dict( wd=wd, gp=gp, wd_sum=wd_sum, gp_sum=gp_sum, g_train=g_train, d_train=d_train, fake_img_sum=fake_img_sum, real_img_sum=real_img_sum, Gz=Gz ) # Get current layer, global step, alpha and total number of images used so far def get_global_vars(self): gs, layer, img_step, alpha, total_imgs = self.sess.run([ self.global_step, self.layer, self.img_step, self.alpha, self.total_imgs ]) if layer == 0: img_step -= self.n_imgs return gs, layer, img_step, alpha, total_imgs def get_layer_ops(self, layer): dim = 2 ** (layer + 2) batch_size = self.batch_sizes[layer] n_imgs = self.n_imgs if layer > 0: n_imgs *= 2 layer_ops = self.networks[layer] g_train = layer_ops.get('g_train') d_train = layer_ops.get('d_train') get_ops = lambda *op_names: [layer_ops.get(name) for name in op_names] scalar_sum_ops = get_ops('wd', 'gp', 'wd_sum', 'gp_sum') img_sum_ops = get_ops('fake_img_sum', 'real_img_sum') return dim, batch_size, n_imgs, g_train, d_train, scalar_sum_ops, img_sum_ops # Main training function def train(self, save_interval=80000): def get_loop_progress(layer, img_step): percent_done = img_step / self.n_imgs if layer > 0: percent_done /= 2 time = dt.datetime.now() return time, percent_done gs, prev_layer, img_step, alpha, total_imgs = self.get_global_vars() start_time, start_percent_done = get_loop_progress(prev_layer, img_step) dim, batch_size, n_imgs, g_train, d_train, scalar_sum_ops, img_sum_ops = self.get_layer_ops(prev_layer) save_step = (total_imgs // save_interval + 1) * save_interval while total_imgs < self.max_imgs: gs, layer, img_step, alpha, total_imgs = self.get_global_vars() # Get network operations and loss functions for current layer if layer != prev_layer: start_time, start_percent_done = get_loop_progress(prev_layer, img_step) dim, batch_size, n_imgs, g_train, d_train, scalar_sum_ops, img_sum_ops = self.get_layer_ops(layer) # Get training data and latent variables to store in feed_dict feed_dict = { self.x_placeholder: self.feed.x_batch(batch_size, dim), self.z_placeholder: self.feed.z_batch(batch_size) } # Here's where we actually train the model for _ in range(self.batch_repeats): self.sess.run(d_train, feed_dict) self.sess.run(g_train, feed_dict) if gs % 20 == 0: # Get loss values and summaries wd_value, gp_value, wd_sum_str, gp_sum_str = self.sess.run(scalar_sum_ops, feed_dict) # Print current status, loss functions, etc. time, percent_done = get_loop_progress(layer, img_step) print( 'dimensions: ({} x {}) ---- {}% ---- images: {}/{} ---- alpha: {} ---- global step: {}' '\nWasserstein distance: {}\ngradient penalty: {}'.format( dim, dim, np.round(percent_done * 100, 4), img_step, n_imgs, np.round(alpha, 4), gs, wd_value, gp_value )) # Calculate and print estimated time remaining delta_t = time - start_time time_remaining = delta_t * (1 / (percent_done - start_percent_done + 1e-8) - 1) print('est. time remaining on layer {}: {}\n'.format(layer, time_remaining)) # Log scalar data every 20 global steps self.writer.add_summary(wd_sum_str, gs) self.writer.add_summary(gp_sum_str, gs) # Operations to run every save interval if total_imgs > save_step: save_step += save_interval # Save the model and generate image previews print('\nsaving and making images...\n') self.saver.save( self.sess, os.path.join(self.logdir, "model.ckpt"), global_step=self.global_step ) self.feed.save() img_preview_feed_dict = { self.x_placeholder: feed_dict[self.x_placeholder][:4], self.z_placeholder: self.feed.z_fixed } fake_img_sum_str, real_img_sum_str = self.sess.run( img_sum_ops, img_preview_feed_dict ) self.writer.add_summary(fake_img_sum_str, gs) self.writer.add_summary(real_img_sum_str, gs) prev_layer = layer def get_cur_res(self): cur_layer = self.sess.run(self.layer) return 2 ** (2 + cur_layer) def generate(self, z): solo = z.ndim == 1 if solo: z = np.expand_dims(z, 0) cur_layer = int(self.sess.run(self.layer)) imgs = self.networks[cur_layer][9] imgs = self.sess.run(imgs, {self.z_placeholder: z}) if solo: imgs = np.squeeze(imgs, 0) return imgs if __name__ == '__main__': # progan = ProGAN(logdir='logdir_v5', imgdir='memmaps') # progan = ProGAN(logdir='logdir_v6', imgdir='memmaps', batch_repeats=4) progan = ProGAN(logdir='logdir_v8', imgdir='memmaps', batch_repeats=4) # progan = ProGAN(logdir='logdir_v9', imgdir='memmaps', batch_repeats=4, batch_sizes=[128, 128, 128, 64, 32, 16, 12, 8, 4]) progan.train() ================================================ FILE: scripts/downloader.py ================================================ import os import requests from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC subreddit = input('Enter subreddit name: ') save_dir = input('Enter name of folder to save images in: ') if not os.path.isdir(save_dir): os.makedirs(save_dir) pages = 100 img_n = 0 browser = webdriver.Firefox() browser.get('https://old.reddit.com/r/{}'.format(subreddit)) for i in range(pages): icons = WebDriverWait(browser, 300).until( EC.presence_of_all_elements_located( (By.CLASS_NAME, "expando-button") ) ) for icon in icons: icon.click() links = WebDriverWait(browser, 300).until( EC.presence_of_all_elements_located((By.CLASS_NAME, "may-blank")) ) links = list(set([a.get_attribute('href') for a in links if a.get_attribute('href').endswith('.jpg')])) for link in links: image = requests.get(link) with open('{}/img_{}.jpg'.format(save_dir, img_n), 'wb') as f: f.write(image.content) img_n += 1 if i != pages - 1: next_button = WebDriverWait(browser, 300).until( EC.presence_of_element_located((By.CLASS_NAME, "next-button")) ) next_button.click() print('page: {}, images: {}'.format(i, len(links))) ================================================ FILE: scripts/image_reshape.py ================================================ import os import numpy as np from PIL import Image def generate_square_crops(imgdir, savedir, crops_per_img=10, max_size=1024, filter=Image.BICUBIC): img_files = [os.path.join(imgdir, f) for f in os.listdir(imgdir)] savedir = os.path.join(savedir, '_temp') if not os.path.exists(savedir): os.makedirs(savedir) for i, f in enumerate(img_files): with Image.open(f) as img: width, height = img.size if width < max_size or height < max_size: continue landscape = width > height if landscape: new_height = max_size new_width = int(width * (max_size / height)) offset = int(max_size * (width / height - 1) + 1) else: new_width = max_size new_height = int(height * (max_size / width)) offset = int(max_size * (height / width - 1) + 1) n_crops = min(offset, crops_per_img) window_slide_len = offset / n_crops try: img = img.convert('RGB') img = img.resize((new_width, new_height), filter) for j in range(n_crops): shift = int(j * window_slide_len) if landscape: window = (shift, 0, max_size + shift, max_size) else: window = (0, shift, max_size, max_size + shift) cropped_img = img.crop(window) mirror_img = cropped_img.transpose(Image.FLIP_LEFT_RIGHT) path = os.path.join(savedir, 'img_{}_{}.jpg'.format(i, j)) mirror_path = os.path.join(savedir, 'img_{}_{}_mirror.jpg'.format(i, j)) cropped_img.save(path, "JPEG") mirror_img.save(mirror_path, "JPEG") print('Processed {}\n'.format(f)) except OSError: continue def resize(savedir, NCHW=True, min_size=4, max_size=1024, max_mem=0.8, use_uint8=True, filter=Image.BICUBIC): resized_img_dir = os.path.join(savedir, '_temp') img_files = [os.path.join(resized_img_dir, f) for f in os.listdir(resized_img_dir)] np.random.shuffle(img_files) savedir = os.path.join(savedir, 'memmaps') if not os.path.exists(savedir): os.makedirs(savedir) sizes = [ 2 ** i for i in range( int(np.log2(min_size)), int(np.log2(max_size)) + 1 )] pixel_bytes = 3 if use_uint8 else 12 max_bytes = max_mem * 1e9 for s in sizes: max_imgs = int(max_bytes / (pixel_bytes * s ** 2)) batch_shape = (max_imgs, 3, s, s) if NCHW else (max_imgs, s, s, 3) batch = np.zeros(batch_shape, np.uint8) img_count = 0 batch_count = 0 for f in img_files: with Image.open(f) as img: width, height = img.size if width != s and height != s: img = img.resize((s, s), filter) img = np.asarray(img, np.uint8) if NCHW: img = np.transpose(img, (2, 0, 1)) batch[img_count] = img if img_count < max_imgs - 1: img_count += 1 else: path = os.path.join(savedir, '{}_{}.npy'.format(s, batch_count)) np.save(path, batch) print('Saved {}'.format(path)) img_count = 0 batch_count += 1 if img_count != 0: path = os.path.join(savedir, '{}_{}.npy'.format(s, batch_count)) np.save(path, batch[:img_count]) print('Saved {}'.format(path)) if __name__ == '__main__': imgdir = input('Image directory: ') savedir = input('Memmap directory: ') #generate_square_crops(imgdir, savedir) resize(savedir)