Repository: yihui-he/GAN-MNIST Branch: master Commit: d0149e08186c Files: 14 Total size: 40.7 KB Directory structure: gitextract_7ft1rpmj/ ├── .github/ │ └── FUNDING.yml ├── .gitignore ├── LICENSE ├── README.md ├── face/ │ ├── model.py │ ├── train.py │ └── util.py ├── lsun/ │ ├── model.py │ ├── train.py │ └── util.py └── mnist/ ├── load.py ├── model.py ├── train.py └── util.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/FUNDING.yml ================================================ # These are supported funding model platforms github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] patreon: # Replace with a single Patreon username open_collective: # Replace with a single Open Collective username ko_fi: yihuihe tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry liberapay: # Replace with a single Liberapay username issuehunt: # Replace with a single IssueHunt username otechie: # Replace with a single Otechie username lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] ================================================ FILE: .gitignore ================================================ *.pyc ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2018 Yihui He 何宜晖 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and 
to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # GAN on MNIST with TensorFlow [GitHub - yihui-he/GAN-MNIST: Generative Adversarial Network for MNIST with tensorflow](https://github.com/yihui-he/GAN-MNIST) ![Untitled](https://github.com/ethanhe42/GAN-MNIST/assets/10027339/8f39f2b6-b2dd-4f0b-9fbf-f33247b0b70e) ![Untitled 1](https://github.com/ethanhe42/GAN-MNIST/assets/10027339/de4f99c4-f615-4954-9db1-e9883396dc3a) ### Tensorflow implementation - All the codes in this project are mere replication of [Theano version](https://github.com/Newmu/dcgan_code) ### Code - Under `face/` and `mnist/` - model.py - Definition of DCGAN model - train.py - Training the DCGAN model (and Generating samples time to time) - util.py - Image related utils ### Dataset - MNIST - http://yann.lecun.com/exdb/mnist/ - CelebA Face dataset - http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html - Download “img_align_celeba” images - Set “face_image_path” in train.py according to the path of downloaded dataset ### references https://github.com/carpedm20/DCGAN-tensorflow ### Citation If you find the code useful in your research, please consider citing: ``` @InProceedings{He_2017_ICCV, author = {He, Yihui and Zhang, Xiangyu and Sun, Jian}, title = {Channel Pruning 
# ============ FILE: face/model.py ============
#-*- coding: utf-8 -*-
import tensorflow as tf
import ipdb

def batchnormalize(X, eps=1e-8, g=None, b=None):
    # Per-batch normalization (no running averages): subtract the batch mean,
    # divide by sqrt(batch variance + eps), then optionally apply the learned
    # gain `g` and bias `b`.  NOTE(review): the local name `std` actually
    # holds the variance; dividing by sqrt(std+eps) is therefore the standard
    # BN formula and the behaviour is correct.
    if X.get_shape().ndims == 4:
        # NHWC conv activations: normalize per channel over batch+spatial dims.
        mean = tf.reduce_mean(X, [0,1,2])
        std = tf.reduce_mean( tf.square(X-mean), [0,1,2] )
        X = (X-mean) / tf.sqrt(std+eps)
        if g is not None and b is not None:
            g = tf.reshape(g, [1,1,1,-1])
            b = tf.reshape(b, [1,1,1,-1])
            X = X*g + b
    elif X.get_shape().ndims == 2:
        # Fully-connected activations: normalize per feature over the batch.
        mean = tf.reduce_mean(X, 0)
        std = tf.reduce_mean(tf.square(X-mean), 0)
        X = (X-mean) / tf.sqrt(std+eps)#std
        if g is not None and b is not None:
            g = tf.reshape(g, [1,-1])
            b = tf.reshape(b, [1,-1])
            X = X*g + b
    else:
        raise NotImplementedError
    return X

def lrelu(X, leak=0.2):
    # Leaky ReLU expressed as f1*x + f2*|x| (== x for x>0, leak*x for x<0).
    f1 = 0.5 * (1 + leak)
    f2 = 0.5 * (1 - leak)
    return f1 * X + f2 * tf.abs(X)

def bce(o, t):
    # Element-wise binary cross-entropy of probabilities `o` against targets
    # `t`; clipping keeps both log() terms finite.  Callers reduce_mean the
    # result themselves.
    o = tf.clip_by_value(o, 1e-7, 1. - 1e-7)
    return -(t * tf.log(o) + (1.- t)*tf.log(1. - o))

class DCGAN():
    # DCGAN for 64x64x3 face images: generator = FC projection to 4x4 then
    # four stride-2 transposed convs; discriminator mirrors it with four
    # stride-2 convs and a final FC score.  Weights are plain tf.Variable
    # attributes, so the two discriminate() calls in build_model share them.
    def __init__(
            self,
            batch_size=100,
            image_shape=[64,64,3],
            dim_z=100,          # latent vector size
            dim_W1=1024,        # channels at the 4x4 layer
            dim_W2=512,
            dim_W3=256,
            dim_W4=128,
            dim_W5=3,           # output channels (RGB)
            ):
        self.batch_size = batch_size
        self.image_shape = image_shape
        self.dim_z = dim_z
        self.dim_W1 = dim_W1
        self.dim_W2 = dim_W2
        self.dim_W3 = dim_W3
        self.dim_W4 = dim_W4
        self.dim_W5 = dim_W5

        # Generator parameters.  BN gains start near 1, biases at 0.
        self.gen_W1 = tf.Variable(tf.random_normal([dim_z, dim_W1*4*4], stddev=0.02), name='gen_W1')
        self.gen_bn_g1 = tf.Variable( tf.random_normal([dim_W1*4*4], mean=1.0, stddev=0.02), name='gen_bn_g1')
        self.gen_bn_b1 = tf.Variable( tf.zeros([dim_W1*4*4]), name='gen_bn_b1')

        self.gen_W2 = tf.Variable(tf.random_normal([5,5,dim_W2, dim_W1], stddev=0.02), name='gen_W2')
        self.gen_bn_g2 = tf.Variable( tf.random_normal([dim_W2], mean=1.0, stddev=0.02), name='gen_bn_g2')
        self.gen_bn_b2 = tf.Variable( tf.zeros([dim_W2]), name='gen_bn_b2')

        self.gen_W3 = tf.Variable(tf.random_normal([5,5,dim_W3, dim_W2], stddev=0.02), name='gen_W3')
        self.gen_bn_g3 = tf.Variable( tf.random_normal([dim_W3], mean=1.0, stddev=0.02), name='gen_bn_g3')
        self.gen_bn_b3 = tf.Variable( tf.zeros([dim_W3]), name='gen_bn_b3')

        self.gen_W4 = tf.Variable(tf.random_normal([5,5,dim_W4, dim_W3], stddev=0.02), name='gen_W4')
        self.gen_bn_g4 = tf.Variable( tf.random_normal([dim_W4], mean=1.0, stddev=0.02), name='gen_bn_g4')
        self.gen_bn_b4 = tf.Variable( tf.zeros([dim_W4]), name='gen_bn_b4')

        self.gen_W5 = tf.Variable(tf.random_normal([5,5,dim_W5, dim_W4], stddev=0.02), name='gen_W5')

        # Discriminator parameters (first conv layer has no BN).
        self.discrim_W1 = tf.Variable(tf.random_normal([5,5,dim_W5,dim_W4], stddev=0.02), name='discrim_W1')

        self.discrim_W2 = tf.Variable(tf.random_normal([5,5,dim_W4,dim_W3], stddev=0.02), name='discrim_W2')
        self.discrim_bn_g2 = tf.Variable( tf.random_normal([dim_W3], mean=1.0, stddev=0.02), name='discrim_bn_g2')
        self.discrim_bn_b2 = tf.Variable( tf.zeros([dim_W3]), name='discrim_bn_b2')

        self.discrim_W3 = tf.Variable(tf.random_normal([5,5,dim_W3,dim_W2], stddev=0.02), name='discrim_W3')
        self.discrim_bn_g3 = tf.Variable( tf.random_normal([dim_W2], mean=1.0, stddev=0.02), name='discrim_bn_g3')
        self.discrim_bn_b3 = tf.Variable( tf.zeros([dim_W2]), name='discrim_bn_b3')

        self.discrim_W4 = tf.Variable(tf.random_normal([5,5,dim_W2,dim_W1], stddev=0.02), name='discrim_W4')
        self.discrim_bn_g4 = tf.Variable( tf.random_normal([dim_W1], mean=1.0, stddev=0.02), name='discrim_bn_g4')
        self.discrim_bn_b4 = tf.Variable( tf.zeros([dim_W1]), name='discrim_bn_b4')

        self.discrim_W5 = tf.Variable(tf.random_normal([4*4*dim_W1,1], stddev=0.02), name='discrim_W5')

        self.gen_params = [
                self.gen_W1, self.gen_bn_g1, self.gen_bn_b1,
                self.gen_W2, self.gen_bn_g2, self.gen_bn_b2,
                self.gen_W3, self.gen_bn_g3, self.gen_bn_b3,
                self.gen_W4, self.gen_bn_g4, self.gen_bn_b4,
                self.gen_W5
                ]
        self.discrim_params = [
                self.discrim_W1,
                self.discrim_W2, self.discrim_bn_g2, self.discrim_bn_b2,
                self.discrim_W3, self.discrim_bn_g3, self.discrim_bn_b3,
                self.discrim_W4, self.discrim_bn_g4, self.discrim_bn_b4,
                self.discrim_W5
                ]

    def build_model(self):
        # Wire up the full training graph and return placeholders + losses.
        Z = tf.placeholder(tf.float32, [self.batch_size, self.dim_z])
        image_real = tf.placeholder(tf.float32, [self.batch_size]+self.image_shape)
        image_gen = self.generate(Z)

        p_real, h_real = self.discriminate(image_real)
        p_gen, h_gen = self.discriminate(image_gen)

        # Standard non-saturating GAN losses on the sigmoid probabilities.
        discrim_cost_real = bce(p_real, tf.ones_like(p_real))
        discrim_cost_gen = bce(p_gen, tf.zeros_like(p_gen))
        discrim_cost = tf.reduce_mean(discrim_cost_real) + tf.reduce_mean(discrim_cost_gen)
        gen_cost = tf.reduce_mean(bce( p_gen, tf.ones_like(p_gen) ))

        return Z, image_real, discrim_cost, gen_cost, p_real, p_gen, h_real, h_gen

    def discriminate(self, image):
        # 64x64 -> 32 -> 16 -> 8 -> 4 with stride-2 convs, then FC to a
        # single score.  Returns (sigmoid probability, raw score).
        h1 = lrelu( tf.nn.conv2d( image, self.discrim_W1, strides=[1,2,2,1], padding='SAME' ))
        h2 = lrelu( batchnormalize( tf.nn.conv2d( h1, self.discrim_W2, strides=[1,2,2,1], padding='SAME'), g=self.discrim_bn_g2, b=self.discrim_bn_b2) )
        h3 = lrelu( batchnormalize( tf.nn.conv2d( h2, self.discrim_W3, strides=[1,2,2,1], padding='SAME'), g=self.discrim_bn_g3, b=self.discrim_bn_b3) )
        h4 = lrelu( batchnormalize( tf.nn.conv2d( h3, self.discrim_W4, strides=[1,2,2,1], padding='SAME'), g=self.discrim_bn_g4, b=self.discrim_bn_b4) )
        h4 = tf.reshape(h4, [self.batch_size, -1])
        h5 = tf.matmul( h4, self.discrim_W5 )
        y = tf.nn.sigmoid(h5)
        return y, h5

    def generate(self, Z):
        # Latent Z -> 4x4 feature map -> upsample x2 four times -> tanh image.
        h1 = tf.nn.relu(batchnormalize(tf.matmul(Z, self.gen_W1), g=self.gen_bn_g1, b=self.gen_bn_b1))
        h1 = tf.reshape(h1, [self.batch_size,4,4,self.dim_W1])

        output_shape_l2 = [self.batch_size,8,8,self.dim_W2]
        h2 = tf.nn.conv2d_transpose(h1, self.gen_W2, output_shape=output_shape_l2, strides=[1,2,2,1])
        h2 = tf.nn.relu( batchnormalize(h2, g=self.gen_bn_g2, b=self.gen_bn_b2) )

        output_shape_l3 = [self.batch_size,16,16,self.dim_W3]
        h3 = tf.nn.conv2d_transpose(h2, self.gen_W3, output_shape=output_shape_l3, strides=[1,2,2,1])
        h3 = tf.nn.relu( batchnormalize(h3, g=self.gen_bn_g3, b=self.gen_bn_b3) )

        output_shape_l4 = [self.batch_size,32,32,self.dim_W4]
        h4 = tf.nn.conv2d_transpose(h3, self.gen_W4, output_shape=output_shape_l4, strides=[1,2,2,1])
        h4 = tf.nn.relu( batchnormalize(h4, g=self.gen_bn_g4, b=self.gen_bn_b4) )

        output_shape_l5 = [self.batch_size,64,64,self.dim_W5]
        h5 = tf.nn.conv2d_transpose(h4, self.gen_W5, output_shape=output_shape_l5, strides=[1,2,2,1])
        x = tf.nn.tanh(h5)
        return x

    def samples_generator(self, batch_size):
        # Same pipeline as generate() but with a caller-chosen batch size for
        # periodic visualization.  NOTE(review): unlike generate(), the
        # learned BN gain/bias parameters are NOT applied here — looks like
        # an oversight in the original; confirm before relying on sample
        # quality matching training-time outputs.
        Z = tf.placeholder(tf.float32, [batch_size, self.dim_z])
        h1 = tf.nn.relu(batchnormalize(tf.matmul(Z, self.gen_W1)))
        h1 = tf.reshape(h1, [batch_size,4,4,self.dim_W1])

        output_shape_l2 = [batch_size,8,8,self.dim_W2]
        h2 = tf.nn.conv2d_transpose(h1, self.gen_W2, output_shape=output_shape_l2, strides=[1,2,2,1])
        h2 = tf.nn.relu( batchnormalize(h2) )

        output_shape_l3 = [batch_size,16,16,self.dim_W3]
        h3 = tf.nn.conv2d_transpose(h2, self.gen_W3, output_shape=output_shape_l3, strides=[1,2,2,1])
        h3 = tf.nn.relu( batchnormalize(h3) )

        output_shape_l4 = [batch_size,32,32,self.dim_W4]
        h4 = tf.nn.conv2d_transpose(h3, self.gen_W4, output_shape=output_shape_l4, strides=[1,2,2,1])
        h4 = tf.nn.relu( batchnormalize(h4) )

        output_shape_l5 = [batch_size,64,64,self.dim_W5]
        h5 = tf.nn.conv2d_transpose(h4, self.gen_W5, output_shape=output_shape_l5, strides=[1,2,2,1])
        x = tf.nn.tanh(h5)
        return Z, x
# ============ FILE: face/train.py ============
# Training loop for the CelebA face DCGAN: alternates discriminator and
# generator updates and dumps a 14x14 sample grid every 100 iterations.
import ipdb
import os
import pandas as pd
import numpy as np
from model import *
from util import *

# Hyper-parameters.
n_epochs = 100
learning_rate = 0.0002
batch_size = 128
image_shape = [64, 64, 3]
dim_z = 100            # latent vector size
dim_W1 = 1024
dim_W2 = 512
dim_W3 = 256
dim_W4 = 128
dim_W5 = 3
visualize_dim = 196    # 14x14 sample grid

face_image_path = '/media/storage3/Study/data/celeb/img_align_celeba'
# BUG FIX: wrap in list() — under Python 3 (this file already uses print()
# as a function) filter() returns a lazy iterator, which breaks
# np.random.shuffle() and slicing below.
face_images = list(filter(lambda x: x.endswith('jpg'), os.listdir(face_image_path)))

dcgan_model = DCGAN(
        batch_size=batch_size,
        image_shape=image_shape,
        dim_z=dim_z,
        dim_W1=dim_W1,
        dim_W2=dim_W2,
        dim_W3=dim_W3,
        dim_W4=dim_W4,
        dim_W5=dim_W5
        )

Z_tf, image_tf, d_cost_tf, g_cost_tf, p_real, p_gen, h_real, h_gen = dcgan_model.build_model()
sess = tf.InteractiveSession()
saver = tf.train.Saver(max_to_keep=10)

# Select parameters by name prefix so each optimizer only updates its net.
discrim_vars = list(filter(lambda x: x.name.startswith('discrim'), tf.trainable_variables()))
gen_vars = list(filter(lambda x: x.name.startswith('gen'), tf.trainable_variables()))

train_op_discrim = tf.train.AdamOptimizer(learning_rate, beta1=0.5).minimize(d_cost_tf, var_list=discrim_vars)
train_op_gen = tf.train.AdamOptimizer(learning_rate, beta1=0.5).minimize(g_cost_tf, var_list=gen_vars)

Z_tf_sample, image_tf_sample = dcgan_model.samples_generator(batch_size=visualize_dim)

tf.initialize_all_variables().run()

# Fixed noise so successive visualizations are comparable.
Z_np_sample = np.random.uniform(-1, 1, size=(visualize_dim, dim_z))
iterations = 0
k = 2  # G is updated on every k-th iteration, D on the rest

for epoch in range(n_epochs):
    np.random.shuffle(face_images)
    for start, end in zip(
            range(0, len(face_images), batch_size),
            range(batch_size, len(face_images), batch_size)
            ):
        batch_image_files = face_images[start:end]
        # list() so np.array() sees a materialized sequence under Python 3.
        batch_images = list(map(lambda x: crop_resize(os.path.join(face_image_path, x)), batch_image_files))
        batch_images = np.array(batch_images).astype(np.float32)
        batch_z = np.random.uniform(-1, 1, size=[batch_size, dim_z]).astype(np.float32)

        if np.mod(iterations, k) == 0:
            # Generator update (D metrics recomputed for logging only).
            _, gen_loss_val = sess.run(
                    [train_op_gen, g_cost_tf],
                    feed_dict={Z_tf: batch_z})
            discrim_loss_val, p_real_val, p_gen_val, h_real_val, h_gen_val = sess.run(
                    [d_cost_tf, p_real, p_gen, h_real, h_gen],
                    feed_dict={Z_tf: batch_z, image_tf: batch_images})
            print("=========== updating G ==========")
            print("iteration:", iterations)
            print("gen loss:", gen_loss_val)
            print("discrim loss:", discrim_loss_val)
        else:
            # Discriminator update (G metrics recomputed for logging only).
            _, discrim_loss_val = sess.run(
                    [train_op_discrim, d_cost_tf],
                    feed_dict={Z_tf: batch_z, image_tf: batch_images})
            gen_loss_val, p_real_val, p_gen_val, h_real_val, h_gen_val = sess.run(
                    [g_cost_tf, p_real, p_gen, h_real, h_gen],
                    feed_dict={Z_tf: batch_z, image_tf: batch_images})
            print("=========== updating D ==========")
            print("iteration:", iterations)
            print("gen loss:", gen_loss_val)
            print("discrim loss:", discrim_loss_val)
            # BUG FIX: removed a stray ipdb.set_trace() left here — it
            # dropped into the debugger on every discriminator iteration,
            # making unattended training impossible.

        if np.mod(iterations, 100) == 0:
            generated_samples = sess.run(
                    image_tf_sample,
                    feed_dict={Z_tf_sample: Z_np_sample})
            # Map tanh output from [-1, 1] back to [0, 1] for saving.
            generated_samples = (generated_samples + 1.) / 2.
save_visualization(generated_samples, (14,14), save_path='./vis/sample_'+str(iterations/100)+'.jpg') iterations += 1 ================================================ FILE: face/util.py ================================================ import cv2 import scipy.misc import ipdb import numpy as np def crop_resize(image_path, resize_shape=(64,64)): image = cv2.imread(image_path) height, width, channel = image.shape if width == height: resized_image = cv2.resize(image, resize_shape) elif width > height: resized_image = cv2.resize(image, (int(width * float(resize_shape[0])/height), resize_shape[1])) cropping_length = int( (resized_image.shape[1] - resize_shape[0]) / 2) resized_image = resized_image[:,cropping_length:cropping_length+resize_shape[1]] else: resized_image = cv2.resize(image, (resize_shape[0], int(height * float(resize_shape[1])/width))) cropping_length = int( (resized_image.shape[0] - resize_shape[1]) / 2) resized_image = resized_image[cropping_length:cropping_length+resize_shape[0], :] return resized_image/127.5 - 1 def save_visualization(X, (nh, nw), save_path='./vis/sample.jpg'): h,w = X.shape[1], X.shape[2] img = np.zeros((h * nh, w * nw, 3)) for n,x in enumerate(X): j = n / nw i = n % nw img[j*h:j*h+h, i*w:i*w+w, :] = x scipy.misc.imsave(save_path, img) ================================================ FILE: lsun/model.py ================================================ #-*- coding: utf-8 -*- import tensorflow as tf import ipdb def batchnormalize(X, eps=1e-8, g=None, b=None): if X.get_shape().ndims == 4: mean = tf.reduce_mean(X, [0,1,2]) std = tf.reduce_mean( tf.square(X-mean), [0,1,2] ) X = (X-mean) / tf.sqrt(std+eps) if g is not None and b is not None: g = tf.reshape(g, [1,1,1,-1]) b = tf.reshape(b, [1,1,1,-1]) X = X*g + b elif X.get_shape().ndims == 2: mean = tf.reduce_mean(X, 0) std = tf.reduce_mean(tf.square(X-mean), 0) X = (X-mean) / tf.sqrt(std+eps)#std if g is not None and b is not None: g = tf.reshape(g, [1,-1]) b = tf.reshape(b, [1,-1]) X = X*g + 
# ============ FILE: lsun/model.py ============
# Same architecture as face/model.py; the only difference is that weights
# are initialized with truncated_normal instead of random_normal.
def batchnormalize(X, eps=1e-8, g=None, b=None):
    # Per-batch normalization; `std` actually holds the variance, so
    # dividing by sqrt(std+eps) is the standard BN formula.
    if X.get_shape().ndims == 4:
        mean = tf.reduce_mean(X, [0,1,2])
        std = tf.reduce_mean( tf.square(X-mean), [0,1,2] )
        X = (X-mean) / tf.sqrt(std+eps)
        if g is not None and b is not None:
            g = tf.reshape(g, [1,1,1,-1])
            b = tf.reshape(b, [1,1,1,-1])
            X = X*g + b
    elif X.get_shape().ndims == 2:
        mean = tf.reduce_mean(X, 0)
        std = tf.reduce_mean(tf.square(X-mean), 0)
        X = (X-mean) / tf.sqrt(std+eps)#std
        if g is not None and b is not None:
            g = tf.reshape(g, [1,-1])
            b = tf.reshape(b, [1,-1])
            X = X*g + b
    else:
        raise NotImplementedError
    return X

def lrelu(X, leak=0.2):
    # Leaky ReLU as f1*x + f2*|x|.
    f1 = 0.5 * (1 + leak)
    f2 = 0.5 * (1 - leak)
    return f1 * X + f2 * tf.abs(X)

def bce(o, t):
    # Element-wise binary cross-entropy on probabilities; clip keeps the
    # logs finite.  Callers reduce_mean the result.
    o = tf.clip_by_value(o, 1e-7, 1. - 1e-7)
    return -(t * tf.log(o) + (1.- t)*tf.log(1. - o))

class DCGAN():
    # DCGAN for 64x64x3 LSUN images; weights are plain tf.Variable
    # attributes, so both discriminate() calls in build_model share them.
    def __init__(
            self,
            batch_size=100,
            image_shape=[64,64,3],
            dim_z=100,
            dim_W1=1024,
            dim_W2=512,
            dim_W3=256,
            dim_W4=128,
            dim_W5=3,
            ):
        self.batch_size = batch_size
        self.image_shape = image_shape
        self.dim_z = dim_z
        self.dim_W1 = dim_W1
        self.dim_W2 = dim_W2
        self.dim_W3 = dim_W3
        self.dim_W4 = dim_W4
        self.dim_W5 = dim_W5

        # Generator parameters; BN gains start near 1, biases at 0.
        self.gen_W1 = tf.Variable(tf.truncated_normal([dim_z, dim_W1*4*4], stddev=0.02), name='gen_W1')
        self.gen_bn_g1 = tf.Variable( tf.truncated_normal([dim_W1*4*4], mean=1.0, stddev=0.02), name='gen_bn_g1')
        self.gen_bn_b1 = tf.Variable( tf.zeros([dim_W1*4*4]), name='gen_bn_b1')

        self.gen_W2 = tf.Variable(tf.truncated_normal([5,5,dim_W2, dim_W1], stddev=0.02), name='gen_W2')
        self.gen_bn_g2 = tf.Variable( tf.truncated_normal([dim_W2], mean=1.0, stddev=0.02), name='gen_bn_g2')
        self.gen_bn_b2 = tf.Variable( tf.zeros([dim_W2]), name='gen_bn_b2')

        self.gen_W3 = tf.Variable(tf.truncated_normal([5,5,dim_W3, dim_W2], stddev=0.02), name='gen_W3')
        self.gen_bn_g3 = tf.Variable( tf.truncated_normal([dim_W3], mean=1.0, stddev=0.02), name='gen_bn_g3')
        self.gen_bn_b3 = tf.Variable( tf.zeros([dim_W3]), name='gen_bn_b3')

        self.gen_W4 = tf.Variable(tf.truncated_normal([5,5,dim_W4, dim_W3], stddev=0.02), name='gen_W4')
        self.gen_bn_g4 = tf.Variable( tf.truncated_normal([dim_W4], mean=1.0, stddev=0.02), name='gen_bn_g4')
        self.gen_bn_b4 = tf.Variable( tf.zeros([dim_W4]), name='gen_bn_b4')

        self.gen_W5 = tf.Variable(tf.truncated_normal([5,5,dim_W5, dim_W4], stddev=0.02), name='gen_W5')

        # Discriminator parameters (first conv layer has no BN).
        self.discrim_W1 = tf.Variable(tf.truncated_normal([5,5,dim_W5,dim_W4], stddev=0.02), name='discrim_W1')

        self.discrim_W2 = tf.Variable(tf.truncated_normal([5,5,dim_W4,dim_W3], stddev=0.02), name='discrim_W2')
        self.discrim_bn_g2 = tf.Variable( tf.truncated_normal([dim_W3], mean=1.0, stddev=0.02), name='discrim_bn_g2')
        self.discrim_bn_b2 = tf.Variable( tf.zeros([dim_W3]), name='discrim_bn_b2')

        self.discrim_W3 = tf.Variable(tf.truncated_normal([5,5,dim_W3,dim_W2], stddev=0.02), name='discrim_W3')
        self.discrim_bn_g3 = tf.Variable( tf.truncated_normal([dim_W2], mean=1.0, stddev=0.02), name='discrim_bn_g3')
        self.discrim_bn_b3 = tf.Variable( tf.zeros([dim_W2]), name='discrim_bn_b3')

        self.discrim_W4 = tf.Variable(tf.truncated_normal([5,5,dim_W2,dim_W1], stddev=0.02), name='discrim_W4')
        self.discrim_bn_g4 = tf.Variable( tf.truncated_normal([dim_W1], mean=1.0, stddev=0.02), name='discrim_bn_g4')
        self.discrim_bn_b4 = tf.Variable( tf.zeros([dim_W1]), name='discrim_bn_b4')

        self.discrim_W5 = tf.Variable(tf.truncated_normal([4*4*dim_W1,1], stddev=0.02), name='discrim_W5')

        self.gen_params = [
                self.gen_W1, self.gen_bn_g1, self.gen_bn_b1,
                self.gen_W2, self.gen_bn_g2, self.gen_bn_b2,
                self.gen_W3, self.gen_bn_g3, self.gen_bn_b3,
                self.gen_W4, self.gen_bn_g4, self.gen_bn_b4,
                self.gen_W5
                ]
        self.discrim_params = [
                self.discrim_W1,
                self.discrim_W2, self.discrim_bn_g2, self.discrim_bn_b2,
                self.discrim_W3, self.discrim_bn_g3, self.discrim_bn_b3,
                self.discrim_W4, self.discrim_bn_g4, self.discrim_bn_b4,
                self.discrim_W5
                ]

    def build_model(self):
        # Wire up the training graph; returns placeholders and both losses.
        Z = tf.placeholder(tf.float32, [self.batch_size, self.dim_z])
        image_real = tf.placeholder(tf.float32, [self.batch_size]+self.image_shape)
        image_gen = self.generate(Z)

        p_real, h_real = self.discriminate(image_real)
        p_gen, h_gen = self.discriminate(image_gen)

        discrim_cost_real = bce(p_real, tf.ones_like(p_real))
        discrim_cost_gen = bce(p_gen, tf.zeros_like(p_gen))
        discrim_cost = tf.reduce_mean(discrim_cost_real) + tf.reduce_mean(discrim_cost_gen)
        gen_cost = tf.reduce_mean(bce( p_gen, tf.ones_like(p_gen) ))

        return Z, image_real, discrim_cost, gen_cost, p_real, p_gen, h_real, h_gen

    def discriminate(self, image):
        # 64x64 -> 4x4 via four stride-2 convs, then FC to a single score.
        h1 = lrelu( tf.nn.conv2d( image, self.discrim_W1, strides=[1,2,2,1], padding='SAME' ))
        h2 = lrelu( batchnormalize( tf.nn.conv2d( h1, self.discrim_W2, strides=[1,2,2,1], padding='SAME'), g=self.discrim_bn_g2, b=self.discrim_bn_b2) )
        h3 = lrelu( batchnormalize( tf.nn.conv2d( h2, self.discrim_W3, strides=[1,2,2,1], padding='SAME'), g=self.discrim_bn_g3, b=self.discrim_bn_b3) )
        h4 = lrelu( batchnormalize( tf.nn.conv2d( h3, self.discrim_W4, strides=[1,2,2,1], padding='SAME'), g=self.discrim_bn_g4, b=self.discrim_bn_b4) )
        h4 = tf.reshape(h4, [self.batch_size, -1])
        h5 = tf.matmul( h4, self.discrim_W5 )
        y = tf.nn.sigmoid(h5)
        return y, h5

    def generate(self, Z):
        # Latent Z -> 4x4 map -> four x2 upsamples -> tanh image in [-1,1].
        h1 = tf.nn.relu(batchnormalize(tf.matmul(Z, self.gen_W1), g=self.gen_bn_g1, b=self.gen_bn_b1))
        h1 = tf.reshape(h1, [self.batch_size,4,4,self.dim_W1])

        output_shape_l2 = [self.batch_size,8,8,self.dim_W2]
        h2 = tf.nn.conv2d_transpose(h1, self.gen_W2, output_shape=output_shape_l2, strides=[1,2,2,1])
        h2 = tf.nn.relu( batchnormalize(h2, g=self.gen_bn_g2, b=self.gen_bn_b2) )

        output_shape_l3 = [self.batch_size,16,16,self.dim_W3]
        h3 = tf.nn.conv2d_transpose(h2, self.gen_W3, output_shape=output_shape_l3, strides=[1,2,2,1])
        h3 = tf.nn.relu( batchnormalize(h3, g=self.gen_bn_g3, b=self.gen_bn_b3) )

        output_shape_l4 = [self.batch_size,32,32,self.dim_W4]
        h4 = tf.nn.conv2d_transpose(h3, self.gen_W4, output_shape=output_shape_l4, strides=[1,2,2,1])
        h4 = tf.nn.relu( batchnormalize(h4, g=self.gen_bn_g4, b=self.gen_bn_b4) )

        output_shape_l5 = [self.batch_size,64,64,self.dim_W5]
        h5 = tf.nn.conv2d_transpose(h4, self.gen_W5, output_shape=output_shape_l5, strides=[1,2,2,1])
        x = tf.nn.tanh(h5)
        return x

    def samples_generator(self, batch_size):
        # Same as generate() with caller-chosen batch size for visualization.
        # NOTE(review): the learned BN gain/bias are not applied here —
        # mirrors face/model.py; confirm this is intentional.
        Z = tf.placeholder(tf.float32, [batch_size, self.dim_z])
        h1 = tf.nn.relu(batchnormalize(tf.matmul(Z, self.gen_W1)))
        h1 = tf.reshape(h1, [batch_size,4,4,self.dim_W1])

        output_shape_l2 = [batch_size,8,8,self.dim_W2]
        h2 = tf.nn.conv2d_transpose(h1, self.gen_W2, output_shape=output_shape_l2, strides=[1,2,2,1])
        h2 = tf.nn.relu( batchnormalize(h2) )

        output_shape_l3 = [batch_size,16,16,self.dim_W3]
        h3 = tf.nn.conv2d_transpose(h2, self.gen_W3, output_shape=output_shape_l3, strides=[1,2,2,1])
        h3 = tf.nn.relu( batchnormalize(h3) )

        output_shape_l4 = [batch_size,32,32,self.dim_W4]
        h4 = tf.nn.conv2d_transpose(h3, self.gen_W4, output_shape=output_shape_l4, strides=[1,2,2,1])
        h4 = tf.nn.relu( batchnormalize(h4) )

        output_shape_l5 = [batch_size,64,64,self.dim_W5]
        h5 = tf.nn.conv2d_transpose(h4, self.gen_W5, output_shape=output_shape_l5, strides=[1,2,2,1])
        x = tf.nn.tanh(h5)
        return Z, x
# ============ FILE: lsun/train.py ============
# Training loop for the LSUN DCGAN.  Converted to Python-3-style print()
# calls and materialized filter()/map() results for consistency with
# face/train.py and mnist/train.py (which already use print functions).
import ipdb
import os
import pandas as pd
import numpy as np
import lmdb
from glob import glob
from model import *
from util import *

# Hyper-parameters (halved channel widths vs. the face model).
n_epochs = 100
learning_rate = 0.0002
batch_size = 128
image_shape = [64, 64, 3]
dim_z = 100
dim_W1 = 512   # 1024
dim_W2 = 256   # 512
dim_W3 = 128   # 256
dim_W4 = 64    # 128
dim_W5 = 3
visualize_dim = 196

lsun_image_path = '/media/storage3/Study/data/lsun/images/0'
lsun_images = []
for dir, _, _ in os.walk(lsun_image_path):
    lsun_images.append(glob(os.path.join(dir, '*.jpg')))
lsun_images = lsun_images[0]

dcgan_model = DCGAN(
        batch_size=batch_size,
        image_shape=image_shape,
        dim_z=dim_z,
        dim_W1=dim_W1,
        dim_W2=dim_W2,
        dim_W3=dim_W3,
        dim_W4=dim_W4,
        dim_W5=dim_W5
        )

Z_tf, image_tf, d_cost_tf, g_cost_tf, p_real, p_gen, h_real, h_gen = dcgan_model.build_model()
sess = tf.InteractiveSession()
saver = tf.train.Saver(max_to_keep=10)

# list() — filter() is lazy on Python 3.
discrim_vars = list(filter(lambda x: x.name.startswith('discrim'), tf.trainable_variables()))
gen_vars = list(filter(lambda x: x.name.startswith('gen'), tf.trainable_variables()))

train_op_discrim = tf.train.AdamOptimizer(learning_rate, beta1=0.5).minimize(d_cost_tf, var_list=discrim_vars)
train_op_gen = tf.train.AdamOptimizer(learning_rate, beta1=0.5).minimize(g_cost_tf, var_list=gen_vars)

Z_tf_sample, image_tf_sample = dcgan_model.samples_generator(batch_size=visualize_dim)

tf.initialize_all_variables().run()

# Fixed noise so successive visualizations are comparable.
Z_np_sample = np.random.uniform(-1, 1, size=(visualize_dim, dim_z))
iterations = 0
k = 2  # D is updated on every k-th iteration, G on the rest

for epoch in range(n_epochs):
    for start, end in zip(
            range(0, len(lsun_images), batch_size),
            range(batch_size, len(lsun_images), batch_size)
            ):
        batch_image_files = lsun_images[start:end]
        # NOTE(review): glob() above already yields absolute paths, so
        # os.path.join(lsun_image_path, x) simply returns x here — confirm
        # paths are absolute before changing this.
        batch_images = list(map(lambda x: crop_resize(os.path.join(lsun_image_path, x)), batch_image_files))
        batch_images = np.array(batch_images).astype(np.float32)
        batch_z = np.random.uniform(-1, 1, size=[batch_size, dim_z]).astype(np.float32)

        # Diagnostics computed on the pre-update parameters.
        p_real_val, p_gen_val, h_real_val, h_gen_val = sess.run(
                [p_real, p_gen, h_real, h_gen],
                feed_dict={Z_tf: batch_z, image_tf: batch_images})

        if np.mod(iterations, k) != 0:
            _, gen_loss_val = sess.run(
                    [train_op_gen, g_cost_tf],
                    feed_dict={Z_tf: batch_z})
            print("=========== updating G ==========")
            print("iteration:", iterations)
            print("gen loss:", gen_loss_val)
        else:
            _, discrim_loss_val = sess.run(
                    [train_op_discrim, d_cost_tf],
                    feed_dict={Z_tf: batch_z, image_tf: batch_images})
            print("=========== updating D ==========")
            print("iteration:", iterations)
            print("discrim loss:", discrim_loss_val)

        print("real h:", h_real_val.mean(), " gen h:", h_gen_val.mean())

        if np.mod(iterations, 100) == 0:
            generated_samples = sess.run(
                    image_tf_sample,
                    feed_dict={Z_tf_sample: Z_np_sample})
            # Map tanh output from [-1, 1] back to [0, 1] for saving.
            generated_samples = (generated_samples + 1.) / 2.
save_visualization(generated_samples, (14,14), save_path='./vis/sample_'+str(iterations/100)+'.jpg') iterations += 1 ================================================ FILE: lsun/util.py ================================================ import cv2 import scipy.misc import ipdb import numpy as np def crop_resize(image_path, resize_shape=(64,64)): image = cv2.imread(image_path) height, width, channel = image.shape if width == height: resized_image = cv2.resize(image, resize_shape) elif width > height: resized_image = cv2.resize(image, (int(width * float(resize_shape[0])/height), resize_shape[1])) cropping_length = int( (resized_image.shape[1] - resize_shape[0]) / 2) resized_image = resized_image[:,cropping_length:cropping_length+resize_shape[1]] else: resized_image = cv2.resize(image, (resize_shape[0], int(height * float(resize_shape[1])/width))) cropping_length = int( (resized_image.shape[0] - resize_shape[1]) / 2) resized_image = resized_image[cropping_length:cropping_length+resize_shape[0], :] return (resized_image - 127.5) / 127.5 #return resized_image/127.5 - 1 def save_visualization(X, (nh, nw), save_path='./vis/sample.jpg'): h,w = X.shape[1], X.shape[2] img = np.zeros((h * nh, w * nw, 3)) for n,x in enumerate(X): j = n / nw i = n % nw img[j*h:j*h+h, i*w:i*w+w, :] = x scipy.misc.imsave(save_path, img) ================================================ FILE: mnist/load.py ================================================ import sys sys.path.append('..') import numpy as np import os data_dir = 'data/' def mnist(): fd = open(os.path.join(data_dir,'train-images-idx3-ubyte')) loaded = np.fromfile(file=fd,dtype=np.uint8) trX = loaded[16:].reshape((60000,28*28)).astype(float) fd = open(os.path.join(data_dir,'train-labels-idx1-ubyte')) loaded = np.fromfile(file=fd,dtype=np.uint8) trY = loaded[8:].reshape((60000)) fd = open(os.path.join(data_dir,'t10k-images-idx3-ubyte')) loaded = np.fromfile(file=fd,dtype=np.uint8) teX = loaded[16:].reshape((10000,28*28)).astype(float) fd = 
# ============ FILE: mnist/load.py ============
import sys
sys.path.append('..')
import numpy as np
import os

data_dir = 'data/'

def mnist():
    """Load the raw MNIST idx files from `data_dir`.

    Returns (trX, teX, trY, teY): images flattened to 784 floats and the
    corresponding uint8 label vectors.
    """
    # BUG FIX: the original opened every idx file in text mode and never
    # closed it; binary mode is required for np.fromfile correctness on all
    # platforms, and `with` closes the descriptors deterministically.
    with open(os.path.join(data_dir, 'train-images-idx3-ubyte'), 'rb') as fd:
        loaded = np.fromfile(file=fd, dtype=np.uint8)
    trX = loaded[16:].reshape((60000, 28 * 28)).astype(float)  # skip 16-byte idx header

    with open(os.path.join(data_dir, 'train-labels-idx1-ubyte'), 'rb') as fd:
        loaded = np.fromfile(file=fd, dtype=np.uint8)
    trY = loaded[8:].reshape((60000))  # skip 8-byte idx header

    with open(os.path.join(data_dir, 't10k-images-idx3-ubyte'), 'rb') as fd:
        loaded = np.fromfile(file=fd, dtype=np.uint8)
    teX = loaded[16:].reshape((10000, 28 * 28)).astype(float)

    with open(os.path.join(data_dir, 't10k-labels-idx1-ubyte'), 'rb') as fd:
        loaded = np.fromfile(file=fd, dtype=np.uint8)
    teY = loaded[8:].reshape((10000))

    trY = np.asarray(trY)
    teY = np.asarray(teY)

    return trX, teX, trY, teY

def mnist_with_valid_set():
    """Shuffle the training split and carve off the last 10k as validation."""
    trX, teX, trY, teY = mnist()

    train_inds = np.arange(len(trX))
    np.random.shuffle(train_inds)
    trX = trX[train_inds]
    trY = trY[train_inds]
    #trX, trY = shuffle(trX, trY)
    vaX = trX[50000:]
    vaY = trY[50000:]
    trX = trX[:50000]
    trY = trY[:50000]

    return trX, vaX, teX, trY, vaY, teY
# ============ FILE: mnist/model.py ============
#-*- coding: utf-8 -*-
import tensorflow as tf

def batchnormalize(X, eps=1e-8, g=None, b=None):
    # Per-batch normalization (no running averages); `std` actually holds
    # the variance, so dividing by sqrt(std+eps) is the standard formula.
    if X.get_shape().ndims == 4:
        mean = tf.reduce_mean(X, [0, 1, 2])
        std = tf.reduce_mean(tf.square(X - mean), [0, 1, 2])
        X = (X - mean) / tf.sqrt(std + eps)
        if g is not None and b is not None:
            g = tf.reshape(g, [1, 1, 1, -1])
            b = tf.reshape(b, [1, 1, 1, -1])
            X = X * g + b
    elif X.get_shape().ndims == 2:
        mean = tf.reduce_mean(X, 0)
        std = tf.reduce_mean(tf.square(X - mean), 0)
        X = (X - mean) / tf.sqrt(std + eps)
        if g is not None and b is not None:
            g = tf.reshape(g, [1, -1])
            b = tf.reshape(b, [1, -1])
            X = X * g + b
    else:
        raise NotImplementedError
    return X

def lrelu(X, leak=0.2):
    # Leaky ReLU as f1*x + f2*|x| (== x for x>0, leak*x for x<0).
    f1 = 0.5 * (1 + leak)
    f2 = 0.5 * (1 - leak)
    return f1 * X + f2 * tf.abs(X)

def bce(o, t):
    """Mean binary cross-entropy computed directly from logits `o`.

    BUG FIX: the original first clipped `o` to (1e-7, 1-1e-7) and then
    passed it as *logits* to sigmoid_cross_entropy_with_logits.  That clip
    is only meaningful for probabilities; applied to logits it collapses
    every score into a near-[0,1] band and destroys all negative logits.
    sigmoid_cross_entropy_with_logits is already numerically stable, so no
    clipping is needed.
    """
    return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=o, labels=t))

class DCGAN():
    # Conditional DCGAN for 28x28x1 MNIST: the one-hot label Y is
    # concatenated to every layer's input in both networks.
    def __init__(
            self,
            batch_size=100,
            image_shape=[28, 28, 1],
            dim_z=100,
            dim_y=10,        # number of classes (one-hot length)
            dim_W1=1024,
            dim_W2=128,
            dim_W3=64,
            dim_channel=1,
            ):
        self.batch_size = batch_size
        self.image_shape = image_shape
        self.dim_z = dim_z
        self.dim_y = dim_y
        self.dim_W1 = dim_W1
        self.dim_W2 = dim_W2
        self.dim_W3 = dim_W3
        self.dim_channel = dim_channel

        # Every weight's input dimension includes +dim_y for the label.
        self.gen_W1 = tf.Variable(tf.random_normal([dim_z + dim_y, dim_W1], stddev=0.02), name='gen_W1')
        self.gen_W2 = tf.Variable(tf.random_normal([dim_W1 + dim_y, dim_W2 * 7 * 7], stddev=0.02), name='gen_W2')
        self.gen_W3 = tf.Variable(tf.random_normal([5, 5, dim_W3, dim_W2 + dim_y], stddev=0.02), name='gen_W3')
        self.gen_W4 = tf.Variable(tf.random_normal([5, 5, dim_channel, dim_W3 + dim_y], stddev=0.02), name='gen_W4')

        self.discrim_W1 = tf.Variable(tf.random_normal([5, 5, dim_channel + dim_y, dim_W3], stddev=0.02), name='discrim_W1')
        self.discrim_W2 = tf.Variable(tf.random_normal([5, 5, dim_W3 + dim_y, dim_W2], stddev=0.02), name='discrim_W2')
        self.discrim_W3 = tf.Variable(tf.random_normal([dim_W2 * 7 * 7 + dim_y, dim_W1], stddev=0.02), name='discrim_W3')
        self.discrim_W4 = tf.Variable(tf.random_normal([dim_W1 + dim_y, 1], stddev=0.02), name='discrim_W4')

    def build_model(self):
        # Wire the conditional training graph; returns placeholders, the two
        # losses, and the discriminator probabilities for monitoring.
        Z = tf.placeholder(tf.float32, [self.batch_size, self.dim_z])
        Y = tf.placeholder(tf.float32, [self.batch_size, self.dim_y])

        image_real = tf.placeholder(tf.float32, [self.batch_size] + self.image_shape)
        h4 = self.generate(Z, Y)
        image_gen = tf.nn.sigmoid(h4)  # generator emits logits; squash to [0,1]

        raw_real = self.discriminate(image_real, Y)
        p_real = tf.nn.sigmoid(raw_real)
        raw_gen = self.discriminate(image_gen, Y)
        p_gen = tf.nn.sigmoid(raw_gen)

        # bce() consumes the raw scores as logits.
        discrim_cost_real = bce(raw_real, tf.ones_like(raw_real))
        discrim_cost_gen = bce(raw_gen, tf.zeros_like(raw_gen))
        discrim_cost = discrim_cost_real + discrim_cost_gen
        gen_cost = bce(raw_gen, tf.ones_like(raw_gen))

        return Z, Y, image_real, discrim_cost, gen_cost, p_real, p_gen

    def discriminate(self, image, Y):
        # Label is broadcast to a spatial map and concatenated at every layer.
        yb = tf.reshape(Y, tf.stack([self.batch_size, 1, 1, self.dim_y]))
        X = tf.concat(axis=3, values=[image, yb * tf.ones([self.batch_size, 28, 28, self.dim_y])])

        h1 = lrelu(tf.nn.conv2d(X, self.discrim_W1, strides=[1, 2, 2, 1], padding='SAME'))
        h1 = tf.concat(axis=3, values=[h1, yb * tf.ones([self.batch_size, 14, 14, self.dim_y])])

        h2 = lrelu(batchnormalize(tf.nn.conv2d(h1, self.discrim_W2, strides=[1, 2, 2, 1], padding='SAME')))
        h2 = tf.reshape(h2, [self.batch_size, -1])
        h2 = tf.concat(axis=1, values=[h2, Y])

        h3 = lrelu(batchnormalize(tf.matmul(h2, self.discrim_W3)))
        h3 = tf.concat(axis=1, values=[h3, Y])

        # NOTE(review): applying lrelu+BN to the final 1-unit score is
        # unusual — callers treat h4 as a logit.  Preserved as-is; confirm
        # against the reference implementation before changing.
        h4 = lrelu(batchnormalize(tf.matmul(h3, self.discrim_W4)))
        return h4

    def generate(self, Z, Y):
        # Z+Y -> FC -> FC to 7x7 map -> two x2 upsamples -> 28x28 logits.
        yb = tf.reshape(Y, [self.batch_size, 1, 1, self.dim_y])
        Z = tf.concat(axis=1, values=[Z, Y])
        h1 = tf.nn.relu(batchnormalize(tf.matmul(Z, self.gen_W1)))
        h1 = tf.concat(axis=1, values=[h1, Y])
        h2 = tf.nn.relu(batchnormalize(tf.matmul(h1, self.gen_W2)))
        h2 = tf.reshape(h2, [self.batch_size, 7, 7, self.dim_W2])
        h2 = tf.concat(axis=3, values=[h2, yb * tf.ones([self.batch_size, 7, 7, self.dim_y])])

        output_shape_l3 = [self.batch_size, 14, 14, self.dim_W3]
        h3 = tf.nn.conv2d_transpose(h2, self.gen_W3, output_shape=output_shape_l3, strides=[1, 2, 2, 1])
        h3 = tf.nn.relu(batchnormalize(h3))
        h3 = tf.concat(axis=3, values=[h3, yb * tf.ones([self.batch_size, 14, 14, self.dim_y])])

        output_shape_l4 = [self.batch_size, 28, 28, self.dim_channel]
        h4 = tf.nn.conv2d_transpose(h3, self.gen_W4, output_shape=output_shape_l4, strides=[1, 2, 2, 1])
        return h4  # raw logits; callers apply sigmoid

    def samples_generator(self, batch_size):
        # Same pipeline as generate() with a caller-chosen batch size;
        # returns sigmoid images in [0, 1].
        Z = tf.placeholder(tf.float32, [batch_size, self.dim_z])
        Y = tf.placeholder(tf.float32, [batch_size, self.dim_y])

        yb = tf.reshape(Y, [batch_size, 1, 1, self.dim_y])
        Z_ = tf.concat(axis=1, values=[Z, Y])
        h1 = tf.nn.relu(batchnormalize(tf.matmul(Z_, self.gen_W1)))
        h1 = tf.concat(axis=1, values=[h1, Y])
        h2 = tf.nn.relu(batchnormalize(tf.matmul(h1, self.gen_W2)))
        h2 = tf.reshape(h2, [batch_size, 7, 7, self.dim_W2])
        h2 = tf.concat(axis=3, values=[h2, yb * tf.ones([batch_size, 7, 7, self.dim_y])])

        output_shape_l3 = [batch_size, 14, 14, self.dim_W3]
        h3 = tf.nn.conv2d_transpose(h2, self.gen_W3, output_shape=output_shape_l3, strides=[1, 2, 2, 1])
        h3 = tf.nn.relu(batchnormalize(h3))
        h3 = tf.concat(axis=3, values=[h3, yb * tf.ones([batch_size, 14, 14, self.dim_y])])

        output_shape_l4 = [batch_size, 28, 28, self.dim_channel]
        h4 = tf.nn.conv2d_transpose(h3, self.gen_W4, output_shape=output_shape_l4, strides=[1, 2, 2, 1])
        x = tf.nn.sigmoid(h4)
        return Z, Y, x
values=[h2, yb*tf.ones([batch_size, 7, 7, self.dim_y])]) output_shape_l3 = [batch_size,14,14,self.dim_W3] h3 = tf.nn.conv2d_transpose(h2, self.gen_W3, output_shape=output_shape_l3, strides=[1,2,2,1]) h3 = tf.nn.relu( batchnormalize(h3) ) h3 = tf.concat(axis=3, values=[h3, yb*tf.ones([batch_size, 14,14,self.dim_y])] ) output_shape_l4 = [batch_size,28,28,self.dim_channel] h4 = tf.nn.conv2d_transpose(h3, self.gen_W4, output_shape=output_shape_l4, strides=[1,2,2,1]) x = tf.nn.sigmoid(h4) return Z,Y,x ================================================ FILE: mnist/train.py ================================================ import os import numpy as np from model import * from util import * from load import mnist_with_valid_set n_epochs = 100 learning_rate = 0.0002 batch_size = 128 image_shape = [28,28,1] dim_z = 100 dim_W1 = 1024 dim_W2 = 128 dim_W3 = 64 dim_channel = 1 visualize_dim=196 trX, vaX, teX, trY, vaY, teY = mnist_with_valid_set() dcgan_model = DCGAN( batch_size=batch_size, image_shape=image_shape, dim_z=dim_z, dim_W1=dim_W1, dim_W2=dim_W2, dim_W3=dim_W3, ) Z_tf, Y_tf, image_tf, d_cost_tf, g_cost_tf, p_real, p_gen = dcgan_model.build_model() sess = tf.InteractiveSession() saver = tf.train.Saver(max_to_keep=10) discrim_vars = filter(lambda x: x.name.startswith('discrim'), tf.trainable_variables()) gen_vars = filter(lambda x: x.name.startswith('gen'), tf.trainable_variables()) discrim_vars = [i for i in discrim_vars] gen_vars = [i for i in gen_vars] train_op_discrim = tf.train.AdamOptimizer(learning_rate, beta1=0.5).minimize(d_cost_tf, var_list=discrim_vars) train_op_gen = tf.train.AdamOptimizer(learning_rate, beta1=0.5).minimize(g_cost_tf, var_list=gen_vars) Z_tf_sample, Y_tf_sample, image_tf_sample = dcgan_model.samples_generator(batch_size=visualize_dim) tf.global_variables_initializer().run() Z_np_sample = np.random.uniform(-1, 1, size=(visualize_dim,dim_z)) Y_np_sample = OneHot( np.random.randint(10, size=[visualize_dim])) iterations = 0 k = 2 step = 200 for 
epoch in range(n_epochs): index = np.arange(len(trY)) np.random.shuffle(index) trX = trX[index] trY = trY[index] for start, end in zip( range(0, len(trY), batch_size), range(batch_size, len(trY), batch_size) ): Xs = trX[start:end].reshape( [-1, 28, 28, 1]) / 255. Ys = OneHot(trY[start:end]) Zs = np.random.uniform(-1, 1, size=[batch_size, dim_z]).astype(np.float32) if np.mod( iterations, k ) != 0: _, gen_loss_val = sess.run( [train_op_gen, g_cost_tf], feed_dict={ Z_tf:Zs, Y_tf:Ys }) discrim_loss_val, p_real_val, p_gen_val = sess.run([d_cost_tf,p_real,p_gen], feed_dict={Z_tf:Zs, image_tf:Xs, Y_tf:Ys}) print("=========== updating G ==========") print("iteration:", iterations) print("gen loss:", gen_loss_val) print("discrim loss:", discrim_loss_val) else: _, discrim_loss_val = sess.run( [train_op_discrim, d_cost_tf], feed_dict={ Z_tf:Zs, Y_tf:Ys, image_tf:Xs }) gen_loss_val, p_real_val, p_gen_val = sess.run([g_cost_tf, p_real, p_gen], feed_dict={Z_tf:Zs, image_tf:Xs, Y_tf:Ys}) print("=========== updating D ==========") print("iteration:", iterations) print("gen loss:", gen_loss_val) print("discrim loss:", discrim_loss_val) print("Average P(real)=", p_real_val.mean()) print("Average P(gen)=", p_gen_val.mean()) if np.mod(iterations, step) == 0: generated_samples = sess.run( image_tf_sample, feed_dict={ Z_tf_sample:Z_np_sample, Y_tf_sample:Y_np_sample }) generated_samples = (generated_samples + 1.)/2. save_visualization(generated_samples, (14,14), save_path='./vis/sample_%04d.jpg' % int(iterations/step)) iterations += 1 ================================================ FILE: mnist/util.py ================================================ import cv2 import scipy.misc import numpy as np def OneHot(X, n=None, negative_class=0.): X = np.asarray(X).flatten() if n is None: n = np.max(X) + 1 Xoh = np.ones((len(X), n)) * negative_class Xoh[np.arange(len(X)), X] = 1. 
return Xoh def crop_resize(image_path, resize_shape=(64,64)): image = cv2.imread(image_path) height, width, channel = image.shape if width == height: resized_image = cv2.resize(image, resize_shape) elif width > height: resized_image = cv2.resize(image, (int(width * float(resize_shape[0])//height), resize_shape[1])) cropping_length = int( (resized_image.shape[1] - resize_shape[0]) // 2) resized_image = resized_image[:,cropping_length:cropping_length+resize_shape[1]] else: resized_image = cv2.resize(image, (resize_shape[0], int(height * float(resize_shape[1])/width))) cropping_length = int( (resized_image.shape[0] - resize_shape[1]) // 2) resized_image = resized_image[cropping_length:cropping_length+resize_shape[0], :] return resized_image/127.5 - 1 def save_visualization(X, nh_nw, save_path='./vis/sample.jpg'): h,w = X.shape[1], X.shape[2] img = np.zeros((h * nh_nw[0], w * nh_nw[1], 3)) for n,x in enumerate(X): j = n // nh_nw[1] i = n % nh_nw[1] img[j*h:j*h+h, i*w:i*w+w, :] = x scipy.misc.imsave(save_path, img)