Repository: lazyprogrammer/facial-expression-recognition Branch: master Commit: df09c8c56ce8 Files: 12 Total size: 32.9 KB Directory structure: gitextract_o6irw1n6/ ├── .gitignore ├── README.md ├── ann.py ├── ann_sigmoid.py ├── ann_tf.py ├── ann_theano.py ├── cnn_tf.py ├── cnn_theano.py ├── logistic.py ├── logistic_sigmoid.py ├── show_images.py └── util.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ fer2013* .DS_Store *.pyc TEST.py ================================================ FILE: README.md ================================================ Relevant Course URLs: * https://deeplearningcourses.com/c/data-science-logistic-regression-in-python/ * https://deeplearningcourses.com/c/data-science-deep-learning-in-python/ * https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow/ * https://deeplearningcourses.com/c/deep-learning-convolutional-neural-networks-theano-tensorflow/ Data: https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge If you get "An error occurred: Data not found": https://archive.org/download/fer2013_202311/fer2013.csv ================================================ FILE: ann.py ================================================ from __future__ import print_function, division from builtins import range # Note: you may need to update your version of future # sudo pip install -U future import numpy as np import matplotlib.pyplot as plt from util import getData, softmax, cost2, y2indicator, error_rate, relu from sklearn.utils import shuffle class ANN(object): def __init__(self, M): self.M = M def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-6, reg=1e-6, epochs=10000, show_fig=False): N, D = X.shape K = len(set(Y)) T = y2indicator(Y) self.W1 = np.random.randn(D, self.M) / np.sqrt(D) self.b1 = np.zeros(self.M) self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M) self.b2 = np.zeros(K) costs = [] best_validation_error = 1 for i in range(epochs): # forward propagation and cost calculation pY, Z = self.forward(X) # gradient descent step pY_T = pY - T self.W2 -= learning_rate*(Z.T.dot(pY_T) + reg*self.W2) self.b2 -= learning_rate*(pY_T.sum(axis=0) + reg*self.b2) # dZ = pY_T.dot(self.W2.T) * (Z > 0) # relu dZ = pY_T.dot(self.W2.T) * (1 - Z*Z) # tanh self.W1 -= learning_rate*(X.T.dot(dZ) + reg*self.W1) self.b1 -= learning_rate*(dZ.sum(axis=0) + reg*self.b1) if i % 10 == 0: pYvalid, _ = self.forward(Xvalid) c = cost2(Yvalid, pYvalid) costs.append(c) e = error_rate(Yvalid, np.argmax(pYvalid, axis=1)) print("i:", i, "cost:", c, "error:", e) if e < best_validation_error: best_validation_error = e print("best_validation_error:", best_validation_error) if show_fig: plt.plot(costs) plt.show() def forward(self, X): # Z = relu(X.dot(self.W1) + self.b1) Z = np.tanh(X.dot(self.W1) + self.b1) return softmax(Z.dot(self.W2) + self.b2), Z def predict(self, X): pY, _ = self.forward(X) return np.argmax(pY, axis=1) def score(self, X, Y): prediction = self.predict(X) return 1 - error_rate(Y, prediction) def main(): Xtrain, Ytrain, Xvalid, Yvalid = getData() model = ANN(200) model.fit(Xtrain, Ytrain, Xvalid, Yvalid, reg=0, show_fig=True) print(model.score(Xvalid, Yvalid)) if __name__ == '__main__': main() ================================================ FILE: ann_sigmoid.py ================================================ from __future__ import print_function, division from builtins import range # Note: you may need to update your version of future # sudo pip install -U future import numpy as np import matplotlib.pyplot as plt from sklearn.utils import shuffle from util import getBinaryData, sigmoid, sigmoid_cost, error_rate, relu class ANN(object): def __init__(self, M): self.M = M def fit(self, X, Y, learning_rate=5e-7, reg=1.0, epochs=10000, show_fig=False): X, Y = shuffle(X, Y) Xvalid, Yvalid = X[-1000:], Y[-1000:] X, Y = X[:-1000], Y[:-1000] N, D = X.shape self.W1 = np.random.randn(D, self.M) / np.sqrt(D) self.b1 = np.zeros(self.M) self.W2 = np.random.randn(self.M) / np.sqrt(self.M) self.b2 = 0 costs = [] best_validation_error = 1 for i in range(epochs): # forward propagation and cost calculation pY, Z = self.forward(X) # gradient descent step pY_Y = pY - Y self.W2 -= learning_rate*(Z.T.dot(pY_Y) + reg*self.W2) self.b2 -= learning_rate*((pY_Y).sum() + reg*self.b2) # print "(pY_Y).dot(self.W2.T) shape:", (pY_Y).dot(self.W2.T).shape # print "Z shape:", Z.shape dZ = np.outer(pY_Y, self.W2) * (Z > 0) # dZ = np.outer(pY_Y, self.W2) * (1 - Z*Z) self.W1 -= learning_rate*(X.T.dot(dZ) + reg*self.W1) self.b1 -= learning_rate*(np.sum(dZ, axis=0) + reg*self.b1) if i % 20 == 0: pYvalid, _ = self.forward(Xvalid) c = sigmoid_cost(Yvalid, pYvalid) costs.append(c) e = error_rate(Yvalid, np.round(pYvalid)) print("i:", i, "cost:", c, "error:", e) if e < best_validation_error: best_validation_error = e print("best_validation_error:", best_validation_error) if show_fig: plt.plot(costs) plt.show() def forward(self, X): Z = relu(X.dot(self.W1) + self.b1) # Z = np.tanh(X.dot(self.W1) + self.b1) return sigmoid(Z.dot(self.W2) + self.b2), Z def predict(self, X): pY = self.forward(X) return np.round(pY) def score(self, X, Y): prediction = self.predict(X) return 1 - error_rate(Y, prediction) def main(): X, Y = getBinaryData() X0 = X[Y==0, :] X1 = X[Y==1, :] X1 = np.repeat(X1, 9, axis=0) X = np.vstack([X0, X1]) Y = np.array([0]*len(X0) + [1]*len(X1)) model = ANN(100) model.fit(X, Y, show_fig=True) if __name__ == '__main__': main() ================================================ FILE: ann_tf.py ================================================ from __future__ import print_function, division from builtins import range # Note: you may need to update your version of future # sudo pip install -U future import numpy as np import tensorflow as tf import matplotlib.pyplot as plt from util import getData, getBinaryData, y2indicator, error_rate, init_weight_and_bias from sklearn.utils import shuffle class HiddenLayer(object): def __init__(self, M1, M2, an_id): self.id = an_id self.M1 = M1 self.M2 = M2 W, b = init_weight_and_bias(M1, M2) self.W = tf.Variable(W.astype(np.float32)) self.b = tf.Variable(b.astype(np.float32)) self.params = [self.W, self.b] def forward(self, X): return tf.nn.relu(tf.matmul(X, self.W) + self.b) class ANN(object): def __init__(self, hidden_layer_sizes): self.hidden_layer_sizes = hidden_layer_sizes def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-2, mu=0.99, decay=0.999, reg=1e-3, epochs=10, batch_sz=100, show_fig=False): K = len(set(Y)) # won't work later b/c we turn it into indicator # make a validation set X, Y = shuffle(X, Y) X = X.astype(np.float32) Y = y2indicator(Y).astype(np.float32) # for calculating error rate Yvalid_flat = Yvalid Yvalid = y2indicator(Yvalid).astype(np.float32) # initialize hidden layers N, D = X.shape self.hidden_layers = [] M1 = D count = 0 for M2 in self.hidden_layer_sizes: h = HiddenLayer(M1, M2, count) self.hidden_layers.append(h) M1 = M2 count += 1 W, b = init_weight_and_bias(M1, K) self.W = tf.Variable(W.astype(np.float32)) self.b = tf.Variable(b.astype(np.float32)) # collect params for later use self.params = [self.W, self.b] for h in self.hidden_layers: self.params += h.params # set up theano functions and variables tfX = tf.placeholder(tf.float32, shape=(None, D), name='X') tfT = tf.placeholder(tf.float32, shape=(None, K), name='T') act = self.forward(tfX) rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params]) cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( logits=act, labels=tfT ) ) + rcost prediction = self.predict(tfX) train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost) n_batches = N // batch_sz costs = [] init = tf.global_variables_initializer() with tf.Session() as session: session.run(init) for i in range(epochs): X, Y = shuffle(X, Y) for j in range(n_batches): Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)] Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)] session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch}) if j % 20 == 0: c = session.run(cost, feed_dict={tfX: Xvalid, tfT: Yvalid}) costs.append(c) p = session.run(prediction, feed_dict={tfX: Xvalid, tfT: Yvalid}) e = error_rate(Yvalid_flat, p) print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e) if show_fig: plt.plot(costs) plt.show() def forward(self, X): Z = X for h in self.hidden_layers: Z = h.forward(Z) return tf.matmul(Z, self.W) + self.b def predict(self, X): act = self.forward(X) return tf.argmax(act, 1) def main(): Xtrain, Ytrain, Xvalid, Yvalid = getData() model = ANN([2000, 1000, 500]) model.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True) if __name__ == '__main__': main() ================================================ FILE: ann_theano.py ================================================ from __future__ import print_function, division from builtins import range # Note: you may need to update your version of future # sudo pip install -U future import numpy as np import theano import theano.tensor as T import matplotlib.pyplot as plt from util import getData, getBinaryData, error_rate, relu, init_weight_and_bias from sklearn.utils import shuffle def rmsprop(cost, params, lr, mu, decay, eps): grads = T.grad(cost, params) updates = [] for p, g in zip(params, grads): # cache ones = np.ones_like(p.get_value(), dtype=np.float32) c = theano.shared(ones) new_c = decay*c + (np.float32(1.0) - decay)*g*g # momentum zeros = np.zeros_like(p.get_value(), dtype=np.float32) m = theano.shared(zeros) new_m = mu*m - lr*g / T.sqrt(new_c + eps) # param update new_p = p + new_m # append the updates updates.append((c, new_c)) updates.append((m, new_m)) updates.append((p, new_p)) return updates class HiddenLayer(object): def __init__(self, M1, M2, an_id): self.id = an_id self.M1 = M1 self.M2 = M2 W, b = init_weight_and_bias(M1, M2) self.W = theano.shared(W, 'W_%s' % self.id) self.b = theano.shared(b, 'b_%s' % self.id) self.params = [self.W, self.b] def forward(self, X): return relu(X.dot(self.W) + self.b) class ANN(object): def __init__(self, hidden_layer_sizes): self.hidden_layer_sizes = hidden_layer_sizes def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-2, mu=0.99, decay=0.999, reg=1e-3, eps=1e-8, epochs=10, batch_sz=100, show_fig=False): # downcast learning_rate = np.float32(learning_rate) mu = np.float32(mu) decay = np.float32(decay) reg = np.float32(reg) eps = np.float32(eps) X = X.astype(np.float32) Xvalid = Xvalid.astype(np.float32) Y = Y.astype(np.int32) Yvalid = Yvalid.astype(np.int32) # initialize hidden layers N, D = X.shape K = len(set(Y)) self.hidden_layers = [] M1 = D count = 0 for M2 in self.hidden_layer_sizes: h = HiddenLayer(M1, M2, count) self.hidden_layers.append(h) M1 = M2 count += 1 W, b = init_weight_and_bias(M1, K) self.W = theano.shared(W, 'W_logreg') self.b = theano.shared(b, 'b_logreg') # collect params for later use self.params = [self.W, self.b] for h in self.hidden_layers: self.params += h.params # set up theano functions and variables thX = T.fmatrix('X') thY = T.ivector('Y') pY = self.th_forward(thX) rcost = reg*T.sum([(p*p).sum() for p in self.params]) cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost prediction = self.th_predict(thX) # actual prediction function self.predict_op = theano.function(inputs=[thX], outputs=prediction) cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction]) updates = rmsprop(cost, self.params, learning_rate, mu, decay, eps) train_op = theano.function( inputs=[thX, thY], updates=updates ) n_batches = N // batch_sz costs = [] for i in range(epochs): X, Y = shuffle(X, Y) for j in range(n_batches): Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)] Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)] train_op(Xbatch, Ybatch) if j % 20 == 0: c, p = cost_predict_op(Xvalid, Yvalid) costs.append(c) e = error_rate(Yvalid, p) print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e) if show_fig: plt.plot(costs) plt.show() def th_forward(self, X): Z = X for h in self.hidden_layers: Z = h.forward(Z) return T.nnet.softmax(Z.dot(self.W) + self.b) def th_predict(self, X): pY = self.th_forward(X) return T.argmax(pY, axis=1) def predict(self, X): return self.predict_op(X) def main(): Xtrain, Ytrain, Xvalid, Yvalid = getData() model = ANN([2000, 1000, 500]) model.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True) if __name__ == '__main__': main() ================================================ FILE: cnn_tf.py ================================================ from __future__ import print_function, division from builtins import range # Note: you may need to update your version of future # sudo pip install -U future import numpy as np import tensorflow as tf import matplotlib.pyplot as plt from sklearn.utils import shuffle from util import getImageData, error_rate, init_weight_and_bias, y2indicator from ann_tf import HiddenLayer # differences from Theano: # image dimensions are expected to be: N x width x height x color # filter shapes are expected to be: filter width x filter height x input feature maps x output feature maps def init_filter(shape, poolsz): w = np.random.randn(*shape) * np.sqrt(2) / np.sqrt(np.prod(shape[:-1]) + shape[-1]*np.prod(shape[:-2] / np.prod(poolsz))) return w.astype(np.float32) class ConvPoolLayer(object): def __init__(self, mi, mo, fw=5, fh=5, poolsz=(2, 2)): # mi = input feature map size # mo = output feature map size sz = (fw, fh, mi, mo) W0 = init_filter(sz, poolsz) self.W = tf.Variable(W0) b0 = np.zeros(mo, dtype=np.float32) self.b = tf.Variable(b0) self.poolsz = poolsz self.params = [self.W, self.b] def forward(self, X): conv_out = tf.nn.conv2d(X, self.W, strides=[1, 1, 1, 1], padding='SAME') conv_out = tf.nn.bias_add(conv_out, self.b) p1, p2 = self.poolsz pool_out = tf.nn.max_pool( conv_out, ksize=[1, p1, p2, 1], strides=[1, p1, p2, 1], padding='SAME' ) return tf.nn.relu(pool_out) class CNN(object): def __init__(self, convpool_layer_sizes, hidden_layer_sizes): self.convpool_layer_sizes = convpool_layer_sizes self.hidden_layer_sizes = hidden_layer_sizes def fit(self, X, Y, Xvalid, Yvalid, lr=1e-2, mu=0.9, reg=1e-3, decay=0.99999, eps=1e-10, batch_sz=30, epochs=5, show_fig=True): lr = np.float32(lr) mu = np.float32(mu) reg = np.float32(reg) decay = np.float32(decay) eps = np.float32(eps) K = len(set(Y)) # make a validation set X, Y = shuffle(X, Y) X = X.astype(np.float32) Y = y2indicator(Y).astype(np.float32) Yvalid = y2indicator(Yvalid).astype(np.float32) Yvalid_flat = np.argmax(Yvalid, axis=1) # for calculating error rate # initialize convpool layers N, width, height, c = X.shape mi = c outw = width outh = height self.convpool_layers = [] for mo, fw, fh in self.convpool_layer_sizes: layer = ConvPoolLayer(mi, mo, fw, fh) self.convpool_layers.append(layer) outw = outw // 2 outh = outh // 2 mi = mo # initialize mlp layers self.hidden_layers = [] M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer count = 0 for M2 in self.hidden_layer_sizes: h = HiddenLayer(M1, M2, count) self.hidden_layers.append(h) M1 = M2 count += 1 # logistic regression layer W, b = init_weight_and_bias(M1, K) self.W = tf.Variable(W, 'W_logreg') self.b = tf.Variable(b, 'b_logreg') # collect params for later use self.params = [self.W, self.b] for h in self.convpool_layers: self.params += h.params for h in self.hidden_layers: self.params += h.params # set up tensorflow functions and variables tfX = tf.placeholder(tf.float32, shape=(None, width, height, c), name='X') tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y') act = self.forward(tfX) rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params]) cost = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits( logits=act, labels=tfY ) ) + rcost prediction = self.predict(tfX) train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost) n_batches = N // batch_sz costs = [] init = tf.global_variables_initializer() with tf.Session() as session: session.run(init) for i in range(epochs): X, Y = shuffle(X, Y) for j in range(n_batches): Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)] Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)] session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch}) if j % 20 == 0: c = session.run(cost, feed_dict={tfX: Xvalid, tfY: Yvalid}) costs.append(c) p = session.run(prediction, feed_dict={tfX: Xvalid, tfY: Yvalid}) e = error_rate(Yvalid_flat, p) print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e) if show_fig: plt.plot(costs) plt.show() def forward(self, X): Z = X for c in self.convpool_layers: Z = c.forward(Z) Z_shape = Z.get_shape().as_list() Z = tf.reshape(Z, [-1, np.prod(Z_shape[1:])]) for h in self.hidden_layers: Z = h.forward(Z) return tf.matmul(Z, self.W) + self.b def predict(self, X): pY = self.forward(X) return tf.argmax(pY, 1) def main(): Xtrain, Ytrain, Xvalid, Yvalid = getImageData() # reshape X for tf: N x H x W x C Xtrain = Xtrain.transpose((0, 2, 3, 1)) Xvalid = Xvalid.transpose((0, 2, 3, 1)) model = CNN( convpool_layer_sizes=[(20, 5, 5), (20, 5, 5)], hidden_layer_sizes=[500, 300], ) model.fit(Xtrain, Ytrain, Xvalid, Yvalid) if __name__ == '__main__': main() ================================================ FILE: cnn_theano.py ================================================ from __future__ import print_function, division from builtins import range # Note: you may need to update your version of future # sudo pip install -U future import numpy as np import theano import theano.tensor as T import matplotlib.pyplot as plt from sklearn.utils import shuffle from theano.tensor.nnet import conv2d from theano.tensor.signal.pool import pool_2d from util import getImageData, error_rate, init_weight_and_bias, init_filter from ann_theano import HiddenLayer, rmsprop class ConvPoolLayer(object): def __init__(self, mi, mo, fw=5, fh=5, poolsz=(2, 2)): # mi = input feature map size # mo = output feature map size sz = (mo, mi, fw, fh) W0 = init_filter(sz, poolsz) self.W = theano.shared(W0) b0 = np.zeros(mo, dtype=np.float32) self.b = theano.shared(b0) self.poolsz = poolsz self.params = [self.W, self.b] def forward(self, X): conv_out = conv2d(input=X, filters=self.W) pooled_out = pool_2d( input=conv_out, ws=self.poolsz, ignore_border=True, mode='max', ) return T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')) class CNN(object): def __init__(self, convpool_layer_sizes, hidden_layer_sizes): self.convpool_layer_sizes = convpool_layer_sizes self.hidden_layer_sizes = hidden_layer_sizes def fit(self, X, Y, Xvalid, Yvalid, lr=1e-3, mu=0.99, reg=1e-3, decay=0.99999, eps=1e-10, batch_sz=30, epochs=3, show_fig=True): # downcast lr = np.float32(lr) mu = np.float32(mu) reg = np.float32(reg) decay = np.float32(decay) eps = np.float32(eps) X = X.astype(np.float32) Xvalid = Xvalid.astype(np.float32) Y = Y.astype(np.int32) Yvalid = Yvalid.astype(np.int32) # initialize convpool layers N, c, width, height = X.shape mi = c outw = width outh = height self.convpool_layers = [] for mo, fw, fh in self.convpool_layer_sizes: layer = ConvPoolLayer(mi, mo, fw, fh) self.convpool_layers.append(layer) outw = (outw - fw + 1) // 2 outh = (outh - fh + 1) // 2 mi = mo # initialize mlp layers K = len(set(Y)) self.hidden_layers = [] M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer count = 0 for M2 in self.hidden_layer_sizes: h = HiddenLayer(M1, M2, count) self.hidden_layers.append(h) M1 = M2 count += 1 # logistic regression layer W, b = init_weight_and_bias(M1, K) self.W = theano.shared(W, 'W_logreg') self.b = theano.shared(b, 'b_logreg') # collect params for later use self.params = [self.W, self.b] for c in self.convpool_layers: self.params += c.params for h in self.hidden_layers: self.params += h.params # set up theano functions and variables thX = T.tensor4('X', dtype='float32') thY = T.ivector('Y') pY = self.forward(thX) rcost = reg*T.sum([(p*p).sum() for p in self.params]) cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost prediction = self.th_predict(thX) cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction]) updates = rmsprop(cost, self.params, lr, mu, decay, eps) train_op = theano.function( inputs=[thX, thY], outputs=cost, updates=updates ) n_batches = N // batch_sz costs = [] for i in range(epochs): X, Y = shuffle(X, Y) for j in range(n_batches): Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)] Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)] train_c = train_op(Xbatch, Ybatch) if j % 20 == 0: c, p = cost_predict_op(Xvalid, Yvalid) costs.append(c) e = error_rate(Yvalid, p) print( "i:", i, "j:", j, "nb:", n_batches, "train cost:", train_c, "cost:", c, "error rate:", e ) if show_fig: plt.plot(costs) plt.show() def forward(self, X): Z = X for c in self.convpool_layers: Z = c.forward(Z) Z = Z.flatten(ndim=2) for h in self.hidden_layers: Z = h.forward(Z) return T.nnet.softmax(Z.dot(self.W) + self.b) def th_predict(self, X): pY = self.forward(X) return T.argmax(pY, axis=1) def main(): Xtrain, Ytrain, Xvalid, Yvalid = getImageData() model = CNN( convpool_layer_sizes=[(20, 5, 5), (20, 5, 5)], hidden_layer_sizes=[500, 300], ) model.fit(Xtrain, Ytrain, Xvalid, Yvalid) if __name__ == '__main__': main() ================================================ FILE: logistic.py ================================================ from __future__ import print_function, division from builtins import range # Note: you may need to update your version of future # sudo pip install -U future import numpy as np import matplotlib.pyplot as plt from util import getData, softmax, cost, y2indicator, error_rate from sklearn.utils import shuffle class LogisticModel(object): def __init__(self): pass def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-7, reg=0., epochs=10000, show_fig=False): Tvalid = y2indicator(Yvalid) N, D = X.shape K = len(set(Y)) T = y2indicator(Y) self.W = np.random.randn(D, K) / np.sqrt(D) self.b = np.zeros(K) costs = [] best_validation_error = 1 for i in range(epochs): # forward propagation and cost calculation pY = self.forward(X) # gradient descent step self.W -= learning_rate*(X.T.dot(pY - T) + reg*self.W) self.b -= learning_rate*((pY - T).sum(axis=0) + reg*self.b) if i % 10 == 0: pYvalid = self.forward(Xvalid) c = cost(Tvalid, pYvalid) costs.append(c) e = error_rate(Yvalid, np.argmax(pYvalid, axis=1)) print("i:", i, "cost:", c, "error:", e) if e < best_validation_error: best_validation_error = e print("best_validation_error:", best_validation_error) if show_fig: plt.plot(costs) plt.show() def forward(self, X): return softmax(X.dot(self.W) + self.b) def predict(self, X): pY = self.forward(X) return np.argmax(pY, axis=1) def score(self, X, Y): prediction = self.predict(X) return 1 - error_rate(Y, prediction) def main(): Xtrain, Ytrain, Xvalid, Yvalid = getData() model = LogisticModel() model.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True) print(model.score(Xvalid, Yvalid)) if __name__ == '__main__': main() ================================================ FILE: logistic_sigmoid.py ================================================ from __future__ import print_function, division from builtins import range # Note: you may need to update your version of future # sudo pip install -U future import numpy as np import matplotlib.pyplot as plt from sklearn.utils import shuffle from util import getBinaryData, sigmoid, sigmoid_cost, error_rate class LogisticModel(object): def __init__(self): pass def fit(self, X, Y, learning_rate=1e-6, reg=0., epochs=120000, show_fig=False): X, Y = shuffle(X, Y) Xvalid, Yvalid = X[-1000:], Y[-1000:] X, Y = X[:-1000], Y[:-1000] N, D = X.shape self.W = np.random.randn(D) / np.sqrt(D) self.b = 0 costs = [] best_validation_error = 1 for i in range(epochs): # forward propagation and cost calculation pY = self.forward(X) # gradient descent step self.W -= learning_rate*(X.T.dot(pY - Y) + reg*self.W) self.b -= learning_rate*((pY - Y).sum() + reg*self.b) if i % 20 == 0: pYvalid = self.forward(Xvalid) c = sigmoid_cost(Yvalid, pYvalid) costs.append(c) e = error_rate(Yvalid, np.round(pYvalid)) print("i:", i, "cost:", c, "error:", e) if e < best_validation_error: best_validation_error = e print("best_validation_error:", best_validation_error) if show_fig: plt.plot(costs) plt.show() def forward(self, X): return sigmoid(X.dot(self.W) + self.b) def predict(self, X): pY = self.forward(X) return np.round(pY) def score(self, X, Y): prediction = self.predict(X) return 1 - error_rate(Y, prediction) def main(): X, Y = getBinaryData() X0 = X[Y==0, :] X1 = X[Y==1, :] X1 = np.repeat(X1, 9, axis=0) X = np.vstack([X0, X1]) Y = np.array([0]*len(X0) + [1]*len(X1)) model = LogisticModel() model.fit(X, Y, show_fig=True) model.score(X, Y) # scores = cross_val_score(model, X, Y, cv=5) # print "score mean:", np.mean(scores), "stdev:", np.std(scores) if __name__ == '__main__': main() ================================================ FILE: show_images.py ================================================ from __future__ import print_function, division from builtins import range, input # Note: you may need to update your version of future # sudo pip install -U future import numpy as np import matplotlib.pyplot as plt from util import getData label_map = ['Anger', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral'] def main(): X, Y, _, _ = getData(balance_ones=False) while True: for i in range(7): x, y = X[Y==i], Y[Y==i] N = len(y) j = np.random.choice(N) plt.imshow(x[j].reshape(48, 48), cmap='gray') plt.title(label_map[y[j]]) plt.show() prompt = input('Quit? Enter Y:\n') if prompt.lower().startswith('y'): break if __name__ == '__main__': main() ================================================ FILE: util.py ================================================ from __future__ import print_function, division from builtins import range # Note: you may need to update your version of future # sudo pip install -U future import numpy as np import pandas as pd from sklearn.utils import shuffle def init_weight_and_bias(M1, M2): W = np.random.randn(M1, M2) / np.sqrt(M1) b = np.zeros(M2) return W.astype(np.float32), b.astype(np.float32) def init_filter(shape, poolsz): w = np.random.randn(*shape) * np.sqrt(2) / np.sqrt(np.prod(shape[1:]) + shape[0]*np.prod(shape[2:] / np.prod(poolsz))) return w.astype(np.float32) def relu(x): return x * (x > 0) def sigmoid(A): return 1 / (1 + np.exp(-A)) def softmax(A): expA = np.exp(A) return expA / expA.sum(axis=1, keepdims=True) def sigmoid_cost(T, Y): return -(T*np.log(Y) + (1-T)*np.log(1-Y)).sum() def cost(T, Y): return -(T*np.log(Y)).sum() def cost2(T, Y): # same as cost(), just uses the targets to index Y # instead of multiplying by a large indicator matrix with mostly 0s N = len(T) return -np.log(Y[np.arange(N), T]).mean() def error_rate(targets, predictions): return np.mean(targets != predictions) def y2indicator(y): N = len(y) K = len(set(y)) ind = np.zeros((N, K)) for i in range(N): ind[i, y[i]] = 1 return ind def getData(balance_ones=True, Ntest=1000): # images are 48x48 = 2304 size vectors Y = [] X = [] first = True for line in open('fer2013.csv'): if first: first = False else: row = line.split(',') Y.append(int(row[0])) X.append([int(p) for p in row[1].split()]) X, Y = np.array(X) / 255.0, np.array(Y) # shuffle and split X, Y = shuffle(X, Y) Xtrain, Ytrain = X[:-Ntest], Y[:-Ntest] Xvalid, Yvalid = X[-Ntest:], Y[-Ntest:] if balance_ones: # balance the 1 class X0, Y0 = Xtrain[Ytrain!=1, :], Ytrain[Ytrain!=1] X1 = Xtrain[Ytrain==1, :] X1 = np.repeat(X1, 9, axis=0) Xtrain = np.vstack([X0, X1]) Ytrain = np.concatenate((Y0, [1]*len(X1))) return Xtrain, Ytrain, Xvalid, Yvalid def getImageData(): Xtrain, Ytrain, Xvalid, Yvalid = getData() N, D = Xtrain.shape d = int(np.sqrt(D)) Xtrain = Xtrain.reshape(-1, 1, d, d) Xvalid = Xvalid.reshape(-1, 1, d, d) return Xtrain, Ytrain, Xvalid, Yvalid def getBinaryData(): Y = [] X = [] first = True for line in open('fer2013.csv'): if first: first = False else: row = line.split(',') y = int(row[0]) if y == 0 or y == 1: Y.append(y) X.append([int(p) for p in row[1].split()]) return np.array(X) / 255.0, np.array(Y) def crossValidation(model, X, Y, K=5): # split data into K parts X, Y = shuffle(X, Y) sz = len(Y) // K errors = [] for k in range(K): xtr = np.concatenate([ X[:k*sz, :], X[(k*sz + sz):, :] ]) ytr = np.concatenate([ Y[:k*sz], Y[(k*sz + sz):] ]) xte = X[k*sz:(k*sz + sz), :] yte = Y[k*sz:(k*sz + sz)] model.fit(xtr, ytr) err = model.score(xte, yte) errors.append(err) print("errors:", errors) return np.mean(errors)