master df09c8c56ce8 cached
12 files
32.9 KB
9.4k tokens
82 symbols
1 requests
Download .txt
Repository: lazyprogrammer/facial-expression-recognition
Branch: master
Commit: df09c8c56ce8
Files: 12
Total size: 32.9 KB

Directory structure:
gitextract_o6irw1n6/

├── .gitignore
├── README.md
├── ann.py
├── ann_sigmoid.py
├── ann_tf.py
├── ann_theano.py
├── cnn_tf.py
├── cnn_theano.py
├── logistic.py
├── logistic_sigmoid.py
├── show_images.py
└── util.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
fer2013*
.DS_Store
*.pyc
TEST.py


================================================
FILE: README.md
================================================
Relevant Course URLs:

* https://deeplearningcourses.com/c/data-science-logistic-regression-in-python/
* https://deeplearningcourses.com/c/data-science-deep-learning-in-python/
* https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow/
* https://deeplearningcourses.com/c/deep-learning-convolutional-neural-networks-theano-tensorflow/

Data:

https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge

If you get "An error occurred: Data not found":

https://archive.org/download/fer2013_202311/fer2013.csv


================================================
FILE: ann.py
================================================
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import matplotlib.pyplot as plt

from util import getData, softmax, cost2, y2indicator, error_rate, relu
from sklearn.utils import shuffle


class ANN(object):
    """Single-hidden-layer network: tanh hidden units feeding a softmax output.

    Trained with full-batch gradient descent (see fit()).
    """

    def __init__(self, M):
        # M: number of hidden units
        self.M = M

    def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-6, reg=1e-6, epochs=10000, show_fig=False):
        """Train on (X, Y) and report validation cost/error every 10 epochs.

        X and Xvalid are 2-D arrays (samples x features). Y and Yvalid hold
        integer class labels, since they are used as indices by y2indicator()
        and cost2(). `reg` is the L2 penalty weight. When `show_fig` is true
        the recorded validation costs are plotted at the end.
        """
        N, D = X.shape
        K = len(set(Y))
        T = y2indicator(Y)
        self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
        self.b1 = np.zeros(self.M)
        self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)
        self.b2 = np.zeros(K)

        costs = []
        best_validation_error = 1
        for i in range(epochs):
            # forward propagation
            pY, Z = self.forward(X)

            # Compute every gradient from this forward pass BEFORE changing
            # any weight: dZ must be back-propagated through the W2 that
            # produced pY, not through an already-updated W2.
            pY_T = pY - T
            # relu alternative: dZ = pY_T.dot(self.W2.T) * (Z > 0)
            dZ = pY_T.dot(self.W2.T) * (1 - Z*Z) # tanh

            # gradient descent step
            self.W2 -= learning_rate*(Z.T.dot(pY_T) + reg*self.W2)
            self.b2 -= learning_rate*(pY_T.sum(axis=0) + reg*self.b2)
            self.W1 -= learning_rate*(X.T.dot(dZ) + reg*self.W1)
            self.b1 -= learning_rate*(dZ.sum(axis=0) + reg*self.b1)

            if i % 10 == 0:
                pYvalid, _ = self.forward(Xvalid)
                c = cost2(Yvalid, pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))
                print("i:", i, "cost:", c, "error:", e)
                if e < best_validation_error:
                    best_validation_error = e
        print("best_validation_error:", best_validation_error)

        if show_fig:
            plt.plot(costs)
            plt.show()

    def forward(self, X):
        """Return (class probabilities, hidden activations) for inputs X."""
        # relu alternative: Z = relu(X.dot(self.W1) + self.b1)
        Z = np.tanh(X.dot(self.W1) + self.b1)
        return softmax(Z.dot(self.W2) + self.b2), Z

    def predict(self, X):
        """Return the most probable class index for each row of X."""
        pY, _ = self.forward(X)
        return np.argmax(pY, axis=1)

    def score(self, X, Y):
        """Return classification accuracy (1 - error rate) on (X, Y)."""
        prediction = self.predict(X)
        return 1 - error_rate(Y, prediction)


def main():
    """Train a 200-hidden-unit ANN on the data from getData() and print its validation accuracy."""
    Xtrain, Ytrain, Xvalid, Yvalid = getData()

    net = ANN(200)
    net.fit(Xtrain, Ytrain, Xvalid, Yvalid, reg=0, show_fig=True)
    accuracy = net.score(Xvalid, Yvalid)
    print(accuracy)


if __name__ == '__main__':
    main()


================================================
FILE: ann_sigmoid.py
================================================
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from util import getBinaryData, sigmoid, sigmoid_cost, error_rate, relu


class ANN(object):
    """Single-hidden-layer binary classifier: ReLU hidden units, sigmoid output.

    Trained with full-batch gradient descent (see fit()).
    """

    def __init__(self, M):
        # M: number of hidden units
        self.M = M

    def fit(self, X, Y, learning_rate=5e-7, reg=1.0, epochs=10000, show_fig=False):
        """Train on (X, Y), holding out the last 1000 shuffled samples for validation.

        Validation cost and error are printed every 20 epochs. `reg` is the
        L2 penalty weight. When `show_fig` is true the recorded validation
        costs are plotted at the end.
        """
        X, Y = shuffle(X, Y)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]

        N, D = X.shape
        self.W1 = np.random.randn(D, self.M) / np.sqrt(D)
        self.b1 = np.zeros(self.M)
        self.W2 = np.random.randn(self.M) / np.sqrt(self.M)
        self.b2 = 0

        costs = []
        best_validation_error = 1
        for i in range(epochs):
            # forward propagation
            pY, Z = self.forward(X)

            # Compute every gradient from this forward pass BEFORE changing
            # any weight: dZ must be back-propagated through the W2 that
            # produced pY, not through an already-updated W2.
            pY_Y = pY - Y
            dZ = np.outer(pY_Y, self.W2) * (Z > 0) # relu
            # tanh alternative: dZ = np.outer(pY_Y, self.W2) * (1 - Z*Z)

            # gradient descent step
            self.W2 -= learning_rate*(Z.T.dot(pY_Y) + reg*self.W2)
            self.b2 -= learning_rate*((pY_Y).sum() + reg*self.b2)
            self.W1 -= learning_rate*(X.T.dot(dZ) + reg*self.W1)
            self.b1 -= learning_rate*(np.sum(dZ, axis=0) + reg*self.b1)

            if i % 20 == 0:
                pYvalid, _ = self.forward(Xvalid)
                c = sigmoid_cost(Yvalid, pYvalid)
                costs.append(c)
                e = error_rate(Yvalid, np.round(pYvalid))
                print("i:", i, "cost:", c, "error:", e)
                if e < best_validation_error:
                    best_validation_error = e
        print("best_validation_error:", best_validation_error)

        if show_fig:
            plt.plot(costs)
            plt.show()

    def forward(self, X):
        """Return (P(y=1 | x) per row, hidden activations) for inputs X."""
        Z = relu(X.dot(self.W1) + self.b1)
        # tanh alternative: Z = np.tanh(X.dot(self.W1) + self.b1)
        return sigmoid(Z.dot(self.W2) + self.b2), Z

    def predict(self, X):
        """Return the rounded (0/1) prediction for each row of X."""
        # forward() returns a (probabilities, hidden) pair; only the
        # probabilities may be rounded.
        pY, _ = self.forward(X)
        return np.round(pY)

    def score(self, X, Y):
        """Return classification accuracy (1 - error rate) on (X, Y)."""
        prediction = self.predict(X)
        return 1 - error_rate(Y, prediction)


def main():
    """Load the binary data, repeat every class-1 sample 9 times, and train a 100-unit ANN."""
    X, Y = getBinaryData()

    # class-1 rows are duplicated 9x (presumably to offset class imbalance)
    negatives = X[Y == 0, :]
    positives = np.repeat(X[Y == 1, :], 9, axis=0)
    X = np.vstack([negatives, positives])
    Y = np.array([0]*len(negatives) + [1]*len(positives))

    net = ANN(100)
    net.fit(X, Y, show_fig=True)


if __name__ == '__main__':
    main()


================================================
FILE: ann_tf.py
================================================
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from util import getData, getBinaryData, y2indicator, error_rate, init_weight_and_bias
from sklearn.utils import shuffle


class HiddenLayer(object):
    """Fully connected ReLU layer backed by TensorFlow variables."""

    def __init__(self, M1, M2, an_id):
        # M1 / M2: input / output width of the layer
        self.id, self.M1, self.M2 = an_id, M1, M2
        W_init, b_init = init_weight_and_bias(M1, M2)
        self.W = tf.Variable(W_init.astype(np.float32))
        self.b = tf.Variable(b_init.astype(np.float32))
        self.params = [self.W, self.b]

    def forward(self, X):
        """Return relu(X W + b) as a TensorFlow tensor."""
        pre_activation = tf.matmul(X, self.W) + self.b
        return tf.nn.relu(pre_activation)


class ANN(object):
    """Feed-forward network of ReLU HiddenLayers with a linear output layer (TensorFlow)."""

    def __init__(self, hidden_layer_sizes):
        # hidden_layer_sizes: iterable of hidden-layer widths, in order
        self.hidden_layer_sizes = hidden_layer_sizes

    def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-2, mu=0.99, decay=0.999, reg=1e-3, epochs=10, batch_sz=100, show_fig=False):
        """Train with mini-batch RMSProp, printing validation cost/error every 20 batches.

        Y and Yvalid are integer label vectors; they are converted to
        indicator matrices here. `mu` is the RMSProp momentum, `decay` its
        cache decay, and `reg` the weight of the L2 penalty applied to every
        parameter. When `show_fig` is true the recorded validation costs are
        plotted at the end.
        """
        # K must be computed now: Y is replaced by an indicator matrix below
        K = len(set(Y)) # won't work later b/c we turn it into indicator

        # shuffle the training data and convert it to float32 / one-hot targets
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = y2indicator(Y).astype(np.float32)

        # keep the integer labels for calculating error rate
        Yvalid_flat = Yvalid
        Yvalid = y2indicator(Yvalid).astype(np.float32)

        # initialize hidden layers
        N, D = X.shape
        
        self.hidden_layers = []
        M1 = D
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1
        # final (logistic regression) layer
        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W.astype(np.float32))
        self.b = tf.Variable(b.astype(np.float32))

        # collect params for later use
        self.params = [self.W, self.b]
        for h in self.hidden_layers:
            self.params += h.params

        # set up tensorflow placeholders and graph
        tfX = tf.placeholder(tf.float32, shape=(None, D), name='X')
        tfT = tf.placeholder(tf.float32, shape=(None, K), name='T')
        act = self.forward(tfX)

        # L2 penalty over all parameters (weights and biases)
        rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params])
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=act,
                labels=tfT
            )
        ) + rcost
        prediction = self.predict(tfX)
        train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost)

        # samples beyond n_batches*batch_sz are skipped in each epoch
        n_batches = N // batch_sz
        costs = []
        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)
            for i in range(epochs):
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                    Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

                    session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch})

                    if j % 20 == 0:
                        # evaluate on the whole validation set
                        c = session.run(cost, feed_dict={tfX: Xvalid, tfT: Yvalid})
                        costs.append(c)

                        p = session.run(prediction, feed_dict={tfX: Xvalid, tfT: Yvalid})
                        e = error_rate(Yvalid_flat, p)
                        print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e)
        
        if show_fig:
            plt.plot(costs)
            plt.show()

    def forward(self, X):
        """Return the output-layer logits (no softmax applied) for input tensor X."""
        Z = X
        for h in self.hidden_layers:
            Z = h.forward(Z)
        return tf.matmul(Z, self.W) + self.b

    def predict(self, X):
        """Return a tensor holding the arg-max class index of the logits for X."""
        act = self.forward(X)
        return tf.argmax(act, 1)


def main():
    """Train a 2000-1000-500 hidden-unit ANN on the data from getData() and plot validation cost."""
    Xtrain, Ytrain, Xvalid, Yvalid = getData()
    layer_sizes = [2000, 1000, 500]
    net = ANN(layer_sizes)
    net.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True)


if __name__ == '__main__':
    main()


================================================
FILE: ann_theano.py
================================================
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import theano
import theano.tensor as T
import matplotlib.pyplot as plt

from util import getData, getBinaryData, error_rate, relu, init_weight_and_bias
from sklearn.utils import shuffle



def rmsprop(cost, params, lr, mu, decay, eps):
    """Build Theano update pairs for RMSProp with momentum.

    For every parameter a cache (initialised to ones) and a momentum buffer
    (initialised to zeros) are created as shared variables. The returned list
    holds, per parameter and in this order, the updates for the cache, the
    momentum buffer and the parameter itself.
    """
    one = np.float32(1.0)
    gradients = T.grad(cost, params)

    updates = []
    for param, grad in zip(params, gradients):
        current = param.get_value()

        # running average of squared gradients
        cache = theano.shared(np.ones_like(current, dtype=np.float32))
        next_cache = decay*cache + (one - decay)*grad*grad

        # momentum buffer
        velocity = theano.shared(np.zeros_like(current, dtype=np.float32))
        next_velocity = mu*velocity - lr*grad / T.sqrt(next_cache + eps)

        updates.extend([
            (cache, next_cache),
            (velocity, next_velocity),
            (param, param + next_velocity),
        ])
    return updates


class HiddenLayer(object):
    """Fully connected ReLU layer backed by Theano shared variables."""

    def __init__(self, M1, M2, an_id):
        # M1 / M2: input / output width of the layer
        self.id, self.M1, self.M2 = an_id, M1, M2
        W_init, b_init = init_weight_and_bias(M1, M2)
        # the layer id is embedded in the shared-variable names
        self.W = theano.shared(W_init, 'W_%s' % self.id)
        self.b = theano.shared(b_init, 'b_%s' % self.id)
        self.params = [self.W, self.b]

    def forward(self, X):
        """Return relu(X W + b) as a symbolic expression."""
        pre_activation = X.dot(self.W) + self.b
        return relu(pre_activation)


class ANN(object):
    """Feed-forward network of ReLU HiddenLayers with a softmax output layer (Theano)."""

    def __init__(self, hidden_layer_sizes):
        # hidden_layer_sizes: iterable of hidden-layer widths, in order
        self.hidden_layer_sizes = hidden_layer_sizes

    def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-2, mu=0.99, decay=0.999, reg=1e-3, eps=1e-8, epochs=10, batch_sz=100, show_fig=False):
        """Train with mini-batch RMSProp, printing validation cost/error every 20 batches.

        Y and Yvalid are integer label vectors. `mu`, `decay` and `eps` are
        forwarded to rmsprop(); `reg` weights the squared-parameter penalty.
        After this call predict() is usable. When `show_fig` is true the
        recorded validation costs are plotted at the end.
        """
        # downcast hyperparameters so the symbolic expressions stay float32
        learning_rate = np.float32(learning_rate)
        mu = np.float32(mu)
        decay = np.float32(decay)
        reg = np.float32(reg)
        eps = np.float32(eps)

        # match the dtypes of the symbolic inputs (fmatrix / ivector) below
        X = X.astype(np.float32)
        Xvalid = Xvalid.astype(np.float32)
        Y = Y.astype(np.int32)
        Yvalid = Yvalid.astype(np.int32)

        # initialize hidden layers
        N, D = X.shape
        K = len(set(Y))
        self.hidden_layers = []
        M1 = D
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1
        # final (logistic regression) layer
        W, b = init_weight_and_bias(M1, K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')

        # collect params for later use
        self.params = [self.W, self.b]
        for h in self.hidden_layers:
            self.params += h.params

        # set up theano functions and variables
        thX = T.fmatrix('X')
        thY = T.ivector('Y')
        pY = self.th_forward(thX)

        # penalty: reg times the sum of squares of every parameter
        rcost = reg*T.sum([(p*p).sum() for p in self.params])
        # mean negative log-probability of the target class, plus the penalty
        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
        prediction = self.th_predict(thX)

        # actual prediction function
        self.predict_op = theano.function(inputs=[thX], outputs=prediction)
        cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction])

        updates = rmsprop(cost, self.params, learning_rate, mu, decay, eps)
        train_op = theano.function(
            inputs=[thX, thY],
            updates=updates
        )

        # samples beyond n_batches*batch_sz are skipped in each epoch
        n_batches = N // batch_sz
        costs = []
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

                train_op(Xbatch, Ybatch)

                if j % 20 == 0:
                    # evaluate on the whole validation set
                    c, p = cost_predict_op(Xvalid, Yvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, p)
                    print("i:", i, "j:", j, "nb:", n_batches, "cost:", c, "error rate:", e)
        
        if show_fig:
            plt.plot(costs)
            plt.show()

    def th_forward(self, X):
        """Return the symbolic softmax class probabilities for symbolic input X."""
        Z = X
        for h in self.hidden_layers:
            Z = h.forward(Z)
        return T.nnet.softmax(Z.dot(self.W) + self.b)

    def th_predict(self, X):
        """Return the symbolic arg-max class index for symbolic input X."""
        pY = self.th_forward(X)
        return T.argmax(pY, axis=1)

    def predict(self, X):
        """Run the compiled prediction function on X (only available after fit())."""
        return self.predict_op(X)


def main():
    """Train a 2000-1000-500 hidden-unit ANN on the data from getData() and plot validation cost."""
    Xtrain, Ytrain, Xvalid, Yvalid = getData()
    layer_sizes = [2000, 1000, 500]
    net = ANN(layer_sizes)
    net.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True)


if __name__ == '__main__':
    main()


================================================
FILE: cnn_tf.py
================================================
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.utils import shuffle

from util import getImageData, error_rate, init_weight_and_bias, y2indicator
from ann_tf import HiddenLayer

# differences from Theano:
# image dimensions are expected to be: N x width x height x color
# filter shapes are expected to be: filter width x filter height x input feature maps x output feature maps


def init_filter(shape, poolsz):
    """Return a random float32 convolution filter bank of the given shape.

    `shape` is (filter width, filter height, input maps, output maps).
    Values are standard-normal draws scaled by sqrt(2 / (fan_in + fan_out)),
    where fan_in = width*height*input_maps and
    fan_out = output_maps*width*height / prod(poolsz).
    """
    fan_in = np.prod(shape[:-1])
    # The pool-size division applies to the PRODUCT of the filter dimensions;
    # dividing the shape tuple itself (as before) scaled each dimension
    # separately and produced the wrong fan-out.
    fan_out = shape[-1] * np.prod(shape[:-2]) / np.prod(poolsz)
    w = np.random.randn(*shape) * np.sqrt(2) / np.sqrt(fan_in + fan_out)
    return w.astype(np.float32)


class ConvPoolLayer(object):
    """Convolution -> bias -> max-pool -> ReLU layer (TensorFlow)."""

    def __init__(self, mi, mo, fw=5, fh=5, poolsz=(2, 2)):
        # mi / mo: number of input / output feature maps
        filter_shape = (fw, fh, mi, mo)
        self.W = tf.Variable(init_filter(filter_shape, poolsz))
        self.b = tf.Variable(np.zeros(mo, dtype=np.float32))
        self.poolsz = poolsz
        self.params = [self.W, self.b]

    def forward(self, X):
        """Apply the layer to X and return the pooled, rectified feature maps."""
        p1, p2 = self.poolsz
        window = [1, p1, p2, 1]  # used as both pooling window and stride

        Z = tf.nn.conv2d(X, self.W, strides=[1, 1, 1, 1], padding='SAME')
        Z = tf.nn.bias_add(Z, self.b)
        Z = tf.nn.max_pool(Z, ksize=window, strides=window, padding='SAME')
        return tf.nn.relu(Z)


class CNN(object):
    """Convolutional network: ConvPoolLayers, then HiddenLayers, then a linear output layer (TensorFlow)."""

    def __init__(self, convpool_layer_sizes, hidden_layer_sizes):
        # convpool_layer_sizes: list of (output maps, filter width, filter height)
        # hidden_layer_sizes: list of fully connected layer widths
        self.convpool_layer_sizes = convpool_layer_sizes
        self.hidden_layer_sizes = hidden_layer_sizes

    def fit(self, X, Y, Xvalid, Yvalid, lr=1e-2, mu=0.9, reg=1e-3, decay=0.99999, eps=1e-10, batch_sz=30, epochs=5, show_fig=True):
        """Train with mini-batch RMSProp, printing validation cost/error every 20 batches.

        X and Xvalid are 4-D arrays whose last axis is the channel axis
        (N x dim1 x dim2 x channels). Y and Yvalid are integer label vectors;
        they are converted to indicator matrices here. `mu` is the RMSProp
        momentum, `decay` its cache decay, `eps` its epsilon and `reg` the
        weight of the L2 penalty. When `show_fig` is true the recorded
        validation costs are plotted at the end.
        """
        lr = np.float32(lr)
        mu = np.float32(mu)
        reg = np.float32(reg)
        decay = np.float32(decay)
        eps = np.float32(eps)
        K = len(set(Y))

        # shuffle the training data and convert it to float32 / one-hot targets
        X, Y = shuffle(X, Y)
        X = X.astype(np.float32)
        Y = y2indicator(Y).astype(np.float32)

        Yvalid = y2indicator(Yvalid).astype(np.float32)
        Yvalid_flat = np.argmax(Yvalid, axis=1) # for calculating error rate

        # initialize convpool layers
        N, width, height, c = X.shape
        mi = c
        outw = width
        outh = height
        self.convpool_layers = []
        for mo, fw, fh in self.convpool_layer_sizes:
            layer = ConvPoolLayer(mi, mo, fw, fh)
            self.convpool_layers.append(layer)
            # 'SAME' convolution keeps the size; 'SAME' 2x2 pooling with
            # stride 2 yields ceil(size / 2), so round up (matters for odd sizes)
            outw = (outw + 1) // 2
            outh = (outh + 1) // 2
            mi = mo

        # initialize mlp layers
        self.hidden_layers = []
        M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1

        # logistic regression layer
        # NOTE: the name must be passed by keyword; the second positional
        # argument of tf.Variable is `trainable`, not `name`.
        W, b = init_weight_and_bias(M1, K)
        self.W = tf.Variable(W, name='W_logreg')
        self.b = tf.Variable(b, name='b_logreg')

        # collect params for later use
        self.params = [self.W, self.b]
        for h in self.convpool_layers:
            self.params += h.params
        for h in self.hidden_layers:
            self.params += h.params

        # set up tensorflow functions and variables
        tfX = tf.placeholder(tf.float32, shape=(None, width, height, c), name='X')
        tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y')
        act = self.forward(tfX)

        # L2 penalty over all parameters (weights and biases)
        rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params])
        cost = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=act,
                labels=tfY
            )
        ) + rcost
        prediction = self.predict(tfX)

        # eps was previously accepted but never forwarded to the optimizer
        train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu, epsilon=eps).minimize(cost)

        # samples beyond n_batches*batch_sz are skipped in each epoch
        n_batches = N // batch_sz
        costs = []
        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)
            for i in range(epochs):
                X, Y = shuffle(X, Y)
                for j in range(n_batches):
                    Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                    Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

                    session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch})

                    if j % 20 == 0:
                        # evaluate on the whole validation set
                        valid_cost = session.run(cost, feed_dict={tfX: Xvalid, tfY: Yvalid})
                        costs.append(valid_cost)

                        p = session.run(prediction, feed_dict={tfX: Xvalid, tfY: Yvalid})
                        e = error_rate(Yvalid_flat, p)
                        print("i:", i, "j:", j, "nb:", n_batches, "cost:", valid_cost, "error rate:", e)

        if show_fig:
            plt.plot(costs)
            plt.show()

    def forward(self, X):
        """Return the output-layer logits (no softmax applied) for input tensor X."""
        Z = X
        for c in self.convpool_layers:
            Z = c.forward(Z)
        # flatten every non-batch dimension before the fully connected layers
        Z_shape = Z.get_shape().as_list()
        Z = tf.reshape(Z, [-1, np.prod(Z_shape[1:])])
        for h in self.hidden_layers:
            Z = h.forward(Z)
        return tf.matmul(Z, self.W) + self.b

    def predict(self, X):
        """Return a tensor holding the arg-max class index of the logits for X."""
        pY = self.forward(X)
        return tf.argmax(pY, 1)


def main():
    """Load the image data, move the channel axis last, and train the CNN."""
    Xtrain, Ytrain, Xvalid, Yvalid = getImageData()

    # TensorFlow expects channels last: move axis 1 to the end
    channels_last = (0, 2, 3, 1)
    Xtrain = Xtrain.transpose(channels_last)
    Xvalid = Xvalid.transpose(channels_last)

    conv_spec = [(20, 5, 5), (20, 5, 5)]
    dense_spec = [500, 300]
    net = CNN(convpool_layer_sizes=conv_spec, hidden_layer_sizes=dense_spec)
    net.fit(Xtrain, Ytrain, Xvalid, Yvalid)


if __name__ == '__main__':
    main()


================================================
FILE: cnn_theano.py
================================================
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import theano
import theano.tensor as T
import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from theano.tensor.nnet import conv2d
from theano.tensor.signal.pool import pool_2d

from util import getImageData, error_rate, init_weight_and_bias, init_filter
from ann_theano import HiddenLayer, rmsprop


class ConvPoolLayer(object):
    """Convolution -> max-pool -> bias -> tanh layer (Theano)."""

    def __init__(self, mi, mo, fw=5, fh=5, poolsz=(2, 2)):
        # mi / mo: number of input / output feature maps
        filter_shape = (mo, mi, fw, fh)
        self.W = theano.shared(init_filter(filter_shape, poolsz))
        self.b = theano.shared(np.zeros(mo, dtype=np.float32))
        self.poolsz = poolsz
        self.params = [self.W, self.b]

    def forward(self, X):
        """Apply the layer to symbolic input X and return the activated feature maps."""
        # one bias per output map, broadcast over the other three axes
        bias = self.b.dimshuffle('x', 0, 'x', 'x')
        feature_maps = conv2d(input=X, filters=self.W)
        pooled = pool_2d(input=feature_maps, ws=self.poolsz, ignore_border=True, mode='max')
        return T.tanh(pooled + bias)


class CNN(object):
    """Convolutional network: ConvPoolLayers, then HiddenLayers, then a softmax output layer (Theano)."""

    def __init__(self, convpool_layer_sizes, hidden_layer_sizes):
        # convpool_layer_sizes: list of (output maps, filter width, filter height)
        # hidden_layer_sizes: list of fully connected layer widths
        self.convpool_layer_sizes = convpool_layer_sizes
        self.hidden_layer_sizes = hidden_layer_sizes

    def fit(self, X, Y, Xvalid, Yvalid, lr=1e-3, mu=0.99, reg=1e-3, decay=0.99999, eps=1e-10, batch_sz=30, epochs=3, show_fig=True):
        """Train with mini-batch RMSProp, printing train/validation cost and error every 20 batches.

        X and Xvalid are 4-D arrays laid out as N x channels x dim1 x dim2;
        Y and Yvalid are integer label vectors. `mu`, `decay` and `eps` are
        forwarded to rmsprop(); `reg` weights the squared-parameter penalty.
        When `show_fig` is true the recorded validation costs are plotted at
        the end.
        """
        # downcast hyperparameters so the symbolic expressions stay float32
        lr = np.float32(lr)
        mu = np.float32(mu)
        reg = np.float32(reg)
        decay = np.float32(decay)
        eps = np.float32(eps)

        # match the dtypes of the symbolic inputs (float32 tensor4 / ivector) below
        X = X.astype(np.float32)
        Xvalid = Xvalid.astype(np.float32)
        Y = Y.astype(np.int32)
        Yvalid = Yvalid.astype(np.int32)

        # initialize convpool layers
        N, c, width, height = X.shape
        mi = c
        outw = width
        outh = height
        self.convpool_layers = []
        for mo, fw, fh in self.convpool_layer_sizes:
            layer = ConvPoolLayer(mi, mo, fw, fh)
            self.convpool_layers.append(layer)
            # the convolution shrinks each side by (filter size - 1),
            # then the 2x2 pooling halves it (rounding down)
            outw = (outw - fw + 1) // 2
            outh = (outh - fh + 1) // 2
            mi = mo

        # initialize mlp layers
        K = len(set(Y))
        self.hidden_layers = []
        M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer
        count = 0
        for M2 in self.hidden_layer_sizes:
            h = HiddenLayer(M1, M2, count)
            self.hidden_layers.append(h)
            M1 = M2
            count += 1

        # logistic regression layer
        W, b = init_weight_and_bias(M1, K)
        self.W = theano.shared(W, 'W_logreg')
        self.b = theano.shared(b, 'b_logreg')

        # collect params for later use
        # NOTE: the loop variable `c` below rebinds the channel count unpacked
        # above; that value has already been consumed (mi = c) by this point.
        self.params = [self.W, self.b]
        for c in self.convpool_layers:
            self.params += c.params
        for h in self.hidden_layers:
            self.params += h.params

        # set up theano functions and variables
        thX = T.tensor4('X', dtype='float32')
        thY = T.ivector('Y')
        pY = self.forward(thX)

        # penalty: reg times the sum of squares of every parameter
        rcost = reg*T.sum([(p*p).sum() for p in self.params])
        # mean negative log-probability of the target class, plus the penalty
        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost
        prediction = self.th_predict(thX)

        cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction])

        updates = rmsprop(cost, self.params, lr, mu, decay, eps)
        train_op = theano.function(
            inputs=[thX, thY],
            outputs=cost,
            updates=updates
        )

        # samples beyond n_batches*batch_sz are skipped in each epoch
        n_batches = N // batch_sz
        costs = []
        for i in range(epochs):
            X, Y = shuffle(X, Y)
            for j in range(n_batches):
                Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]
                Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]

                train_c = train_op(Xbatch, Ybatch)

                if j % 20 == 0:
                    # evaluate on the whole validation set
                    c, p = cost_predict_op(Xvalid, Yvalid)
                    costs.append(c)
                    e = error_rate(Yvalid, p)
                    print(
                        "i:", i,
                        "j:", j,
                        "nb:", n_batches,
                        "train cost:", train_c,
                        "cost:", c,
                        "error rate:", e
                    )

        if show_fig:
            plt.plot(costs)
            plt.show()

    def forward(self, X):
        """Return the symbolic softmax class probabilities for symbolic input X."""
        Z = X
        for c in self.convpool_layers:
            Z = c.forward(Z)
        # collapse everything but the batch axis before the dense layers
        Z = Z.flatten(ndim=2)
        for h in self.hidden_layers:
            Z = h.forward(Z)
        return T.nnet.softmax(Z.dot(self.W) + self.b)

    def th_predict(self, X):
        """Return the symbolic arg-max class index for symbolic input X."""
        pY = self.forward(X)
        return T.argmax(pY, axis=1)


def main():
    """Load the image data and train a CNN with two conv-pool and two dense layers."""
    Xtrain, Ytrain, Xvalid, Yvalid = getImageData()

    conv_spec = [(20, 5, 5), (20, 5, 5)]
    dense_spec = [500, 300]
    net = CNN(convpool_layer_sizes=conv_spec, hidden_layer_sizes=dense_spec)
    net.fit(Xtrain, Ytrain, Xvalid, Yvalid)


if __name__ == '__main__':
    main()


================================================
FILE: logistic.py
================================================
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import matplotlib.pyplot as plt

from util import getData, softmax, cost, y2indicator, error_rate
from sklearn.utils import shuffle


class LogisticModel(object):
    """Multiclass (softmax) logistic regression trained by full-batch gradient descent."""

    def __init__(self):
        pass

    def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-7, reg=0., epochs=10000, show_fig=False):
        """Train on (X, Y) and report validation cost/error every 10 epochs.

        Y and Yvalid are integer label vectors; `reg` is the L2 penalty
        weight. When `show_fig` is true the recorded validation costs are
        plotted at the end.
        """
        valid_targets = y2indicator(Yvalid)
        train_targets = y2indicator(Y)

        _, D = X.shape
        K = len(set(Y))
        self.W = np.random.randn(D, K) / np.sqrt(D)
        self.b = np.zeros(K)

        costs = []
        best_validation_error = 1
        for epoch in range(epochs):
            # prediction error of the current weights on the training set
            delta = self.forward(X) - train_targets

            # gradient descent step
            self.W -= learning_rate*(X.T.dot(delta) + reg*self.W)
            self.b -= learning_rate*(delta.sum(axis=0) + reg*self.b)

            if epoch % 10 != 0:
                continue

            valid_probs = self.forward(Xvalid)
            valid_cost = cost(valid_targets, valid_probs)
            costs.append(valid_cost)
            valid_error = error_rate(Yvalid, np.argmax(valid_probs, axis=1))
            print("i:", epoch, "cost:", valid_cost, "error:", valid_error)
            best_validation_error = min(best_validation_error, valid_error)
        print("best_validation_error:", best_validation_error)

        if show_fig:
            plt.plot(costs)
            plt.show()

    def forward(self, X):
        """Return the softmax class probabilities for each row of X."""
        activation = X.dot(self.W) + self.b
        return softmax(activation)

    def predict(self, X):
        """Return the most probable class index for each row of X."""
        return np.argmax(self.forward(X), axis=1)

    def score(self, X, Y):
        """Return classification accuracy (1 - error rate) on (X, Y)."""
        return 1 - error_rate(Y, self.predict(X))


def main():
    """Train the softmax logistic model on the data from getData() and print its validation accuracy."""
    Xtrain, Ytrain, Xvalid, Yvalid = getData()

    classifier = LogisticModel()
    classifier.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True)
    accuracy = classifier.score(Xvalid, Yvalid)
    print(accuracy)


if __name__ == '__main__':
    main()


================================================
FILE: logistic_sigmoid.py
================================================
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from util import getBinaryData, sigmoid, sigmoid_cost, error_rate


class LogisticModel(object):
    """Binary (sigmoid) logistic regression trained by full-batch gradient descent."""

    def __init__(self):
        pass

    def fit(self, X, Y, learning_rate=1e-6, reg=0., epochs=120000, show_fig=False):
        """Train on (X, Y), holding out the last 1000 shuffled samples for validation.

        Validation cost and error are printed every 20 epochs; `reg` is the
        L2 penalty weight. When `show_fig` is true the recorded validation
        costs are plotted at the end.
        """
        X, Y = shuffle(X, Y)
        Xvalid, Yvalid = X[-1000:], Y[-1000:]
        X, Y = X[:-1000], Y[:-1000]

        _, D = X.shape
        self.W = np.random.randn(D) / np.sqrt(D)
        self.b = 0

        costs = []
        best_validation_error = 1
        for epoch in range(epochs):
            # prediction error of the current weights on the training set
            delta = self.forward(X) - Y

            # gradient descent step
            self.W -= learning_rate*(X.T.dot(delta) + reg*self.W)
            self.b -= learning_rate*(delta.sum() + reg*self.b)

            if epoch % 20 != 0:
                continue

            valid_probs = self.forward(Xvalid)
            valid_cost = sigmoid_cost(Yvalid, valid_probs)
            costs.append(valid_cost)
            valid_error = error_rate(Yvalid, np.round(valid_probs))
            print("i:", epoch, "cost:", valid_cost, "error:", valid_error)
            best_validation_error = min(best_validation_error, valid_error)
        print("best_validation_error:", best_validation_error)

        if show_fig:
            plt.plot(costs)
            plt.show()

    def forward(self, X):
        """Return P(y=1 | x) for each row of X."""
        activation = X.dot(self.W) + self.b
        return sigmoid(activation)

    def predict(self, X):
        """Return the rounded (0/1) prediction for each row of X."""
        return np.round(self.forward(X))

    def score(self, X, Y):
        """Return classification accuracy (1 - error rate) on (X, Y)."""
        return 1 - error_rate(Y, self.predict(X))


def main():
    """Load the binary data, repeat every class-1 sample 9 times, train, and print accuracy."""
    X, Y = getBinaryData()

    # class-1 rows are duplicated 9x (presumably to offset class imbalance)
    X0 = X[Y==0, :]
    X1 = X[Y==1, :]
    X1 = np.repeat(X1, 9, axis=0)
    X = np.vstack([X0, X1])
    Y = np.array([0]*len(X0) + [1]*len(X1))

    model = LogisticModel()
    model.fit(X, Y, show_fig=True)
    # The score used to be computed and silently discarded; report it.
    # It is measured on the full resampled set, which includes the
    # training samples.
    print(model.score(X, Y))

if __name__ == '__main__':
    main()


================================================
FILE: show_images.py
================================================
from __future__ import print_function, division
from builtins import range, input
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import matplotlib.pyplot as plt

from util import getData

# Human-readable emotion names; the list index is the integer class label
# (main() looks names up with label_map[y[j]]).
label_map = ['Anger', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

def main():
    """Interactively display one random image per class until the user quits.

    Loads the unbalanced data via getData(balance_ones=False) and uses its
    first two return values. Each round shows, for every label 0..6, a
    randomly chosen sample reshaped to 48x48 and titled with its emotion name,
    then asks whether to stop.
    """
    images, labels, _, _ = getData(balance_ones=False)

    keep_going = True
    while keep_going:
        for emotion in range(7):
            of_class = labels == emotion
            class_images = images[of_class]
            class_labels = labels[of_class]
            pick = np.random.choice(len(class_labels))
            plt.imshow(class_images[pick].reshape(48, 48), cmap='gray')
            plt.title(label_map[class_labels[pick]])
            plt.show()
        answer = input('Quit? Enter Y:\n')
        # Any reply starting with 'y' or 'Y' ends the loop.
        keep_going = not answer.lower().startswith('y')


# Start the image viewer only when this file is run as a script.
if __name__ == "__main__":
    main()


================================================
FILE: util.py
================================================
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import pandas as pd
from sklearn.utils import shuffle


def init_weight_and_bias(M1, M2):
    """Create initial parameters for a layer with M1 inputs and M2 outputs.

    Returns:
        (W, b): W is an (M1, M2) array of standard-normal draws divided by
        sqrt(M1); b is a length-M2 vector of zeros. Both are float32.
    """
    scale = np.sqrt(M1)
    weights = (np.random.randn(M1, M2) / scale).astype(np.float32)
    biases = np.zeros(M2).astype(np.float32)
    return weights, biases


def init_filter(shape, poolsz):
    """Return random float32 convolution filters scaled by fan-in and fan-out.

    Draws standard-normal values of the given shape and multiplies them by
    sqrt(2) / sqrt(fan_in + fan_out), where

        fan_in  = prod(shape[1:])
        fan_out = shape[0] * prod(shape[2:]) / prod(poolsz)

    Args:
        shape: filter shape; shape[0] and shape[1:] are treated as described
            above (assumes a (out_maps, in_maps, width, height) layout --
            TODO confirm against the caller).
        poolsz: pooling window sizes; only their product is used.

    Returns:
        np.ndarray of the requested shape, dtype float32.
    """
    fan_in = np.prod(shape[1:])
    # The pool product must divide the whole spatial product once. A misplaced
    # parenthesis previously divided every spatial dimension by it instead.
    fan_out = shape[0] * np.prod(shape[2:]) / np.prod(poolsz)
    w = np.random.randn(*shape) * np.sqrt(2) / np.sqrt(fan_in + fan_out)
    return w.astype(np.float32)


def relu(x):
    """Rectified linear unit: keep x where it is positive, zero it elsewhere."""
    # Multiplying by the boolean mask zeroes every entry that is not > 0.
    positive = x > 0
    return x * positive


def sigmoid(A):
    """Elementwise logistic function 1 / (1 + exp(-A))."""
    denominator = 1 + np.exp(-A)
    return 1 / denominator


def softmax(A):
    """Row-wise softmax of a 2-D array of activations.

    Args:
        A: array-like with samples along axis 0 and classes along axis 1.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    # Subtracting each row's maximum leaves the result unchanged, because the
    # common factor exp(max) cancels in the ratio. It keeps every exponent
    # <= 0, so np.exp cannot overflow to inf and produce NaN from inf / inf.
    shifted = A - np.max(A, axis=1, keepdims=True)
    expA = np.exp(shifted)
    return expA / expA.sum(axis=1, keepdims=True)


def sigmoid_cost(T, Y):
    """Binary cross-entropy between targets T and outputs Y, summed over all entries."""
    target_on = T * np.log(Y)
    target_off = (1 - T) * np.log(1 - Y)
    return -(target_on + target_off).sum()


def cost(T, Y):
    """Cross-entropy of outputs Y against indicator targets T, summed (not averaged)."""
    weighted_log = T * np.log(Y)
    return -weighted_log.sum()


def cost2(T, Y):
    """Cross-entropy using integer targets T to pick entries of Y.

    For each row i the entry Y[i, T[i]] is selected directly, which avoids
    multiplying by a mostly-zero indicator matrix. Unlike cost(), the result
    is the mean over rows rather than the sum.
    """
    rows = np.arange(len(T))
    picked = Y[rows, T]
    return -np.log(picked).mean()


def error_rate(targets, predictions):
    """Return the fraction of positions where predictions differ from targets."""
    mismatches = targets != predictions
    return np.mean(mismatches)


def y2indicator(y):
    """Convert integer class labels into a one-hot indicator matrix.

    Args:
        y: 1-D sequence of non-negative integer labels.

    Returns:
        Float array of shape (N, K) with N = len(y) and K = max(y) + 1, where
        row i has a single 1 in column y[i]. An empty y gives shape (0, 0).
    """
    y = np.asarray(y)
    N = len(y)
    if N == 0:
        return np.zeros((0, 0))
    # Size the matrix from the largest label, not the number of distinct ones.
    # Labels such as [0, 2] would otherwise index past the last column. For
    # labels that are exactly 0..K-1 the two counts are the same.
    K = int(y.max()) + 1
    ind = np.zeros((N, K))
    ind[np.arange(N), y] = 1
    return ind


def getData(balance_ones=True, Ntest=1000):
    """Load 'fer2013.csv', shuffle it and split it into training and validation sets.

    The first line of the file is skipped as a header. In every other line the
    first comma-separated field is the integer label and the second is a
    space-separated list of integer pixel values, which are scaled by 1/255.

    Args:
        balance_ones: when true, every training row labelled 1 is repeated
            9 times (the validation set is left untouched).
        Ntest: number of shuffled rows held out as the validation set.

    Returns:
        (Xtrain, Ytrain, Xvalid, Yvalid) as NumPy arrays.
    """
    # images are 48x48 = 2304 size vectors
    Y = []
    X = []
    # The context manager closes the CSV handle even if a row fails to parse.
    with open('fer2013.csv') as f:
        next(f, None)  # skip the header row
        for line in f:
            row = line.split(',')
            Y.append(int(row[0]))
            X.append([int(p) for p in row[1].split()])

    X, Y = np.array(X) / 255.0, np.array(Y)

    # shuffle and split
    X, Y = shuffle(X, Y)
    # Split on an explicit index: X[:-Ntest] is empty when Ntest == 0, which
    # would hand the whole dataset to validation and nothing to training.
    n_train = max(len(X) - Ntest, 0)
    Xtrain, Ytrain = X[:n_train], Y[:n_train]
    Xvalid, Yvalid = X[n_train:], Y[n_train:]

    if balance_ones:
        # balance the 1 class
        X0, Y0 = Xtrain[Ytrain != 1, :], Ytrain[Ytrain != 1]
        X1 = Xtrain[Ytrain == 1, :]
        X1 = np.repeat(X1, 9, axis=0)
        Xtrain = np.vstack([X0, X1])
        Ytrain = np.concatenate((Y0, [1]*len(X1)))

    return Xtrain, Ytrain, Xvalid, Yvalid


def getImageData():
    """Return getData() with the flat feature rows reshaped into square images.

    Each row of length D is reshaped to (1, d, d) with d = int(sqrt(D)), so
    both X arrays come back with shape (-1, 1, d, d). Labels are unchanged.
    """
    Xtrain, Ytrain, Xvalid, Yvalid = getData()
    _, n_pixels = Xtrain.shape
    side = int(np.sqrt(n_pixels))
    image_shape = (-1, 1, side, side)
    return Xtrain.reshape(image_shape), Ytrain, Xvalid.reshape(image_shape), Yvalid


def getBinaryData():
    """Load only the rows of 'fer2013.csv' whose label is 0 or 1.

    The first line is skipped as a header. In every other line the first
    comma-separated field is the integer label and the second is a
    space-separated list of integer pixel values.

    Returns:
        (X, Y): X holds the pixel rows scaled by 1/255, Y the matching labels.
    """
    Y = []
    X = []
    # The context manager closes the CSV handle even if a row fails to parse.
    with open('fer2013.csv') as f:
        next(f, None)  # skip the header row
        for line in f:
            row = line.split(',')
            y = int(row[0])
            if y == 0 or y == 1:
                Y.append(y)
                X.append([int(p) for p in row[1].split()])
    return np.array(X) / 255.0, np.array(Y)


def crossValidation(model, X, Y, K=5):
    """Run K-fold cross-validation and return the mean of the per-fold scores.

    The data is shuffled and cut into K folds of len(Y) // K rows each. Any
    remainder rows never form a test fold and stay in every training set.
    For each fold the model is fitted on the other rows and model.score() is
    called on the held-out fold.

    NOTE(review): the results are collected under the name "errors", but they
    are whatever model.score returns -- confirm whether that is an error or an
    accuracy for the model being passed in.
    """
    X, Y = shuffle(X, Y)
    fold_size = len(Y) // K
    errors = []
    for k in range(K):
        start = k * fold_size
        stop = start + fold_size

        # Training data is everything before and after the held-out slice.
        train_X = np.concatenate([X[:start, :], X[stop:, :]])
        train_Y = np.concatenate([Y[:start], Y[stop:]])
        test_X = X[start:stop, :]
        test_Y = Y[start:stop]

        model.fit(train_X, train_Y)
        errors.append(model.score(test_X, test_Y))
    print("errors:", errors)
    return np.mean(errors)
Download .txt
gitextract_o6irw1n6/

├── .gitignore
├── README.md
├── ann.py
├── ann_sigmoid.py
├── ann_tf.py
├── ann_theano.py
├── cnn_tf.py
├── cnn_theano.py
├── logistic.py
├── logistic_sigmoid.py
├── show_images.py
└── util.py
Download .txt
SYMBOL INDEX (82 symbols across 10 files)

FILE: ann.py
  class ANN (line 13) | class ANN(object):
    method __init__ (line 14) | def __init__(self, M):
    method fit (line 17) | def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-6, reg=1e-6, epoc...
    method forward (line 57) | def forward(self, X):
    method predict (line 62) | def predict(self, X):
    method score (line 66) | def score(self, X, Y):
  function main (line 71) | def main():

FILE: ann_sigmoid.py
  class ANN (line 12) | class ANN(object):
    method __init__ (line 13) | def __init__(self, M):
    method fit (line 16) | def fit(self, X, Y, learning_rate=5e-7, reg=1.0, epochs=10000, show_fi...
    method forward (line 60) | def forward(self, X):
    method predict (line 66) | def predict(self, X):
    method score (line 71) | def score(self, X, Y):
  function main (line 76) | def main():

FILE: ann_tf.py
  class HiddenLayer (line 13) | class HiddenLayer(object):
    method __init__ (line 14) | def __init__(self, M1, M2, an_id):
    method forward (line 23) | def forward(self, X):
  class ANN (line 27) | class ANN(object):
    method __init__ (line 28) | def __init__(self, hidden_layer_sizes):
    method fit (line 31) | def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-2, mu=0.99, decay...
    method forward (line 103) | def forward(self, X):
    method predict (line 109) | def predict(self, X):
  function main (line 114) | def main():

FILE: ann_theano.py
  function rmsprop (line 16) | def rmsprop(cost, params, lr, mu, decay, eps):
  class HiddenLayer (line 40) | class HiddenLayer(object):
    method __init__ (line 41) | def __init__(self, M1, M2, an_id):
    method forward (line 50) | def forward(self, X):
  class ANN (line 54) | class ANN(object):
    method __init__ (line 55) | def __init__(self, hidden_layer_sizes):
    method fit (line 58) | def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-2, mu=0.99, decay...
    method th_forward (line 130) | def th_forward(self, X):
    method th_predict (line 136) | def th_predict(self, X):
    method predict (line 140) | def predict(self, X):
  function main (line 144) | def main():

FILE: cnn_tf.py
  function init_filter (line 20) | def init_filter(shape, poolsz):
  class ConvPoolLayer (line 25) | class ConvPoolLayer(object):
    method __init__ (line 26) | def __init__(self, mi, mo, fw=5, fh=5, poolsz=(2, 2)):
    method forward (line 37) | def forward(self, X):
  class CNN (line 50) | class CNN(object):
    method __init__ (line 51) | def __init__(self, convpool_layer_sizes, hidden_layer_sizes):
    method fit (line 55) | def fit(self, X, Y, Xvalid, Yvalid, lr=1e-2, mu=0.9, reg=1e-3, decay=0...
    method forward (line 147) | def forward(self, X):
    method predict (line 157) | def predict(self, X):
  function main (line 162) | def main():

FILE: cnn_theano.py
  class ConvPoolLayer (line 19) | class ConvPoolLayer(object):
    method __init__ (line 20) | def __init__(self, mi, mo, fw=5, fh=5, poolsz=(2, 2)):
    method forward (line 31) | def forward(self, X):
  class CNN (line 42) | class CNN(object):
    method __init__ (line 43) | def __init__(self, convpool_layer_sizes, hidden_layer_sizes):
    method fit (line 47) | def fit(self, X, Y, Xvalid, Yvalid, lr=1e-3, mu=0.99, reg=1e-3, decay=...
    method forward (line 141) | def forward(self, X):
    method th_predict (line 150) | def th_predict(self, X):
  function main (line 155) | def main():

FILE: logistic.py
  class LogisticModel (line 13) | class LogisticModel(object):
    method __init__ (line 14) | def __init__(self):
    method fit (line 17) | def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-7, reg=0., epochs...
    method forward (line 51) | def forward(self, X):
    method predict (line 54) | def predict(self, X):
    method score (line 58) | def score(self, X, Y):
  function main (line 63) | def main():

FILE: logistic_sigmoid.py
  class LogisticModel (line 12) | class LogisticModel(object):
    method __init__ (line 13) | def __init__(self):
    method fit (line 16) | def fit(self, X, Y, learning_rate=1e-6, reg=0., epochs=120000, show_fi...
    method forward (line 51) | def forward(self, X):
    method predict (line 54) | def predict(self, X):
    method score (line 59) | def score(self, X, Y):
  function main (line 64) | def main():

FILE: show_images.py
  function main (line 13) | def main():

FILE: util.py
  function init_weight_and_bias (line 11) | def init_weight_and_bias(M1, M2):
  function init_filter (line 17) | def init_filter(shape, poolsz):
  function relu (line 22) | def relu(x):
  function sigmoid (line 26) | def sigmoid(A):
  function softmax (line 30) | def softmax(A):
  function sigmoid_cost (line 35) | def sigmoid_cost(T, Y):
  function cost (line 39) | def cost(T, Y):
  function cost2 (line 43) | def cost2(T, Y):
  function error_rate (line 50) | def error_rate(targets, predictions):
  function y2indicator (line 54) | def y2indicator(y):
  function getData (line 63) | def getData(balance_ones=True, Ntest=1000):
  function getImageData (line 94) | def getImageData():
  function getBinaryData (line 103) | def getBinaryData():
  function crossValidation (line 119) | def crossValidation(model, X, Y, K=5):
Condensed preview — 12 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (35K chars).
[
  {
    "path": ".gitignore",
    "chars": 33,
    "preview": "fer2013*\n.DS_Store\n*.pyc\nTEST.py\n"
  },
  {
    "path": "README.md",
    "chars": 578,
    "preview": "Relevant Course URLs:\n\n* https://deeplearningcourses.com/c/data-science-logistic-regression-in-python/\n* https://deeplea"
  },
  {
    "path": "ann.py",
    "chars": 2484,
    "preview": "from __future__ import print_function, division\nfrom builtins import range\n# Note: you may need to update your version o"
  },
  {
    "path": "ann_sigmoid.py",
    "chars": 2671,
    "preview": "from __future__ import print_function, division\nfrom builtins import range\n# Note: you may need to update your version o"
  },
  {
    "path": "ann_tf.py",
    "chars": 3962,
    "preview": "from __future__ import print_function, division\nfrom builtins import range\n# Note: you may need to update your version o"
  },
  {
    "path": "ann_theano.py",
    "chars": 4526,
    "preview": "from __future__ import print_function, division\nfrom builtins import range\n# Note: you may need to update your version o"
  },
  {
    "path": "cnn_tf.py",
    "chars": 5882,
    "preview": "from __future__ import print_function, division\nfrom builtins import range\n# Note: you may need to update your version o"
  },
  {
    "path": "cnn_theano.py",
    "chars": 5162,
    "preview": "from __future__ import print_function, division\nfrom builtins import range\n# Note: you may need to update your version o"
  },
  {
    "path": "logistic.py",
    "chars": 2033,
    "preview": "from __future__ import print_function, division\nfrom builtins import range\n# Note: you may need to update your version o"
  },
  {
    "path": "logistic_sigmoid.py",
    "chars": 2285,
    "preview": "from __future__ import print_function, division\nfrom builtins import range\n# Note: you may need to update your version o"
  },
  {
    "path": "show_images.py",
    "chars": 785,
    "preview": "from __future__ import print_function, division\nfrom builtins import range, input\n# Note: you may need to update your ve"
  },
  {
    "path": "util.py",
    "chars": 3286,
    "preview": "from __future__ import print_function, division\nfrom builtins import range\n# Note: you may need to update your version o"
  }
]

About this extraction

This page contains the full source code of the lazyprogrammer/facial-expression-recognition GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 12 files (32.9 KB), approximately 9.4k tokens, and a symbol index with 82 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!