[
  {
    "path": ".gitignore",
    "content": "fer2013*\n.DS_Store\n*.pyc\nTEST.py\n"
  },
  {
    "path": "README.md",
    "content": "Relevant Course URLs:\n\n* https://deeplearningcourses.com/c/data-science-logistic-regression-in-python/\n* https://deeplearningcourses.com/c/data-science-deep-learning-in-python/\n* https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow/\n* https://deeplearningcourses.com/c/deep-learning-convolutional-neural-networks-theano-tensorflow/\n\nData:\n\nhttps://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge\n\nIf you get \"An error occurred: Data not found\":\n\nhttps://archive.org/download/fer2013_202311/fer2013.csv\n"
  },
  {
    "path": "ann.py",
    "content": "from __future__ import print_function, division\nfrom builtins import range\n# Note: you may need to update your version of future\n# sudo pip install -U future\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom util import getData, softmax, cost2, y2indicator, error_rate, relu\nfrom sklearn.utils import shuffle\n\n\n# One-hidden-layer softmax classifier (tanh hidden units) trained with\n# full-batch gradient descent.\nclass ANN(object):\n    def __init__(self, M):\n        # M = number of hidden units\n        self.M = M\n\n    # Trains on (X, Y) and prints cost / error rate on (Xvalid, Yvalid) every 10 epochs.\n    # Y and Yvalid hold integer class labels; reg is the L2 penalty weight.\n    # If show_fig is true the recorded validation costs are plotted at the end.\n    def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-6, reg=1e-6, epochs=10000, show_fig=False):\n\n        N, D = X.shape\n        K = len(set(Y))\n        T = y2indicator(Y)\n        self.W1 = np.random.randn(D, self.M) / np.sqrt(D)\n        self.b1 = np.zeros(self.M)\n        self.W2 = np.random.randn(self.M, K) / np.sqrt(self.M)\n        self.b2 = np.zeros(K)\n\n        costs = []\n        best_validation_error = 1\n        for i in range(epochs):\n            # forward propagation and cost calculation\n            pY, Z = self.forward(X)\n\n            # gradient descent step\n            pY_T = pY - T\n            # backpropagate through W2 before it is updated, so the hidden-layer\n            # gradient is taken at the same weights the forward pass used\n            # dZ = pY_T.dot(self.W2.T) * (Z > 0) # relu\n            dZ = pY_T.dot(self.W2.T) * (1 - Z*Z) # tanh\n            self.W2 -= learning_rate*(Z.T.dot(pY_T) + reg*self.W2)\n            self.b2 -= learning_rate*(pY_T.sum(axis=0) + reg*self.b2)\n            self.W1 -= learning_rate*(X.T.dot(dZ) + reg*self.W1)\n            self.b1 -= learning_rate*(dZ.sum(axis=0) + reg*self.b1)\n\n            if i % 10 == 0:\n                pYvalid, _ = self.forward(Xvalid)\n                c = cost2(Yvalid, pYvalid)\n                costs.append(c)\n                e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))\n                print(\"i:\", i, \"cost:\", c, \"error:\", e)\n                if e < best_validation_error:\n                    best_validation_error = e\n        print(\"best_validation_error:\", best_validation_error)\n\n        if show_fig:\n            plt.plot(costs)\n            plt.show()\n\n\n    # Returns (class probabilities, hidden activations); fit() reuses the\n    # hidden activations for backpropagation.\n    def forward(self, X):\n        # Z = relu(X.dot(self.W1) + self.b1)\n        Z = np.tanh(X.dot(self.W1) + self.b1)\n        return softmax(Z.dot(self.W2) + self.b2), Z\n\n    # Returns the most probable class index for each row of X.\n    def predict(self, X):\n        pY, _ = self.forward(X)\n        return np.argmax(pY, axis=1)\n\n    # Returns classification accuracy (1 - error rate) on (X, Y).\n    def score(self, X, Y):\n        prediction = self.predict(X)\n        return 1 - error_rate(Y, prediction)\n\n\ndef main():\n    Xtrain, Ytrain, Xvalid, Yvalid = getData()\n    \n    model = ANN(200)\n    model.fit(Xtrain, Ytrain, Xvalid, Yvalid, reg=0, show_fig=True)\n    print(model.score(Xvalid, Yvalid))\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "ann_sigmoid.py",
    "content": "from __future__ import print_function, division\nfrom builtins import range\n# Note: you may need to update your version of future\n# sudo pip install -U future\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.utils import shuffle\nfrom util import getBinaryData, sigmoid, sigmoid_cost, error_rate, relu\n\n\n# One-hidden-layer binary classifier: relu hidden units, sigmoid output.\nclass ANN(object):\n    def __init__(self, M):\n        # M = number of hidden units\n        self.M = M\n\n    # Shuffles (X, Y), holds out the last 1000 samples as a validation set and runs\n    # full-batch gradient descent, printing validation cost / error every 20 epochs.\n    # If show_fig is true the recorded validation costs are plotted at the end.\n    def fit(self, X, Y, learning_rate=5e-7, reg=1.0, epochs=10000, show_fig=False):\n        X, Y = shuffle(X, Y)\n        Xvalid, Yvalid = X[-1000:], Y[-1000:]\n        X, Y = X[:-1000], Y[:-1000]\n\n        N, D = X.shape\n        self.W1 = np.random.randn(D, self.M) / np.sqrt(D)\n        self.b1 = np.zeros(self.M)\n        self.W2 = np.random.randn(self.M) / np.sqrt(self.M)\n        self.b2 = 0\n\n        costs = []\n        best_validation_error = 1\n        for i in range(epochs):\n            # forward propagation and cost calculation\n            pY, Z = self.forward(X)\n\n            # gradient descent step\n            pY_Y = pY - Y\n\n            # print \"(pY_Y).dot(self.W2.T) shape:\", (pY_Y).dot(self.W2.T).shape\n            # print \"Z shape:\", Z.shape\n            # backpropagate through W2 before it is updated, so the hidden-layer\n            # gradient is taken at the same weights the forward pass used\n            dZ = np.outer(pY_Y, self.W2) * (Z > 0)\n            # dZ = np.outer(pY_Y, self.W2) * (1 - Z*Z)\n\n            self.W2 -= learning_rate*(Z.T.dot(pY_Y) + reg*self.W2)\n            self.b2 -= learning_rate*((pY_Y).sum() + reg*self.b2)\n            self.W1 -= learning_rate*(X.T.dot(dZ) + reg*self.W1)\n            self.b1 -= learning_rate*(np.sum(dZ, axis=0) + reg*self.b1)\n\n            if i % 20 == 0:\n                pYvalid, _ = self.forward(Xvalid)\n                c = sigmoid_cost(Yvalid, pYvalid)\n                costs.append(c)\n                e = error_rate(Yvalid, np.round(pYvalid))\n                print(\"i:\", i, \"cost:\", c, \"error:\", e)\n                if e < best_validation_error:\n                    best_validation_error = e\n        print(\"best_validation_error:\", best_validation_error)\n\n        if show_fig:\n            plt.plot(costs)\n            plt.show()\n\n\n    # Returns (sigmoid outputs, hidden activations); fit() reuses the hidden\n    # activations for backpropagation.\n    def forward(self, X):\n        Z = relu(X.dot(self.W1) + self.b1)\n        # Z = np.tanh(X.dot(self.W1) + self.b1)\n        return sigmoid(Z.dot(self.W2) + self.b2), Z\n\n\n    # Returns the rounded (0/1) sigmoid output for each row of X.\n    def predict(self, X):\n        # forward() returns a (outputs, hidden activations) pair; only the\n        # outputs are rounded into predictions\n        pY, _ = self.forward(X)\n        return np.round(pY)\n\n\n    # Returns classification accuracy (1 - error rate) on (X, Y).\n    def score(self, X, Y):\n        prediction = self.predict(X)\n        return 1 - error_rate(Y, prediction)\n\n\ndef main():\n    X, Y = getBinaryData()\n\n    X0 = X[Y==0, :]\n    X1 = X[Y==1, :]\n    X1 = np.repeat(X1, 9, axis=0)\n    X = np.vstack([X0, X1])\n    Y = np.array([0]*len(X0) + [1]*len(X1))\n    \n    model = ANN(100)\n    model.fit(X, Y, show_fig=True)\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "ann_tf.py",
    "content": "from __future__ import print_function, division\nfrom builtins import range\n# Note: you may need to update your version of future\n# sudo pip install -U future\n\nimport numpy as np\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\nfrom util import getData, getBinaryData, y2indicator, error_rate, init_weight_and_bias\nfrom sklearn.utils import shuffle\n\n\nclass HiddenLayer(object):\n    def __init__(self, M1, M2, an_id):\n        self.id = an_id\n        self.M1 = M1\n        self.M2 = M2\n        W, b = init_weight_and_bias(M1, M2)\n        self.W = tf.Variable(W.astype(np.float32))\n        self.b = tf.Variable(b.astype(np.float32))\n        self.params = [self.W, self.b]\n\n    def forward(self, X):\n        return tf.nn.relu(tf.matmul(X, self.W) + self.b)\n\n\nclass ANN(object):\n    def __init__(self, hidden_layer_sizes):\n        self.hidden_layer_sizes = hidden_layer_sizes\n\n    def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-2, mu=0.99, decay=0.999, reg=1e-3, epochs=10, batch_sz=100, show_fig=False):\n        K = len(set(Y)) # won't work later b/c we turn it into indicator\n\n        # make a validation set\n        X, Y = shuffle(X, Y)\n        X = X.astype(np.float32)\n        Y = y2indicator(Y).astype(np.float32)\n\n        # for calculating error rate\n        Yvalid_flat = Yvalid\n        Yvalid = y2indicator(Yvalid).astype(np.float32)\n\n        # initialize hidden layers\n        N, D = X.shape\n        \n        self.hidden_layers = []\n        M1 = D\n        count = 0\n        for M2 in self.hidden_layer_sizes:\n            h = HiddenLayer(M1, M2, count)\n            self.hidden_layers.append(h)\n            M1 = M2\n            count += 1\n        W, b = init_weight_and_bias(M1, K)\n        self.W = tf.Variable(W.astype(np.float32))\n        self.b = tf.Variable(b.astype(np.float32))\n\n        # collect params for later use\n        self.params = [self.W, self.b]\n        for h in self.hidden_layers:\n            
self.params += h.params\n\n        # set up theano functions and variables\n        tfX = tf.placeholder(tf.float32, shape=(None, D), name='X')\n        tfT = tf.placeholder(tf.float32, shape=(None, K), name='T')\n        act = self.forward(tfX)\n\n        rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params])\n        cost = tf.reduce_mean(\n            tf.nn.softmax_cross_entropy_with_logits(\n                logits=act,\n                labels=tfT\n            )\n        ) + rcost\n        prediction = self.predict(tfX)\n        train_op = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=mu).minimize(cost)\n\n        n_batches = N // batch_sz\n        costs = []\n        init = tf.global_variables_initializer()\n        with tf.Session() as session:\n            session.run(init)\n            for i in range(epochs):\n                X, Y = shuffle(X, Y)\n                for j in range(n_batches):\n                    Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]\n                    Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]\n\n                    session.run(train_op, feed_dict={tfX: Xbatch, tfT: Ybatch})\n\n                    if j % 20 == 0:\n                        c = session.run(cost, feed_dict={tfX: Xvalid, tfT: Yvalid})\n                        costs.append(c)\n\n                        p = session.run(prediction, feed_dict={tfX: Xvalid, tfT: Yvalid})\n                        e = error_rate(Yvalid_flat, p)\n                        print(\"i:\", i, \"j:\", j, \"nb:\", n_batches, \"cost:\", c, \"error rate:\", e)\n        \n        if show_fig:\n            plt.plot(costs)\n            plt.show()\n\n    def forward(self, X):\n        Z = X\n        for h in self.hidden_layers:\n            Z = h.forward(Z)\n        return tf.matmul(Z, self.W) + self.b\n\n    def predict(self, X):\n        act = self.forward(X)\n        return tf.argmax(act, 1)\n\n\ndef main():\n    Xtrain, Ytrain, Xvalid, Yvalid = getData()\n    model = ANN([2000, 1000, 
500])\n    model.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True)\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "ann_theano.py",
    "content": "from __future__ import print_function, division\nfrom builtins import range\n# Note: you may need to update your version of future\n# sudo pip install -U future\n\nimport numpy as np\nimport theano\nimport theano.tensor as T\nimport matplotlib.pyplot as plt\n\nfrom util import getData, getBinaryData, error_rate, relu, init_weight_and_bias\nfrom sklearn.utils import shuffle\n\n\n\ndef rmsprop(cost, params, lr, mu, decay, eps):\n    grads = T.grad(cost, params)\n    updates = []\n    for p, g in zip(params, grads):\n        # cache\n        ones = np.ones_like(p.get_value(), dtype=np.float32)\n        c = theano.shared(ones)\n        new_c = decay*c + (np.float32(1.0) - decay)*g*g\n\n        # momentum\n        zeros = np.zeros_like(p.get_value(), dtype=np.float32)\n        m = theano.shared(zeros)\n        new_m = mu*m - lr*g / T.sqrt(new_c + eps)\n\n        # param update\n        new_p = p + new_m\n\n        # append the updates\n        updates.append((c, new_c))\n        updates.append((m, new_m))\n        updates.append((p, new_p))\n    return updates\n\n\nclass HiddenLayer(object):\n    def __init__(self, M1, M2, an_id):\n        self.id = an_id\n        self.M1 = M1\n        self.M2 = M2\n        W, b = init_weight_and_bias(M1, M2)\n        self.W = theano.shared(W, 'W_%s' % self.id)\n        self.b = theano.shared(b, 'b_%s' % self.id)\n        self.params = [self.W, self.b]\n\n    def forward(self, X):\n        return relu(X.dot(self.W) + self.b)\n\n\nclass ANN(object):\n    def __init__(self, hidden_layer_sizes):\n        self.hidden_layer_sizes = hidden_layer_sizes\n\n    def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-2, mu=0.99, decay=0.999, reg=1e-3, eps=1e-8, epochs=10, batch_sz=100, show_fig=False):\n        # downcast\n        learning_rate = np.float32(learning_rate)\n        mu = np.float32(mu)\n        decay = np.float32(decay)\n        reg = np.float32(reg)\n        eps = np.float32(eps)\n\n        X = X.astype(np.float32)\n  
      Xvalid = Xvalid.astype(np.float32)\n        Y = Y.astype(np.int32)\n        Yvalid = Yvalid.astype(np.int32)\n\n        # initialize hidden layers\n        N, D = X.shape\n        K = len(set(Y))\n        self.hidden_layers = []\n        M1 = D\n        count = 0\n        for M2 in self.hidden_layer_sizes:\n            h = HiddenLayer(M1, M2, count)\n            self.hidden_layers.append(h)\n            M1 = M2\n            count += 1\n        W, b = init_weight_and_bias(M1, K)\n        self.W = theano.shared(W, 'W_logreg')\n        self.b = theano.shared(b, 'b_logreg')\n\n        # collect params for later use\n        self.params = [self.W, self.b]\n        for h in self.hidden_layers:\n            self.params += h.params\n\n        # set up theano functions and variables\n        thX = T.fmatrix('X')\n        thY = T.ivector('Y')\n        pY = self.th_forward(thX)\n\n        rcost = reg*T.sum([(p*p).sum() for p in self.params])\n        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost\n        prediction = self.th_predict(thX)\n\n        # actual prediction function\n        self.predict_op = theano.function(inputs=[thX], outputs=prediction)\n        cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction])\n\n        updates = rmsprop(cost, self.params, learning_rate, mu, decay, eps)\n        train_op = theano.function(\n            inputs=[thX, thY],\n            updates=updates\n        )\n\n        n_batches = N // batch_sz\n        costs = []\n        for i in range(epochs):\n            X, Y = shuffle(X, Y)\n            for j in range(n_batches):\n                Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]\n                Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]\n\n                train_op(Xbatch, Ybatch)\n\n                if j % 20 == 0:\n                    c, p = cost_predict_op(Xvalid, Yvalid)\n                    costs.append(c)\n                    e = error_rate(Yvalid, p)\n                    
print(\"i:\", i, \"j:\", j, \"nb:\", n_batches, \"cost:\", c, \"error rate:\", e)\n        \n        if show_fig:\n            plt.plot(costs)\n            plt.show()\n\n    def th_forward(self, X):\n        Z = X\n        for h in self.hidden_layers:\n            Z = h.forward(Z)\n        return T.nnet.softmax(Z.dot(self.W) + self.b)\n\n    def th_predict(self, X):\n        pY = self.th_forward(X)\n        return T.argmax(pY, axis=1)\n\n    def predict(self, X):\n        return self.predict_op(X)\n\n\ndef main():\n    Xtrain, Ytrain, Xvalid, Yvalid = getData()\n    model = ANN([2000, 1000, 500])\n    model.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True)\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "cnn_tf.py",
    "content": "from __future__ import print_function, division\nfrom builtins import range\n# Note: you may need to update your version of future\n# sudo pip install -U future\n\nimport numpy as np\nimport tensorflow as tf\nimport matplotlib.pyplot as plt\n\nfrom sklearn.utils import shuffle\n\nfrom util import getImageData, error_rate, init_weight_and_bias, y2indicator\nfrom ann_tf import HiddenLayer\n\n# differences from Theano:\n# image dimensions are expected to be: N x width x height x color\n# filter shapes are expected to be: filter width x filter height x input feature maps x output feature maps\n\n\ndef init_filter(shape, poolsz):\n    # scale is sqrt(2 / (fan_in + fan_out)): fan_in = prod(shape[:-1]) and\n    # fan_out = output maps * filter area / pooling area\n    w = np.random.randn(*shape) * np.sqrt(2) / np.sqrt(np.prod(shape[:-1]) + shape[-1]*np.prod(shape[:-2]) / np.prod(poolsz))\n    return w.astype(np.float32)\n\n\n# Convolution ('SAME' padding, stride 1) + bias, followed by max pooling and relu.\nclass ConvPoolLayer(object):\n    def __init__(self, mi, mo, fw=5, fh=5, poolsz=(2, 2)):\n        # mi = input feature map size\n        # mo = output feature map size\n        sz = (fw, fh, mi, mo)\n        W0 = init_filter(sz, poolsz)\n        self.W = tf.Variable(W0)\n        b0 = np.zeros(mo, dtype=np.float32)\n        self.b = tf.Variable(b0)\n        self.poolsz = poolsz\n        self.params = [self.W, self.b]\n\n    def forward(self, X):\n        conv_out = tf.nn.conv2d(X, self.W, strides=[1, 1, 1, 1], padding='SAME')\n        conv_out = tf.nn.bias_add(conv_out, self.b)\n        p1, p2 = self.poolsz\n        pool_out = tf.nn.max_pool(\n            conv_out,\n            ksize=[1, p1, p2, 1],\n            strides=[1, p1, p2, 1],\n            padding='SAME'\n        )\n        return tf.nn.relu(pool_out)\n\n\n# Stack of ConvPoolLayers, then fully connected HiddenLayers, then a final\n# linear layer producing one logit per class.\nclass CNN(object):\n    def __init__(self, convpool_layer_sizes, hidden_layer_sizes):\n        # convpool_layer_sizes: list of (output feature maps, filter width, filter height)\n        # hidden_layer_sizes: list of hidden unit counts\n        self.convpool_layer_sizes = convpool_layer_sizes\n        self.hidden_layer_sizes = hidden_layer_sizes\n\n    # Trains with RMSProp on mini-batches of (X, Y) and prints cost / error rate on\n    # (Xvalid, Yvalid) every 20 batches. Y and Yvalid hold integer class labels.\n    def fit(self, X, Y, Xvalid, Yvalid, lr=1e-2, mu=0.9, reg=1e-3, decay=0.99999, eps=1e-10, batch_sz=30, epochs=5, show_fig=True):\n        lr = np.float32(lr)\n        mu = np.float32(mu)\n        reg = np.float32(reg)\n        decay = np.float32(decay)\n        eps = np.float32(eps)\n        K = len(set(Y))\n\n        # shuffle the training set and one-hot encode the targets\n        X, Y = shuffle(X, Y)\n        X = X.astype(np.float32)\n        Y = y2indicator(Y).astype(np.float32)\n\n        Yvalid = y2indicator(Yvalid).astype(np.float32)\n        Yvalid_flat = np.argmax(Yvalid, axis=1) # for calculating error rate\n\n        # initialize convpool layers\n        N, width, height, c = X.shape\n        mi = c\n        outw = width\n        outh = height\n        self.convpool_layers = []\n        for mo, fw, fh in self.convpool_layer_sizes:\n            layer = ConvPoolLayer(mi, mo, fw, fh)\n            self.convpool_layers.append(layer)\n            outw = outw // 2\n            outh = outh // 2\n            mi = mo\n\n        # initialize mlp layers\n        self.hidden_layers = []\n        M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer\n        count = 0\n        for M2 in self.hidden_layer_sizes:\n            h = HiddenLayer(M1, M2, count)\n            self.hidden_layers.append(h)\n            M1 = M2\n            count += 1\n\n        # logistic regression layer\n        W, b = init_weight_and_bias(M1, K)\n        # the name is passed by keyword: the second positional parameter of\n        # tf.Variable is trainable, not name\n        self.W = tf.Variable(W, name='W_logreg')\n        self.b = tf.Variable(b, name='b_logreg')\n\n        # collect params for later use\n        self.params = [self.W, self.b]\n        for h in self.convpool_layers:\n            self.params += h.params\n        for h in self.hidden_layers:\n            self.params += h.params\n\n        # set up tensorflow functions and variables\n        tfX = tf.placeholder(tf.float32, shape=(None, width, height, c), name='X')\n        tfY = tf.placeholder(tf.float32, shape=(None, K), name='Y')\n        act = self.forward(tfX)\n\n        rcost = reg*sum([tf.nn.l2_loss(p) for p in self.params])\n        cost = tf.reduce_mean(\n            tf.nn.softmax_cross_entropy_with_logits(\n                logits=act,\n                labels=tfY\n            )\n        ) + rcost\n        prediction = self.predict(tfX)\n\n        train_op = tf.train.RMSPropOptimizer(lr, decay=decay, momentum=mu).minimize(cost)\n\n        n_batches = N // batch_sz\n        costs = []\n        init = tf.global_variables_initializer()\n        with tf.Session() as session:\n            session.run(init)\n            for i in range(epochs):\n                X, Y = shuffle(X, Y)\n                for j in range(n_batches):\n                    Xbatch = X[j*batch_sz:(j*batch_sz+batch_sz)]\n                    Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]\n\n                    session.run(train_op, feed_dict={tfX: Xbatch, tfY: Ybatch})\n\n                    if j % 20 == 0:\n                        c = session.run(cost, feed_dict={tfX: Xvalid, tfY: Yvalid})\n                        costs.append(c)\n\n                        p = session.run(prediction, feed_dict={tfX: Xvalid, tfY: Yvalid})\n                        e = error_rate(Yvalid_flat, p)\n                        print(\"i:\", i, \"j:\", j, \"nb:\", n_batches, \"cost:\", c, \"error rate:\", e)\n\n        if show_fig:\n            plt.plot(costs)\n            plt.show()\n\n    # Returns the logits (pre-softmax activations) for a batch of images.\n    def forward(self, X):\n        Z = X\n        for c in self.convpool_layers:\n            Z = c.forward(Z)\n        # flatten everything except the batch dimension before the dense layers\n        Z_shape = Z.get_shape().as_list()\n        Z = tf.reshape(Z, [-1, np.prod(Z_shape[1:])])\n        for h in self.hidden_layers:\n            Z = h.forward(Z)\n        return tf.matmul(Z, self.W) + self.b\n\n    # Returns the index of the largest logit for each sample.\n    def predict(self, X):\n        pY = self.forward(X)\n        return tf.argmax(pY, 1)\n\n\ndef main():\n    Xtrain, Ytrain, Xvalid, Yvalid = getImageData()\n\n    # reshape X for tf: N x H x W x C\n    Xtrain = Xtrain.transpose((0, 2, 3, 1))\n    Xvalid = Xvalid.transpose((0, 2, 3, 1))\n\n    model = CNN(\n        convpool_layer_sizes=[(20, 5, 5), (20, 5, 5)],\n        hidden_layer_sizes=[500, 300],\n    )\n    model.fit(Xtrain, Ytrain, Xvalid, Yvalid)\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "cnn_theano.py",
    "content": "from __future__ import print_function, division\nfrom builtins import range\n# Note: you may need to update your version of future\n# sudo pip install -U future\n\nimport numpy as np\nimport theano\nimport theano.tensor as T\nimport matplotlib.pyplot as plt\n\nfrom sklearn.utils import shuffle\nfrom theano.tensor.nnet import conv2d\nfrom theano.tensor.signal.pool import pool_2d\n\nfrom util import getImageData, error_rate, init_weight_and_bias, init_filter\nfrom ann_theano import HiddenLayer, rmsprop\n\n\nclass ConvPoolLayer(object):\n    def __init__(self, mi, mo, fw=5, fh=5, poolsz=(2, 2)):\n        # mi = input feature map size\n        # mo = output feature map size\n        sz = (mo, mi, fw, fh)\n        W0 = init_filter(sz, poolsz)\n        self.W = theano.shared(W0)\n        b0 = np.zeros(mo, dtype=np.float32)\n        self.b = theano.shared(b0)\n        self.poolsz = poolsz\n        self.params = [self.W, self.b]\n\n    def forward(self, X):\n        conv_out = conv2d(input=X, filters=self.W)\n        pooled_out = pool_2d(\n            input=conv_out,\n            ws=self.poolsz,\n            ignore_border=True,\n            mode='max',\n        )\n        return T.tanh(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))\n\n\nclass CNN(object):\n    def __init__(self, convpool_layer_sizes, hidden_layer_sizes):\n        self.convpool_layer_sizes = convpool_layer_sizes\n        self.hidden_layer_sizes = hidden_layer_sizes\n\n    def fit(self, X, Y, Xvalid, Yvalid, lr=1e-3, mu=0.99, reg=1e-3, decay=0.99999, eps=1e-10, batch_sz=30, epochs=3, show_fig=True):\n        # downcast\n        lr = np.float32(lr)\n        mu = np.float32(mu)\n        reg = np.float32(reg)\n        decay = np.float32(decay)\n        eps = np.float32(eps)\n\n        X = X.astype(np.float32)\n        Xvalid = Xvalid.astype(np.float32)\n        Y = Y.astype(np.int32)\n        Yvalid = Yvalid.astype(np.int32)\n\n        # initialize convpool layers\n        N, c, width, height 
= X.shape\n        mi = c\n        outw = width\n        outh = height\n        self.convpool_layers = []\n        for mo, fw, fh in self.convpool_layer_sizes:\n            layer = ConvPoolLayer(mi, mo, fw, fh)\n            self.convpool_layers.append(layer)\n            outw = (outw - fw + 1) // 2\n            outh = (outh - fh + 1) // 2\n            mi = mo\n\n        # initialize mlp layers\n        K = len(set(Y))\n        self.hidden_layers = []\n        M1 = self.convpool_layer_sizes[-1][0]*outw*outh # size must be same as output of last convpool layer\n        count = 0\n        for M2 in self.hidden_layer_sizes:\n            h = HiddenLayer(M1, M2, count)\n            self.hidden_layers.append(h)\n            M1 = M2\n            count += 1\n\n        # logistic regression layer\n        W, b = init_weight_and_bias(M1, K)\n        self.W = theano.shared(W, 'W_logreg')\n        self.b = theano.shared(b, 'b_logreg')\n\n        # collect params for later use\n        self.params = [self.W, self.b]\n        for c in self.convpool_layers:\n            self.params += c.params\n        for h in self.hidden_layers:\n            self.params += h.params\n\n        # set up theano functions and variables\n        thX = T.tensor4('X', dtype='float32')\n        thY = T.ivector('Y')\n        pY = self.forward(thX)\n\n        rcost = reg*T.sum([(p*p).sum() for p in self.params])\n        cost = -T.mean(T.log(pY[T.arange(thY.shape[0]), thY])) + rcost\n        prediction = self.th_predict(thX)\n\n        cost_predict_op = theano.function(inputs=[thX, thY], outputs=[cost, prediction])\n\n        updates = rmsprop(cost, self.params, lr, mu, decay, eps)\n        train_op = theano.function(\n            inputs=[thX, thY],\n            outputs=cost,\n            updates=updates\n        )\n\n        n_batches = N // batch_sz\n        costs = []\n        for i in range(epochs):\n            X, Y = shuffle(X, Y)\n            for j in range(n_batches):\n                Xbatch = 
X[j*batch_sz:(j*batch_sz+batch_sz)]\n                Ybatch = Y[j*batch_sz:(j*batch_sz+batch_sz)]\n\n                train_c = train_op(Xbatch, Ybatch)\n\n                if j % 20 == 0:\n                    c, p = cost_predict_op(Xvalid, Yvalid)\n                    costs.append(c)\n                    e = error_rate(Yvalid, p)\n                    print(\n                        \"i:\", i,\n                        \"j:\", j,\n                        \"nb:\", n_batches,\n                        \"train cost:\", train_c,\n                        \"cost:\", c,\n                        \"error rate:\", e\n                    )\n\n        if show_fig:\n            plt.plot(costs)\n            plt.show()\n\n    def forward(self, X):\n        Z = X\n        for c in self.convpool_layers:\n            Z = c.forward(Z)\n        Z = Z.flatten(ndim=2)\n        for h in self.hidden_layers:\n            Z = h.forward(Z)\n        return T.nnet.softmax(Z.dot(self.W) + self.b)\n\n    def th_predict(self, X):\n        pY = self.forward(X)\n        return T.argmax(pY, axis=1)\n\n\ndef main():\n    Xtrain, Ytrain, Xvalid, Yvalid = getImageData()\n    model = CNN(\n        convpool_layer_sizes=[(20, 5, 5), (20, 5, 5)],\n        hidden_layer_sizes=[500, 300],\n    )\n    model.fit(Xtrain, Ytrain, Xvalid, Yvalid)\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "logistic.py",
    "content": "from __future__ import print_function, division\nfrom builtins import range\n# Note: you may need to update your version of future\n# sudo pip install -U future\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom util import getData, softmax, cost, y2indicator, error_rate\nfrom sklearn.utils import shuffle\n\n\nclass LogisticModel(object):\n    def __init__(self):\n        pass\n\n    def fit(self, X, Y, Xvalid, Yvalid, learning_rate=1e-7, reg=0., epochs=10000, show_fig=False):\n        Tvalid = y2indicator(Yvalid)\n\n        N, D = X.shape\n        K = len(set(Y))\n        T = y2indicator(Y)\n        self.W = np.random.randn(D, K) / np.sqrt(D)\n        self.b = np.zeros(K)\n\n        costs = []\n        best_validation_error = 1\n        for i in range(epochs):\n            # forward propagation and cost calculation\n            pY = self.forward(X)\n\n            # gradient descent step\n            self.W -= learning_rate*(X.T.dot(pY - T) + reg*self.W)\n            self.b -= learning_rate*((pY - T).sum(axis=0) + reg*self.b)\n\n            if i % 10 == 0:\n                pYvalid = self.forward(Xvalid)\n                c = cost(Tvalid, pYvalid)\n                costs.append(c)\n                e = error_rate(Yvalid, np.argmax(pYvalid, axis=1))\n                print(\"i:\", i, \"cost:\", c, \"error:\", e)\n                if e < best_validation_error:\n                    best_validation_error = e\n        print(\"best_validation_error:\", best_validation_error)\n\n        if show_fig:\n            plt.plot(costs)\n            plt.show()\n\n\n    def forward(self, X):\n        return softmax(X.dot(self.W) + self.b)\n\n    def predict(self, X):\n        pY = self.forward(X)\n        return np.argmax(pY, axis=1)\n\n    def score(self, X, Y):\n        prediction = self.predict(X)\n        return 1 - error_rate(Y, prediction)\n\n\ndef main():\n    Xtrain, Ytrain, Xvalid, Yvalid = getData()\n    \n    model = LogisticModel()\n    
model.fit(Xtrain, Ytrain, Xvalid, Yvalid, show_fig=True)\n    print(model.score(Xvalid, Yvalid))\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "logistic_sigmoid.py",
    "content": "from __future__ import print_function, division\nfrom builtins import range\n# Note: you may need to update your version of future\n# sudo pip install -U future\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.utils import shuffle\nfrom util import getBinaryData, sigmoid, sigmoid_cost, error_rate\n\n\nclass LogisticModel(object):\n    def __init__(self):\n        pass\n\n    def fit(self, X, Y, learning_rate=1e-6, reg=0., epochs=120000, show_fig=False):\n        X, Y = shuffle(X, Y)\n        Xvalid, Yvalid = X[-1000:], Y[-1000:]\n        X, Y = X[:-1000], Y[:-1000]\n\n        N, D = X.shape\n        self.W = np.random.randn(D) / np.sqrt(D)\n        self.b = 0\n\n        costs = []\n        best_validation_error = 1\n        for i in range(epochs):\n                # forward propagation and cost calculation\n                pY = self.forward(X)\n\n                # gradient descent step\n                self.W -= learning_rate*(X.T.dot(pY - Y) + reg*self.W)\n                self.b -= learning_rate*((pY - Y).sum() + reg*self.b)\n\n                \n                if i % 20 == 0:\n                    pYvalid = self.forward(Xvalid)\n                    c = sigmoid_cost(Yvalid, pYvalid)\n                    costs.append(c)\n                    e = error_rate(Yvalid, np.round(pYvalid))\n                    print(\"i:\", i, \"cost:\", c, \"error:\", e)\n                    if e < best_validation_error:\n                        best_validation_error = e\n        print(\"best_validation_error:\", best_validation_error)\n\n        if show_fig:\n            plt.plot(costs)\n            plt.show()\n\n\n    def forward(self, X):\n        return sigmoid(X.dot(self.W) + self.b)\n\n    def predict(self, X):\n        pY = self.forward(X)\n        return np.round(pY)\n\n\n    def score(self, X, Y):\n        prediction = self.predict(X)\n        return 1 - error_rate(Y, prediction)\n\n\ndef main():\n    X, Y = getBinaryData()\n\n    X0 = X[Y==0, 
:]\n    X1 = X[Y==1, :]\n    X1 = np.repeat(X1, 9, axis=0)\n    X = np.vstack([X0, X1])\n    Y = np.array([0]*len(X0) + [1]*len(X1))\n    \n    model = LogisticModel()\n    model.fit(X, Y, show_fig=True)\n    model.score(X, Y)\n    # scores = cross_val_score(model, X, Y, cv=5)\n    # print \"score mean:\", np.mean(scores), \"stdev:\", np.std(scores)\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "show_images.py",
    "content": "from __future__ import print_function, division\nfrom builtins import range, input\n# Note: you may need to update your version of future\n# sudo pip install -U future\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom util import getData\n\nlabel_map = ['Anger', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']\n\ndef main():\n    X, Y, _, _ = getData(balance_ones=False)\n\n    while True:\n        for i in range(7):\n            x, y = X[Y==i], Y[Y==i]\n            N = len(y)\n            j = np.random.choice(N)\n            plt.imshow(x[j].reshape(48, 48), cmap='gray')\n            plt.title(label_map[y[j]])\n            plt.show()\n        prompt = input('Quit? Enter Y:\\n')\n        if prompt.lower().startswith('y'):\n            break\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "util.py",
    "content": "from __future__ import print_function, division\nfrom builtins import range\n# Note: you may need to update your version of future\n# sudo pip install -U future\n\nimport numpy as np\nimport pandas as pd\nfrom sklearn.utils import shuffle\n\n\ndef init_weight_and_bias(M1, M2):\n    # random (M1, M2) weights scaled by 1/sqrt(M1) and a zero bias, both float32\n    W = np.random.randn(M1, M2) / np.sqrt(M1)\n    b = np.zeros(M2)\n    return W.astype(np.float32), b.astype(np.float32)\n\n\ndef init_filter(shape, poolsz):\n    # scale is sqrt(2 / (fan_in + fan_out)): fan_in = prod(shape[1:]) and\n    # fan_out = shape[0] * prod(shape[2:]) / pooling area\n    w = np.random.randn(*shape) * np.sqrt(2) / np.sqrt(np.prod(shape[1:]) + shape[0]*np.prod(shape[2:]) / np.prod(poolsz))\n    return w.astype(np.float32)\n\n\ndef relu(x):\n    return x * (x > 0)\n\n\ndef sigmoid(A):\n    return 1 / (1 + np.exp(-A))\n\n\ndef softmax(A):\n    # shift each row by its max before exponentiating: the result is the same\n    # but np.exp can no longer overflow on large activations\n    expA = np.exp(A - A.max(axis=1, keepdims=True))\n    return expA / expA.sum(axis=1, keepdims=True)\n\n\ndef sigmoid_cost(T, Y):\n    # binary cross-entropy summed over all samples\n    return -(T*np.log(Y) + (1-T)*np.log(1-Y)).sum()\n\n\ndef cost(T, Y):\n    # cross-entropy summed over all samples; T is an indicator matrix\n    return -(T*np.log(Y)).sum()\n\n\ndef cost2(T, Y):\n    # like cost(), but uses the integer targets to index Y instead of\n    # multiplying by a large indicator matrix with mostly 0s;\n    # note it averages over samples where cost() sums\n    N = len(T)\n    return -np.log(Y[np.arange(N), T]).mean()\n\n\ndef error_rate(targets, predictions):\n    return np.mean(targets != predictions)\n\n\ndef y2indicator(y):\n    # one-hot encode integer labels into an (N, K) matrix\n    N = len(y)\n    # K must cover the largest label even when some class is absent from y,\n    # otherwise ind[i, y[i]] would index past the last column\n    K = max(len(set(y)), int(np.max(y)) + 1) if N > 0 else 0\n    ind = np.zeros((N, K))\n    for i in range(N):\n        ind[i, y[i]] = 1\n    return ind\n\n\ndef getData(balance_ones=True, Ntest=1000):\n    # images are 48x48 = 2304 size vectors\n    Y = []\n    X = []\n    first = True\n    # context manager so the file handle is closed once the rows are read\n    with open('fer2013.csv') as f:\n        for line in f:\n            if first:\n                # skip the header row\n                first = False\n            else:\n                row = line.split(',')\n                Y.append(int(row[0]))\n                X.append([int(p) for p in row[1].split()])\n\n    X, Y = np.array(X) / 255.0, np.array(Y)\n\n    # shuffle and split\n    X, Y = shuffle(X, Y)\n    Xtrain, Ytrain = X[:-Ntest], Y[:-Ntest]\n    Xvalid, Yvalid = X[-Ntest:], Y[-Ntest:]\n\n    if balance_ones:\n        # balance the 1 class\n        X0, Y0 = Xtrain[Ytrain!=1, :], Ytrain[Ytrain!=1]\n        X1 = Xtrain[Ytrain==1, :]\n        X1 = np.repeat(X1, 9, axis=0)\n        Xtrain = np.vstack([X0, X1])\n        Ytrain = np.concatenate((Y0, [1]*len(X1)))\n\n    return Xtrain, Ytrain, Xvalid, Yvalid\n\n\ndef getImageData():\n    # same split as getData(), with each row reshaped to (1, d, d) where d = sqrt(D)\n    Xtrain, Ytrain, Xvalid, Yvalid = getData()\n    N, D = Xtrain.shape\n    d = int(np.sqrt(D))\n    Xtrain = Xtrain.reshape(-1, 1, d, d)\n    Xvalid = Xvalid.reshape(-1, 1, d, d)\n    return Xtrain, Ytrain, Xvalid, Yvalid\n\n\ndef getBinaryData():\n    # keeps only the rows labelled 0 or 1\n    Y = []\n    X = []\n    first = True\n    # context manager so the file handle is closed once the rows are read\n    with open('fer2013.csv') as f:\n        for line in f:\n            if first:\n                # skip the header row\n                first = False\n            else:\n                row = line.split(',')\n                y = int(row[0])\n                if y == 0 or y == 1:\n                    Y.append(y)\n                    X.append([int(p) for p in row[1].split()])\n    return np.array(X) / 255.0, np.array(Y)\n\n\ndef crossValidation(model, X, Y, K=5):\n    # split data into K parts\n    X, Y = shuffle(X, Y)\n    sz = len(Y) // K\n    errors = []\n    for k in range(K):\n        # train on everything outside fold k, evaluate on fold k\n        xtr = np.concatenate([ X[:k*sz, :], X[(k*sz + sz):, :] ])\n        ytr = np.concatenate([ Y[:k*sz], Y[(k*sz + sz):] ])\n        xte = X[k*sz:(k*sz + sz), :]\n        yte = Y[k*sz:(k*sz + sz)]\n\n        model.fit(xtr, ytr)\n        # collects whatever model.score returns for the held-out fold\n        err = model.score(xte, yte)\n        errors.append(err)\n    print(\"errors:\", errors)\n    return np.mean(errors)\n"
  }
]