Full Code of fephsun/neuralnetmusic for AI

master 1b559a25bcfb cached
101 files
56.2 MB
2.2M tokens
360 symbols
1 requests
Download .txt
Showing preview only (8,858K chars total). Download the full file or copy to clipboard to get everything.
Repository: fephsun/neuralnetmusic
Branch: master
Commit: 1b559a25bcfb
Files: 101
Total size: 56.2 MB

Directory structure:
gitextract_7ps85ir2/

├── .gitignore
├── DBN.py
├── DeepLearningTutorials/
│   ├── .gitignore
│   ├── .hgignore
│   ├── .travis.yml
│   ├── README.rst
│   ├── __init__.py
│   ├── code/
│   │   ├── DBN.py
│   │   ├── SdA.py
│   │   ├── __init__.py
│   │   ├── cA.py
│   │   ├── convolutional_mlp.py
│   │   ├── dA.py
│   │   ├── hmc/
│   │   │   ├── __init__.py
│   │   │   ├── hmc.py
│   │   │   └── test_hmc.py
│   │   ├── imdb.py
│   │   ├── imdb_preprocess.py
│   │   ├── logistic_cg.py
│   │   ├── logistic_sgd.py
│   │   ├── lstm.py
│   │   ├── mlp.py
│   │   ├── rbm.py
│   │   ├── rnnrbm.py
│   │   ├── rnnslu.py
│   │   ├── test.py
│   │   └── utils.py
│   ├── data/
│   │   ├── download.sh
│   │   └── training_colorpatches_16x16_demo.mat
│   ├── doc/
│   │   ├── .templates/
│   │   │   └── layout.html
│   │   ├── DBN.txt
│   │   ├── LICENSE.txt
│   │   ├── Makefile
│   │   ├── SdA.txt
│   │   ├── conf.py
│   │   ├── contents.txt
│   │   ├── dA.txt
│   │   ├── deep.txt
│   │   ├── gettingstarted.txt
│   │   ├── hmc.txt
│   │   ├── index.txt
│   │   ├── lenet.txt
│   │   ├── logreg.txt
│   │   ├── lstm.txt
│   │   ├── mlp.txt
│   │   ├── rbm.txt
│   │   ├── references.txt
│   │   ├── rnnrbm.txt
│   │   ├── rnnslu.txt
│   │   ├── scripts/
│   │   │   └── docgen.py
│   │   └── utilities.txt
│   ├── issues_closed/
│   │   └── 2_RBM_cost_fn.txt
│   ├── issues_open/
│   │   ├── 1_SdA_performance.txt
│   │   ├── 3_RBM_scan_GPU.txt
│   │   ├── 4_RBM_scan.txt
│   │   ├── 5_results.txt
│   │   └── 6_benchmarking_pybrain.txt
│   └── misc/
│       └── do_nightly_build
├── README.md
├── joplin/
│   ├── alabama.xml
│   ├── cleopha.xml
│   ├── entertainer.xml
│   ├── maple_leaf.xml
│   ├── searchlight.xml
│   ├── strenous.xml
│   ├── syncopations.xml
│   ├── winners.xml
│   └── winners_2.xml
├── joplin-model.pickle
├── joplin_data.pickle
├── midi/
│   ├── DataTypeConverters.py
│   ├── EventDispatcher.py
│   ├── Icon_
│   ├── MidiFileParser.py
│   ├── MidiInFile.py
│   ├── MidiInStream.py
│   ├── MidiOutFile.py
│   ├── MidiOutStream.py
│   ├── MidiToText.py
│   ├── RawInstreamFile.py
│   ├── RawOutstreamFile.py
│   ├── __init__.py
│   ├── changes.txt
│   ├── constants.py
│   ├── example_mimimal_type0.py
│   ├── example_print_channel_0.py
│   ├── example_print_events.py
│   ├── example_print_file.py
│   ├── example_transpose_octave.py
│   ├── files.txt
│   ├── hallelujah.mid
│   ├── license.txt
│   ├── readme
│   ├── readme.txt
│   ├── utils.py
│   └── version.txt
├── myparser.py
└── neural-plugin/
    ├── DoubleTime.js
    ├── neural-plugin.js
    ├── neural-plugin.ui
    └── output-window.ui

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
*.pyc

================================================
FILE: DBN.py
================================================
"""
"""
import cPickle
import os
import sys
import time
import os.path as path
import copy

import numpy

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

from DeepLearningTutorials.code.mlp import HiddenLayer
from DeepLearningTutorials.code.rbm import RBM

from PIL import Image

import myparser
from midi.utils import midiwrite


# compute_test_value is 'off' by default, meaning this feature is inactive.
# It is set explicitly here so the mode is easy to flip while debugging.
theano.config.compute_test_value = 'off' # Use 'warn' to activate this feature

# For switching between 32 and 64 bit systems, because Theano is a little silly
# like that.  The training data and randomly drawn top-level activations are
# cast to this dtype.
NUMPY_DTYPE = numpy.float64

# start-snippet-1
class AutoencodingDBN(object):
    """
    An autoencoding Deep Belief Network, based on the classifying DBN in the
    Theano tutorial.  (Most of the code is copied over.)
    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=None):
        """Build the forward network, its RBMs, and the reverse network.

        This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value.  When None, the sizes
                               [1000, 1000, 1000] are used.
        """
        # A None sentinel avoids sharing one mutable default list between
        # instances; the copy keeps later caller-side edits from leaking in.
        if hidden_layers_sizes is None:
            hidden_layers_sizes = [1000, 1000, 1000]
        else:
            hidden_layers_sizes = list(hidden_layers_sizes)

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)
        self.layer_sizes = hidden_layers_sizes

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.x_mask = T.matrix('x_mask')    # For partial information.

        # end-snippet-1
        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # The first layer reads the DBN input; every other layer reads
            # the activation of the hidden layer below it.
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.sigmoid_layers[-1].output

            # construct the sigmoidal layer
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # Construct an RBM that shares weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)
            self.params.extend(rbm_layer.params)

        # And build the upside-down network.  This shares parameters with the
        # forward network: reverse layer i uses the transpose of forward
        # layer i's weights and the visible bias of RBM i.

        # The "isolated" layers let you run only the upside-down part of the
        # network, for generation.  The non-isolated layers are connected to
        # the forward, compressing part of the network, and are used for
        # training.
        reverse_input = self.sigmoid_layers[-1].output
        # Shared variable holding the top-level activations fed to the
        # isolated reverse network (10 rows, one per batch element).
        self.isolated_reverse_input = theano.shared(
            numpy.zeros([10, hidden_layers_sizes[-1]]))
        isolated_input = self.isolated_reverse_input
        self.reverse_layers = [None] * self.n_layers
        self.isolated_reverse = [None] * self.n_layers
        # Walk from the top layer down, so index i of both lists undoes
        # forward layer i and index 0 produces the reconstruction.
        for i in reversed(xrange(self.n_layers)):
            if i == 0:
                out_size = n_ins
            else:
                out_size = hidden_layers_sizes[i-1]
            reverse_sigmoid = HiddenLayer(rng=numpy_rng,
                input=reverse_input,
                n_in=hidden_layers_sizes[i],
                n_out=out_size,
                W=self.sigmoid_layers[i].W.T,
                b=self.rbm_layers[i].vbias,
                activation=T.nnet.sigmoid
            )
            isolated_sigmoid = HiddenLayer(rng=numpy_rng,
                input=isolated_input,
                n_in=hidden_layers_sizes[i],
                n_out=out_size,
                W=self.sigmoid_layers[i].W.T,
                b=self.rbm_layers[i].vbias,
                activation=T.nnet.sigmoid
            )

            reverse_input = reverse_sigmoid.output
            isolated_input = isolated_sigmoid.output
            self.reverse_layers[i] = reverse_sigmoid
            self.isolated_reverse[i] = isolated_sigmoid


        # The fine-tune cost is the reconstruction error of the entire net.
        self.finetune_cost = ((self.x - self.reverse_layers[0].output)**2).sum()

        # The cost for training the generative net - in this case, self.x is
        # completely disconnected, and we feed a pattern into the reverse net.
        self.generative_cost = ((self.x - self.isolated_reverse[0].output)**2).sum()

        # The l1 cost is for generating constrained samples of the input.  (Aka
        # harmonizing a melody.)  Given a melody in self.x and a mask
        # self.x_mask of which parts of self.x actually matter, it computes the
        # error between the generated sample and the melody.
        self.l1_cost = (((self.x - self.isolated_reverse[0].output) * self.x_mask)**2).sum()

    def dump_params(self, outLoc):
        """
        Takes all of the weights, and stores them as numpy arrays.
        This is so the params are portable between GPU machines and CPU machines.
        To load the params, you need to call load_from_dump, which re-makes your
        DBN.

        The pickled object is a dict keyed by (layer index, kind), where kind
        0 is the forward layer's weight matrix, 1 is the forward layer's bias
        and 2 is the bias of the matching reverse layer.

        :type outLoc: str
        :param outLoc: path of the pickle file to write
        """
        dump = {}
        for layer in range(self.n_layers):
            dump[(layer, 0)] = numpy.array(self.sigmoid_layers[layer].W.get_value())
            dump[(layer, 1)] = numpy.array(self.sigmoid_layers[layer].b.get_value())
            dump[(layer, 2)] = numpy.array(self.reverse_layers[layer].b.get_value())
        # Close the file deterministically so the dump is fully flushed
        # before anything tries to read it back.
        with open(outLoc, 'wb') as out_file:
            cPickle.dump(dump, out_file, protocol=cPickle.HIGHEST_PROTOCOL)

    def pretraining_functions(self, train_set_x, batch_size, k):
        '''Generates a list of functions, for performing one step of
        gradient descent at a given layer. The function will require
        as input the minibatch index, and to train an RBM you just
        need to iterate, calling the corresponding function on all
        minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared var. that contains all datapoints used
                            for training the RBM
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        :param k: number of Gibbs steps to do in CD-k / PCD-k

        :return: list with one compiled function per RBM, in layer order.
                 Each takes `index` (minibatch number) and an optional
                 `lr` (learning rate, default 0.1) and returns the cost
                 reported by that RBM's `get_cost_updates`.
        '''

        index = T.lscalar('index')  # index to a minibatch
        learning_rate = T.scalar('lr')  # learning rate to use

        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:

            # get the cost and the updates list
            # using CD-k here (persistent=None) for training each RBM.
            # TODO: change cost function to reconstruction error
            cost, updates = rbm.get_cost_updates(learning_rate,
                                                 persistent=None, k=k)

            # compile the theano function; every RBM is fed through self.x,
            # since the deeper RBMs take their input from the layers below.
            fn = theano.function(
                inputs=[index, theano.Param(learning_rate, default=0.1)],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_set_x[batch_begin:batch_end]
                }
            )
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, train_set_x, batch_size, learning_rate):
        '''Generates the functions used to fine-tune the net as an
        autoencoder, by gradient descent on the reconstruction error
        (`self.finetune_cost`).

        :type train_set_x: theano shared variable
        :param train_set_x: shared variable holding all training
                            datapoints, one per row
        :type batch_size: int
        :param batch_size: size of a minibatch
        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage

        :return: a pair `(train_fn, test_score)`.  `train_fn(index)` does
                 one gradient step on minibatch `index` and returns its
                 cost.  `test_score()` returns the list of costs of every
                 minibatch of `train_set_x` (there is no separate
                 validation or test set here).
        '''

        index = T.lscalar('index')  # index to a [mini]batch
        # Whole minibatches only; explicit floor division keeps this an
        # integer, which xrange below requires.
        n_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = []
        for param, gparam in zip(self.params, gparams):
            updates.append((param, param - gparam * learning_rate))

        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
            }
        )

        # Same cost as train_fn, but without applying any updates.
        test_score_i = theano.function(
            [index],
            self.finetune_cost,
            givens={
                self.x: train_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
            }
        )

        # Create a function that scans the entire data set
        def test_score():
            return [test_score_i(i) for i in xrange(n_batches)]

        return train_fn, test_score


    def build_generative_finetune_fns(self, train_set_outputs, train_set_labels,
                                      batch_size, learning_rate):
        """
        Builds the functions for fine-tuning only the reverse (generative)
        half of the network: `train_set_labels` is fed in as the top-level
        input of the isolated reverse net and `train_set_outputs` is the
        target it should reproduce (`self.generative_cost`).

        Returns `(train_fn, test_score)`: `train_fn(index)` does one gradient
        step on minibatch `index` and returns its cost; `test_score()`
        returns the list of costs over all minibatches, without updating.
        """
        index = T.lscalar('index')  # index to a [mini]batch
        n_batches = train_set_outputs.get_value(borrow=True).shape[0] / batch_size

        # Only the visible bias and the weights of each RBM take part in the
        # generative model, so those are the parameters we differentiate.
        gen_params = []
        for rbm in self.rbm_layers:
            gen_params.extend([rbm.vbias, rbm.W])
        gparams = T.grad(self.generative_cost, gen_params)

        # plain gradient descent step for every generative parameter
        updates = [(param, param - gparam * learning_rate)
                   for param, gparam in zip(gen_params, gparams)]

        def batch_givens():
            # Substitutions selecting minibatch `index` from both data sets.
            start = index * batch_size
            stop = (index + 1) * batch_size
            return {
                self.x: train_set_outputs[start:stop],
                self.isolated_reverse[-1].input: train_set_labels[start:stop],
            }

        train_fn = theano.function(
            inputs=[index],
            outputs=self.generative_cost,
            updates=updates,
            givens=batch_givens()
        )

        test_score_i = theano.function(
            [index],
            self.generative_cost,
            givens=batch_givens()
        )

        def test_score():
            # Cost of every minibatch of the data set, in order.
            scores = []
            for i in xrange(n_batches):
                scores.append(test_score_i(i))
            return scores

        return train_fn, test_score

    def generate(self, top_level):
        """
        Produce a new piano roll from the given top-level values, by
        substituting them for the input of the topmost reverse layer and
        evaluating the output of the bottom reverse layer.
        """
        top_input = self.reverse_layers[-1].input
        bottom_output = self.reverse_layers[0].output
        # A fresh function is compiled on every call, with top_level baked
        # in through `givens`.
        generator = theano.function(
            [],
            bottom_output,
            givens={top_input: top_level}
        )
        return generator()

    def label(self, to_label, x_mask, learning_rate):
        """
        Build a function that estimates the top layer from an incomplete
        bottom layer.  `to_label` is substituted for self.x and `x_mask` for
        self.x_mask, which weights the entries of `to_label` that count
        towards the masked cost (self.l1_cost).

        Each call of the returned function takes one gradient-descent step
        on self.isolated_reverse_input and returns the masked cost.
        """
        top = self.isolated_reverse_input
        grad = T.grad(self.l1_cost, top)

        # The only thing being optimised is the top-level input itself.
        step = (top, top - grad * learning_rate)

        return theano.function(
            inputs=[],
            outputs=self.l1_cost,
            updates=[step],
            givens={
                self.x: to_label,
                self.x_mask: x_mask,
            }
        )

    def train_dbn(self, data_file, finetune_lr=0.01, pretraining_epochs=100,
        pretrain_lr=0.05, k=1, training_epochs=1000, batch_size=10):

        raw_x = cPickle.load(open(data_file, 'rb')).astype(dtype=NUMPY_DTYPE)
        train_set_x = theano.shared(raw_x)
        

        # compute number of minibatches for training, validation and testing
        n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        print n_train_batches

        # start-snippet-2
        #########################
        # PRETRAINING THE MODEL #
        #########################
        print '... getting the pretraining functions'
        pretraining_fns = self.pretraining_functions(train_set_x=train_set_x,
                                                    batch_size=batch_size,
                                                    k=k)


        print '... pre-training the model'
        start_time = time.clock()
        ## Pre-train layer-wise
        for i in xrange(self.n_layers - 1):
            # go through pretraining epochs
            for epoch in xrange(pretraining_epochs):
                # go through the training set
                c = []
                for batch_index in xrange(n_train_batches):
                    c.append(pretraining_fns[i](index=batch_index,
                                                lr=pretrain_lr))
                print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
                print numpy.mean(c)

        end_time = time.clock()
        # end-snippet-2
        print >> sys.stderr, ('The pretraining code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time) / 60.))

        # If you'd like to try out different parameters for the fine-tuner only,
        # you can cache the initial model state, so you don't have to pre-train
        # every time.
        cPickle.dump(self, open('initial-model.pickle', 'wb'), protocol=cPickle.HIGHEST_PROTOCOL)
        ########################
        # FINETUNING THE MODEL #
        ########################

        # get the training, validation and testing function for the model

        print '... getting the finetuning functions'
        use_autoencoder = False
        if use_autoencoder:
            train_fn, test_model = self.build_finetune_functions(
                train_set_x=train_set_x,
                batch_size=batch_size,
                learning_rate=finetune_lr
            )
        else:
            raw_labels = numpy.random.randint(2,
                size=[raw_x.shape[0], self.layer_sizes[-1]])\
                .astype(dtype=numpy.float64)
            labels = theano.shared(raw_labels)
            train_fn, test_model = self.build_generative_finetune_fns(
                train_set_outputs=train_set_x,
                train_set_labels=labels,
                batch_size=batch_size,
                learning_rate=finetune_lr 
            )

        print '... finetuning the model'
        # early-stopping parameters
        patience = 4 * n_train_batches  # look as this many examples regardless
        patience_increase = 2.    # wait this much longer when a new best is
                                  # found
        improvement_threshold = 0.995  # a relative improvement of this much is
                                       # considered significant
        validation_frequency = min(n_train_batches, patience / 2)
                                      # go through this many
                                      # minibatches before checking the network
                                      # on the validation set; in this case we
                                      # check every epoch

        best_validation_loss = numpy.inf
        test_score = 0.
        start_time = time.clock()

        done_looping = False
        epoch = 0

        while (epoch < training_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in xrange(n_train_batches):

                minibatch_avg_cost = train_fn(minibatch_index)
                iter = (epoch - 1) * n_train_batches + minibatch_index

                if (iter + 1) % validation_frequency == 0:

                    validation_losses = test_model()
                    this_validation_loss = numpy.mean(validation_losses)
                    print(
                        'epoch %i, minibatch %i/%i, validation error %f %%'
                        % (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            this_validation_loss * 100.
                        )
                    )

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:

                        #improve patience if loss improvement is good enough
                        if (
                            this_validation_loss < best_validation_loss *
                            improvement_threshold
                        ):
                            patience = max(patience, iter * patience_increase)

                        # save best validation score and iteration number
                        best_validation_loss = this_validation_loss
                        best_iter = iter

                if patience <= iter:
                    done_looping = True
                    break

        end_time = time.clock()
        print(
            (
                'Optimization complete with best validation score of %f, '
                'obtained at iteration %i, '
            ) % (best_validation_loss, best_iter + 1)
        )
        print >> sys.stderr, ('The fine tuning code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time)
                                                  / 60.))
        self.dump_params('./my-model.pickle')

    def sample(self, top_level=None, rootLoc='./', save=True, threshold=0.5,
            filename='test.midi'):
        """
        Draws a sample piano roll from the trained net and returns it as an
        88 x 64 array.  top_level is a 10 x [size of top layer] matrix with
        one set of top-layer values per row; when omitted, random 0/1 values
        are used.  Only the first row's output is kept, but data has to be
        processed in increments of batch_size.

        A picture of the roll is always written to 'test.png' under rootLoc.
        If threshold is not None the roll is binarised around it, and if
        save is true the roll is also written as a midi file named filename.
        """
        if top_level is None:
            random_bits = numpy.random.randint(
                2, size=[10, self.layer_sizes[-1]])
            top_level = random_bits.astype(dtype=NUMPY_DTYPE)

        batch = self.generate(top_level).reshape([10, 88*64])
        roll = batch[0, :].reshape([88, 64])

        # Makes a little picture of the piano roll (before thresholding).
        picture = Image.fromarray((roll*255).astype('uint8'))
        picture.save(path.join(rootLoc, 'test.png'))

        if threshold is not None:
            roll[roll > threshold] = 1
            roll[roll <= threshold] = 0

        if save:
            midi_path = path.join(rootLoc, filename)
            midiwrite(midi_path, roll.T, r=(12, 109), dt=64)
        return roll

    def label_from_file(self, rootLoc, fileLoc, learn_rate, n_iters, threshold):
        """
        Given a xml file at fileLoc, harmonizes the melody in the xml file, by
        doing gradient descent on the top hidden layer of the network.  This
        gives us an estimate of the top layer activations that might generate
        the melody. We then run the network forwards to get the entire harmony
        from the top level activations that we estimate.
        """
        noteReader = myparser.LegatoNoteAdder(64)
        myparser.read(fileLoc, noteReader.handle)
        snippet = noteReader.mtx
        mask = melody_blocker(snippet)

        linear_snippet = snippet.reshape([88*64])
        linear_mask = mask.reshape([88*64])
        in_data = numpy.zeros([10, 88*64])
        x_mask = numpy.zeros([10, 88*64])
        for i in range(10):
            in_data[i, :] = linear_snippet
            x_mask[i, :] = linear_mask


        # Do gradient descent to estimate the activations on layer 1.
        new_vals = theano.shared(
            value=numpy.random.sample([10, self.layer_sizes[-1]]),
        )
        f = theano.function(
            inputs=[],
            updates=[(self.isolated_reverse_input, new_vals)],
        )
        f()
        trainer = self.label(in_data, x_mask, learn_rate)
        for i in range(n_iters):
            print trainer()

        # Then, generate using it.
        result = dbn.sample(self.isolated_reverse_input, rootLoc=rootLoc, save=False,
            threshold=threshold)
        # Add the melody back onto the snippet.
        final = result * (1.0 - mask)
        final = final + snippet
        final[final > 0.5] = 1
        midiwrite(path.join(rootLoc, 'test.midi'), final.T, r=(12, 109), dt=64)
        return final

def melody_blocker(snippet):
    """
    Makes a mask where anything above the top line of the snippet is 1.  Also
    enforces empty space a major 2nd above and below the melody.  (This means
    the optimizer will consider any note above the top line of the melody, or
    too close to the melody, wrong.)

    :param snippet: 2-D numpy array indexed [pitch, time]; non-zero entries
        are sounding notes
    :return: a new array of the same shape; `snippet` itself is not modified
    """
    envelope = numpy.copy(snippet)
    _, length = snippet.shape
    for i in range(length):
        # Row indices of the notes sounding at time step i.
        occupied = numpy.nonzero(snippet[:, i])[0]
        if len(occupied) == 0:
            continue
        top = occupied.max()
        envelope[top:, i] = 1
        for pitch in occupied:
            # Clamp at 0: a negative slice start would wrap around to the
            # top of the array and silently select nothing.
            low = max(pitch - 2, 0)
            envelope[low:pitch + 3, i] = 1
    return envelope

def load_from_dump(inLoc):
    """
    Loads data from dumped state (generated by AutoencodingDBN.dump_params),
    and creates a new DBN with those parameters.

    :param inLoc: path of the pickle file written by dump_params
    :return: an AutoencodingDBN whose weights and biases come from the dump
    """
    # NOTE: unpickling can execute arbitrary code - only load dumps that
    # come from a trusted source.
    with open(inLoc, 'rb') as in_file:
        dump = cPickle.load(in_file)
    # Get the number of layers; keys are (layer index, parameter kind).
    max_layer = max([0] + [layer for layer, _ in dump]) + 1
    # Get the size of each layer from the length of its bias vector.
    layer_sizes = []
    for layer in range(max_layer):
        layer_sizes.append(len(dump[(layer, 1)]))
    # The input size is the number of rows of the first weight matrix
    # (88*64 for the piano rolls used in this project).
    n_ins = dump[(0, 0)].shape[0]
    dbn = AutoencodingDBN(numpy_rng=numpy.random.RandomState(),
        n_ins=n_ins,
        hidden_layers_sizes=layer_sizes)
    for layer in range(max_layer):
        dbn.sigmoid_layers[layer].W.set_value(dump[(layer, 0)])
        dbn.sigmoid_layers[layer].b.set_value(dump[(layer, 1)])
        dbn.reverse_layers[layer].b.set_value(dump[(layer, 2)])
    return dbn

if __name__ == '__main__':
    if sys.argv[1] == 'train':
        dbn = AutoencodingDBN(numpy_rng=numpy.random.RandomState(),
            n_ins=88*64,
            hidden_layers_sizes=[1024, 256, 64])
        dbn.train_dbn('./joplin-data.pickle')
        exit()
    dbn = load_from_dump('./joplin-model.pickle')
    import sys
    if sys.argv[1] == 'sample':
        dbn.sample(threshold=0.5)
    elif sys.argv[1] == 'harmonize': 
        dbn.label_from_file(path.dirname(sys.argv[0]), './12-days.xml',
            0.01, 500, 0.4)
    else:
        print "invalid command"


================================================
FILE: DeepLearningTutorials/.gitignore
================================================
code/*.pyc
code/*_plots
code/tmp*
code/midi
code/rnnslu
data/atis.*
data/mnist.pkl.gz
data/mnist_py3k.pkl.gz
data/Nottingham.zip
data/Nottingham
data/midi.zip
html
*.pyc
*~
*.swp


================================================
FILE: DeepLearningTutorials/.hgignore
================================================
syntax: glob
*.pyc
*.png
*~


================================================
FILE: DeepLearningTutorials/.travis.yml
================================================
# After changing this file, check it on:
# http://lint.travis-ci.org/

#We can't get scipy installed with the python language
#So we will use the system python from the c language.
language: c
#language: python
#python:
#  - "2.5"
#  - "2.7"
#  - "3.2"
# command to install dependencies
before_install:
#zlib1g-dev is needed to allow PIL to uncompress the dataset.
  - sudo apt-get update
  - sudo apt-get install -qq libatlas3gf-base libatlas-dev zlib1g-dev zip unzip zlibc libzip-dev libjpeg8 libjpeg62-dev libfreetype6 libfreetype6-dev python-numpy python-scipy python-pip python-nose python-yaml pyflakes python-imaging

install:
#  - "pip install -q numpy --use-mirrors"
# Use Pillow instead of PIL as it is better packaged
#  - "pip install -q Pillow --use-mirrors"
#If we don't install numpy before SciPy 0.10.1, the SciPy installations fails.
#  - "pip install -q scipy --use-mirrors"
  - "sudo pip install --no-deps git+git://github.com/Theano/Theano.git"

env:
  - PART="test.py:test_logistic_sgd test.py:test_logistic_cg test.py:test_mlp test.py:test_convolutional_mlp test.py:test_dA"
  - PART="test.py:test_SdA"
  - PART="test.py:test_dbn"
  - PART="test.py:test_rbm test.py:test_rnnrbm"
  - PART="-e test.py"

#i7-2600K CPU @ 3.40GHz
#166.572s   #8      test.test_rbm OK
#155.114s   #7      test.test_dbn OK
#152.365s   #9      test.test_rnnrbm OK
#127.286s   #6      test.test_SdA OK
#39.252s    #5      test.test_dA OK
#27.56s     #4      test.test_convolutional_mlp OK
#15.454s    #3      test.test_mlp OK
#12.732s    #1      test.test_logistic_sgd OK
#12.638s    #2      test.test_logistic_cg OK

#i7-920
#296.475s   #7      code.test.test_dbn OK
#257.272s   #6      code.test.test_SdA OK
#234.776s   #9      code.test.test_rnnrbm OK
#233.896s   #8      code.test.test_rbm OK
#65.737s    #5      code.test.test_dA OK
#37.658s    #4      code.test.test_convolutional_mlp OK
#24.172s    #3      code.test.test_mlp OK
#20.401s    #1      code.test.test_logistic_sgd OK
#17.546s    #2      code.test.test_logistic_cg OK

# On Core2 duo E8500 with MRG
#308.004s   #7      code.test.test_dbn OK
#277.268s   #6      code.test.test_SdA OK
#126.102s   #8      code.test.test_rbm OK
#123.652s   #9      code.test.test_rnnrbm OK
#77.101s    #5      code.test.test_dA OK
#39.75s     #4      code.test.test_convolutional_mlp OK
#30.406s    #3      code.test.test_mlp OK
#21.132s    #2      code.test.test_logistic_cg OK
#17.945s    #1      code.test.test_logistic_sgd OK

# Unknown computer with older version of Theano
#569.882s   #9      code.test.test_rbm OK
#298.992s   #8      code.test.test_dbn OK
#268.901s   #7      code.test.test_SdA OK
#67.292s    #6      code.test.test_dA OK
#27.485s    #4      code.test.test_mlp OK
#26.204s    #5      code.test.test_convolutional_mlp OK
#14.676s    #3      code.test.test_logistic_cg OK
#10.66s     #2      code.test.test_logistic_sgd OK
#5.795s     #1      code.hmc.test_hmc.test_hmc OK

script:
  - cd data
  - ./download.sh
  - ls
  - cd ../code
  - pwd
  - ls
  - export THEANO_FLAGS=warn.ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise
  - python --version
  - nosetests $PART



================================================
FILE: DeepLearningTutorials/README.rst
================================================
Deep Learning Tutorials
=======================

Deep Learning is a new area of Machine Learning research, which has been
introduced with the objective of moving Machine Learning closer to one of its
original goals: Artificial Intelligence.  Deep Learning is about learning
multiple levels of representation and abstraction that help to make sense of
data such as images, sound, and text.  The tutorials presented here will
introduce you to some of the most important deep learning algorithms and will
also show you how to run them using Theano.  Theano is a python library that
makes writing deep learning models easy, and gives the option of training them
on a GPU.

The easiest way to follow the tutorials is to `browse them online
<http://deeplearning.net/tutorial/>`_.

`Main development <http://github.com/lisa-lab/DeepLearningTutorials>`_
of this project.

.. image:: https://secure.travis-ci.org/lisa-lab/DeepLearningTutorials.png
   :target: http://travis-ci.org/lisa-lab/DeepLearningTutorials

Project Layout
--------------

Subdirectories:

- code - Python files corresponding to each tutorial
- data - data and scripts to download data that is used by the tutorials
- doc  - restructured text used by Sphinx to build the tutorial website
- html - built automatically by doc/Makefile, contains tutorial website
- issues_closed - issue tracking
- issues_open - issue tracking
- misc - administrative scripts


Build instructions
------------------

To build the html version of the tutorials, install sphinx and run doc/Makefile


================================================
FILE: DeepLearningTutorials/__init__.py
================================================


================================================
FILE: DeepLearningTutorials/code/DBN.py
================================================
"""
"""
import os
import sys
import time

import numpy

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer
from rbm import RBM


# start-snippet-1
class DBN(object):
    """Deep Belief Network

    A deep belief network is obtained by stacking several RBMs on top of each
    other. The hidden layer of the RBM at layer `i` becomes the input of the
    RBM at layer `i+1`. The first layer RBM gets as input the input of the
    network, and the hidden layer of the last RBM represents the output. When
    used for classification, the DBN is treated as a MLP, by adding a logistic
    regression layer on top.
    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value (the default list is only
                               read, never modified)

        :type n_outs: int
        :param n_outs: dimension of the output of the network
        """

        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        # the seed is drawn before any layer is built so that the sequence
        # of numbers consumed from `numpy_rng` stays the same
        if theano_rng is None:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
                                 # of [int] labels
        # end-snippet-1
        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # The first layer reads the DBN input; every other layer reads
            # the activation of the sigmoid layer right below it, whose
            # width is the previous entry of `hidden_layers_sizes`.
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.sigmoid_layers[-1].output

            # construct the sigmoidal layer
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)

            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            # it's arguably a philosophical question...  but we are
            # going to only declare that the parameters of the
            # sigmoid_layers are parameters of the DBN. The visible
            # biases in the RBM are parameters of those RBMs, but not
            # of the DBN.
            self.params.extend(sigmoid_layer.params)

            # Construct an RBM that shares its weights and hidden biases
            # with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.params.extend(self.logLayer.params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

    def pretraining_functions(self, train_set_x, batch_size, k):
        '''Generates a list of functions, for performing one step of
        gradient descent at a given layer. The function will require
        as input the minibatch index, and to train an RBM you just
        need to iterate, calling the corresponding function on all
        minibatch indexes.

        Each returned function takes the minibatch `index` and an optional
        learning rate `lr` (default 0.1), and returns the cost given by the
        RBM's `get_cost_updates`.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared var. that contains all datapoints used
                            for training the RBM
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        :param k: number of Gibbs steps to do in CD-k / PCD-k

        :return: list with one compiled Theano function per RBM layer, in
                 the same order as `self.rbm_layers`
        '''

        index = T.lscalar('index')  # index to a minibatch
        learning_rate = T.scalar('lr')  # learning rate to use

        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:

            # get the cost and the updates list
            # using CD-k here (persistent=None) for training each RBM.
            # TODO: change cost function to reconstruction error
            cost, updates = rbm.get_cost_updates(learning_rate,
                                                 persistent=None, k=k)

            # compile the theano function
            # NOTE(review): newer Theano releases appear to deprecate
            # theano.Param in favour of theano.In -- confirm against the
            # Theano version in use before switching.
            fn = theano.function(
                inputs=[index, theano.Param(learning_rate, default=0.1)],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_set_x[batch_begin:batch_end]
                }
            )
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on
        every batch of the validation set, and a function `test` that
        computes the error on every batch of the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: It is a list that contains all the datasets;
                        it has to contain three pairs, `train`,
                        `valid`, `test` in this order, where each pair
                        is formed of two Theano variables, one for the
                        datapoints, the other for the labels
        :type batch_size: int
        :param batch_size: size of a minibatch
        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage

        :return: tuple `(train_fn, valid_score, test_score)`; `train_fn`
                 takes a minibatch index, the other two take no argument
                 and return a list with one error value per minibatch
        '''

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for validation and testing; floor
        # division keeps the counts integral so they can be fed to xrange
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches //= batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches //= batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates (plain gradient descent step)
        updates = [
            (param, param - gparam * learning_rate)
            for param, gparam in zip(self.params, gparams)
        ]

        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: train_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            }
        )

        test_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: test_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: test_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            }
        )

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: valid_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: valid_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            }
        )

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in xrange(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in xrange(n_test_batches)]

        return train_fn, valid_score, test_score


def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
             pretrain_lr=0.01, k=1, training_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=10):
    """
    Demonstrates how to train and test a Deep Belief Network.

    This is demonstrated on MNIST. The model is first pre-trained layer by
    layer, then fine-tuned with early stopping driven by the validation
    error. Progress is printed to stdout and timings to stderr; nothing is
    returned.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining
    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training
    :type k: int
    :param k: number of Gibbs steps in CD/PCD
    :type training_epochs: int
    :param training_epochs: maximal number of iterations to run the optimizer
    :type dataset: string
    :param dataset: path to the pickled dataset
    :type batch_size: int
    :param batch_size: the size of a minibatch
    """

    datasets = load_data(dataset)

    # only the training inputs are used directly here; the full `datasets`
    # list is handed to build_finetune_functions below
    train_set_x, _ = datasets[0]

    # compute number of minibatches for training; floor division keeps the
    # count integral so it can be fed to xrange
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches //= batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print('... building the model')
    # construct the Deep Belief Network
    dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28,
              hidden_layers_sizes=[1000, 1000, 1000],
              n_outs=10)

    # start-snippet-2
    #########################
    # PRETRAINING THE MODEL #
    #########################
    print('... getting the pretraining functions')
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                k=k)

    print('... pre-training the model')
    start_time = time.clock()
    ## Pre-train layer-wise
    for i in xrange(dbn.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            lr=pretrain_lr))
            # the two spaces after "cost" are intentional: they reproduce
            # the separator the former two-part print statement emitted
            print('Pre-training layer %i, epoch %d, cost  %s'
                  % (i, epoch, numpy.mean(c)))

    end_time = time.clock()
    # end-snippet-2
    sys.stderr.write('The pretraining code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print('... getting the finetuning functions')
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_lr
    )

    print('... finetuning the model')
    # early-stopping parameters
    patience = 4 * n_train_batches  # look at this many examples regardless
    patience_increase = 2.    # wait this much longer when a new best is
                              # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    # initialised up front so the final report cannot hit an unbound name
    # when no validation pass ever runs (e.g. training_epochs=0)
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            # one gradient step; the returned minibatch cost is not used
            train_fn(minibatch_index)
            # global count of minibatches processed before this one
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:

                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%'
                    % (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = time.clock()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'obtained at iteration %i, '
            'with test performance %f %%'
        ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
    )
    sys.stderr.write('The fine tuning code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')


# Run the DBN demonstration with its default arguments when this file is
# executed as a script (not when it is imported).
if __name__ == '__main__':
    test_DBN()


================================================
FILE: DeepLearningTutorials/code/SdA.py
================================================
"""
 This tutorial introduces stacked denoising auto-encoders (SdA) using Theano.

 Denoising autoencoders are the building blocks for SdA.
 They are based on auto-encoders as the ones used in Bengio et al. 2007.
 An autoencoder takes an input x and first maps it to a hidden representation
 y = f_{\theta}(x) = s(Wx+b), parameterized by \theta={W,b}. The resulting
 latent representation y is then mapped back to a "reconstructed" vector
 z \in [0,1]^d in input space z = g_{\theta'}(y) = s(W'y + b').  The weight
 matrix W' can optionally be constrained such that W' = W^T, in which case
 the autoencoder is said to have tied weights. The network is trained to
 minimize the reconstruction error (the error between x and z).

 For the denoising autoencoder, during training, first x is corrupted into
 \tilde{x}, where \tilde{x} is a partially destroyed version of x by means
 of a stochastic mapping. Afterwards y is computed as before (using
 \tilde{x}), y = s(W\tilde{x} + b) and z as s(W'y + b'). The reconstruction
 error is now measured between z and the uncorrupted input x, which is
 computed as the cross-entropy :
      - \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)]


 References :
   - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and
   Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103,
   2008
   - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
   Training of Deep Networks, Advances in Neural Information Processing
   Systems 19, 2007

"""
import os
import sys
import time

import numpy

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer
from dA import dA


# start-snippet-1
class SdA(object):
    """Stacked denoising auto-encoder class (SdA)

    A stacked denoising autoencoder model is obtained by stacking several
    dAs. The hidden layer of the dA at layer `i` becomes the input of
    the dA at layer `i+1`. The first layer dA gets as input the input of
    the SdA, and the hidden layer of the last dA represents the output.
    Note that after pretraining, the SdA is dealt with as a normal MLP,
    the dAs are only used to initialize the weights.
    """

    def __init__(
        self,
        numpy_rng,
        theano_rng=None,
        n_ins=784,
        hidden_layers_sizes=[500, 500],
        n_outs=10,
        corruption_levels=[0.1, 0.1]
    ):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value (the default list is only
                               read, never modified)

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer. Accepted for interface
                                  compatibility; this constructor does not
                                  read it -- the corruption level is passed
                                  to the functions returned by
                                  `pretraining_functions` instead
        """

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        # the seed is drawn before any layer is built so that the sequence
        # of numbers consumed from `numpy_rng` stays the same
        if theano_rng is None:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
                                 # [int] labels
        # end-snippet-1

        # The SdA is an MLP, for which all weights of intermediate layers
        # are shared with a different denoising autoencoder.
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer.
        # During pretraining we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well).
        # During finetuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP.

        # start-snippet-2
        for i in xrange(self.n_layers):
            # The first layer reads the SdA input; every other layer reads
            # the activation of the sigmoid layer right below it, whose
            # width is the previous entry of `hidden_layers_sizes`.
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.sigmoid_layers[-1].output

            # construct the sigmoidal layer
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            # it's arguably a philosophical question...
            # but we are going to only declare that the parameters of the
            # sigmoid_layers are parameters of the SdA;
            # the visible biases in the dA are parameters of those
            # dA, but not the SdA
            self.params.extend(sigmoid_layer.params)

            # Construct a denoising autoencoder that shares its weights and
            # hidden biases with this layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)
        # end-snippet-2
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs
        )

        self.params.extend(self.logLayer.params)

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

    def pretraining_functions(self, train_set_x, batch_size):
        ''' Generates a list of functions, each of them implementing one
        step in training the dA corresponding to the layer with same index.
        The function will require as input the minibatch index, and to train
        a dA you just need to iterate, calling the corresponding function on
        all minibatch indexes.

        Each returned function takes the minibatch `index`, an optional
        corruption level `corruption` (default 0.2) and an optional
        learning rate `lr` (default 0.1), and returns the cost given by the
        dA's `get_cost_updates`.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared variable that contains all datapoints used
                            for training the dA

        :type batch_size: int
        :param batch_size: size of a [mini]batch

        :return: list with one compiled Theano function per dA layer, in
                 the same order as `self.dA_layers`
        '''

        index = T.lscalar('index')  # index to a minibatch
        corruption_level = T.scalar('corruption')  # % of corruption to use
        learning_rate = T.scalar('lr')  # learning rate to use
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        # the loop variable is deliberately not called `dA`, so it cannot
        # be confused with the imported dA class
        for dA_layer in self.dA_layers:
            # get the cost and the updates list
            cost, updates = dA_layer.get_cost_updates(corruption_level,
                                                      learning_rate)
            # compile the theano function
            # NOTE(review): newer Theano releases appear to deprecate
            # theano.Param in favour of theano.In -- confirm against the
            # Theano version in use before switching.
            fn = theano.function(
                inputs=[
                    index,
                    theano.Param(corruption_level, default=0.2),
                    theano.Param(learning_rate, default=0.1)
                ],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_set_x[batch_begin: batch_end]
                }
            )
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on
        every batch of the validation set, and a function `test` that
        computes the error on every batch of the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: It is a list that contains all the datasets;
                         it has to contain three pairs, `train`,
                         `valid`, `test` in this order, where each pair
                         is formed of two Theano variables, one for the
                         datapoints, the other for the labels

        :type batch_size: int
        :param batch_size: size of a minibatch

        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage

        :return: tuple `(train_fn, valid_score, test_score)`; `train_fn`
                 takes a minibatch index, the other two take no argument
                 and return a list with one error value per minibatch
        '''

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # compute number of minibatches for validation and testing; floor
        # division keeps the counts integral so they can be fed to xrange
        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
        n_valid_batches //= batch_size
        n_test_batches = test_set_x.get_value(borrow=True).shape[0]
        n_test_batches //= batch_size

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates (plain gradient descent step)
        updates = [
            (param, param - gparam * learning_rate)
            for param, gparam in zip(self.params, gparams)
        ]

        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: train_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            },
            name='train'
        )

        test_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: test_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: test_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            },
            name='test'
        )

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: valid_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: valid_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            },
            name='valid'
        )

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in xrange(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in xrange(n_test_batches)]

        return train_fn, valid_score, test_score


def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
             pretrain_lr=0.001, training_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=1):
    """
    Demonstrates how to train and test a stochastic denoising autoencoder.

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used in the finetune stage
    (factor for the stochastic gradient)

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type n_iter: int
    :param n_iter: maximal number of iterations ot run the optimizer

    :type dataset: string
    :param dataset: path the the pickled dataset

    """

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size

    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'
    # construct the stacked denoising autoencoder class
    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=28 * 28,
        hidden_layers_sizes=[1000, 1000, 1000],
        n_outs=10
    )
    # end-snippet-3 start-snippet-4
    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)

    print '... pre-training the model'
    start_time = time.clock()
    ## Pre-train layer-wise
    corruption_levels = [.1, .2, .3]
    for i in xrange(sda.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                         corruption=corruption_levels[i],
                         lr=pretrain_lr))
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)

    end_time = time.clock()

    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    # end-snippet-4
    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_lr
    )

    print '... finetunning the model'
    # early-stopping parameters
    patience = 10 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'on iteration %i, '
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
    )
    print >> sys.stderr, ('The training code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))


# Run the SdA demo with its default arguments when executed as a script.
if __name__ == '__main__':
    test_SdA()


================================================
FILE: DeepLearningTutorials/code/__init__.py
================================================


================================================
FILE: DeepLearningTutorials/code/cA.py
================================================
"""This tutorial introduces Contractive auto-encoders (cA) using Theano.

 They are based on auto-encoders as the ones used in Bengio et
 al. 2007.  An autoencoder takes an input x and first maps it to a
 hidden representation y = f_{\theta}(x) = s(Wx+b), parameterized by
 \theta={W,b}. The resulting latent representation y is then mapped
 back to a "reconstructed" vector z \in [0,1]^d in input space z =
 g_{\theta'}(y) = s(W'y + b').  The weight matrix W' can optionally be
 constrained such that W' = W^T, in which case the autoencoder is said
 to have tied weights. The network is trained to minimize
 the reconstruction error (the error between x and z).  Adding the
 squared Frobenius norm of the Jacobian of the hidden mapping h with
 respect to the visible units yields the contractive auto-encoder:

      - \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)]
      + \| \frac{\partial h(x)}{\partial x} \|^2

 References :
   - S. Rifai, P. Vincent, X. Muller, X. Glorot, Y. Bengio: Contractive
   Auto-Encoders: Explicit Invariance During Feature Extraction, ICML-11

   - S. Rifai, X. Muller, X. Glorot, G. Mesnil, Y. Bengio, and Pascal
     Vincent. Learning invariant features through local space
     contraction. Technical Report 1360, Universite de Montreal

   - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
   Training of Deep Networks, Advances in Neural Information Processing
   Systems 19, 2007

"""
import os
import sys
import time

import numpy

import theano
import theano.tensor as T


from logistic_sgd import load_data
from utils import tile_raster_images

try:
    import PIL.Image as Image
except ImportError:
    import Image


class cA(object):
    r""" Contractive Auto-Encoder class (cA)

    The contractive autoencoder tries to reconstruct the input with an
    additional constraint on the latent space. With the objective of
    obtaining a robust representation of the input space, we
    regularize the L2 norm(Froebenius) of the jacobian of the hidden
    representation with respect to the input. Please refer to Rifai et
    al.,2011 for more details.

    If x is the input then equation (1) computes the projection of the
    input into the latent space h. Equation (2) computes the jacobian
    of h with respect to x.  Equation (3) computes the reconstruction
    of the input, while equation (4) computes the reconstruction
    error and the added regularization term from Eq.(2).

    .. math::

        h_i = s(W_i x + b_i)                                             (1)

        J_i = h_i (1 - h_i) * W_i                                        (2)

        x' = s(W' h  + b')                                               (3)

        L = -sum_{k=1}^d [x_k \log x'_k + (1-x_k) \log( 1-x'_k)]
             + lambda * sum_{i=1}^d sum_{j=1}^n J_{ij}^2                 (4)

    """

    def __init__(self, numpy_rng, input=None, n_visible=784, n_hidden=100,
                 n_batchsize=1, W=None, bhid=None, bvis=None):
        """Initialize the cA class by specifying the number of visible units
        (the dimension d of the input), the number of hidden units (the
        dimension d' of the latent or hidden space) and the batch size.
        The constructor also receives symbolic variables for the input,
        weights and bias.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: number random generator used to generate weights;
                          only used when `W` is not supplied

        :type input: theano.tensor.TensorType
        :param input: a symbolic description of the input or None for
                      standalone cA

        :type n_visible: int
        :param n_visible: number of visible units

        :type n_hidden: int
        :param n_hidden:  number of hidden units

        :type n_batchsize: int
        :param n_batchsize: number of examples per batch

        :type W: theano.tensor.TensorType
        :param W: Theano variable pointing to a set of weights that should be
                  shared between the cA and another architecture; if the cA
                  should be standalone set this to None

        :type bhid: theano.tensor.TensorType
        :param bhid: Theano variable pointing to a set of biases values (for
                     hidden units) that should be shared between the cA and
                     another architecture; if the cA should be standalone set
                     this to None

        :type bvis: theano.tensor.TensorType
        :param bvis: Theano variable pointing to a set of biases values (for
                     visible units) that should be shared between the cA and
                     another architecture; if the cA should be standalone set
                     this to None

        """
        self.n_visible = n_visible
        self.n_hidden = n_hidden
        self.n_batchsize = n_batchsize
        # note : W' was written as `W_prime` and b' as `b_prime`
        # The "was it supplied?" checks below compare against None
        # explicitly, so that any object passed in is used as-is whatever
        # its truth value.
        if W is None:
            # W is initialized with `initial_W` which is uniformely sampled
            # from -4*sqrt(6./(n_visible+n_hidden)) and
            # 4*sqrt(6./(n_hidden+n_visible))the output of uniform if
            # converted using asarray to dtype
            # theano.config.floatX so that the code is runable on GPU
            initial_W = numpy.asarray(
                numpy_rng.uniform(
                    low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                    high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                    size=(n_visible, n_hidden)
                ),
                dtype=theano.config.floatX
            )
            W = theano.shared(value=initial_W, name='W', borrow=True)

        if bvis is None:
            bvis = theano.shared(value=numpy.zeros(n_visible,
                                                   dtype=theano.config.floatX),
                                 borrow=True)

        if bhid is None:
            bhid = theano.shared(value=numpy.zeros(n_hidden,
                                                   dtype=theano.config.floatX),
                                 name='b',
                                 borrow=True)

        self.W = W
        # b corresponds to the bias of the hidden
        self.b = bhid
        # b_prime corresponds to the bias of the visible
        self.b_prime = bvis
        # tied weights, therefore W_prime is W transpose
        self.W_prime = self.W.T

        # if no input is given, generate a variable representing the input
        if input is None:
            # we use a matrix because we expect a minibatch of several
            # examples, each example being a row
            self.x = T.dmatrix(name='input')
        else:
            self.x = input

        self.params = [self.W, self.b, self.b_prime]

    def get_hidden_values(self, input):
        """ Computes the values of the hidden layer """
        return T.nnet.sigmoid(T.dot(input, self.W) + self.b)

    def get_jacobian(self, hidden, W):
        """Computes the jacobian of the hidden layer with respect to
        the input, reshapes are necessary for broadcasting the
        element-wise product on the right axis

        `hidden` is reshaped to (n_batchsize, 1, n_hidden) and `W` to
        (1, n_visible, n_hidden), so `hidden` must hold exactly
        `n_batchsize` examples.

        """
        return T.reshape(hidden * (1 - hidden),
                         (self.n_batchsize, 1, self.n_hidden)) * T.reshape(
                             W, (1, self.n_visible, self.n_hidden))

    def get_reconstructed_input(self, hidden):
        """Computes the reconstructed input given the values of the
        hidden layer

        """
        return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)

    def get_cost_updates(self, contraction_level, learning_rate):
        """ This function computes the cost and the updates for one training
        step of the cA

        As a side effect it stores the reconstruction term in `self.L_rec`
        and the contraction term in `self.L_jacob`.

        :param contraction_level: weight of the jacobian penalty in the cost

        :param learning_rate: step size of the gradient descent updates

        :returns: a tuple `(cost, updates)` where `updates` is a list of
                  `(param, new_value)` pairs, one per entry of `self.params`
        """

        y = self.get_hidden_values(self.x)
        z = self.get_reconstructed_input(y)
        J = self.get_jacobian(y, self.W)
        # note : we sum over the size of a datapoint; if we are using
        #        minibatches, L will be a vector, with one entry per
        #        example in minibatch
        self.L_rec = - T.sum(self.x * T.log(z) +
                             (1 - self.x) * T.log(1 - z),
                             axis=1)

        # Sum the squared jacobian entries and average over the number of
        # samples in the minibatch
        self.L_jacob = T.sum(J ** 2) / self.n_batchsize

        # note : L_rec is a vector, where each element is the
        #        cross-entropy cost of the reconstruction of the
        #        corresponding example of the minibatch. We need to
        #        compute the average of all these to get the cost of
        #        the minibatch
        cost = T.mean(self.L_rec) + contraction_level * T.mean(self.L_jacob)

        # compute the gradients of the cost of the `cA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)
        # generate the list of updates
        updates = [
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ]

        return (cost, updates)


def test_cA(learning_rate=0.01, training_epochs=20,
            dataset='mnist.pkl.gz',
            batch_size=10, output_folder='cA_plots', contraction_level=.1):
    """
    This demo is tested on MNIST

    :type learning_rate: float
    :param learning_rate: learning rate used for training the contracting
                          AutoEncoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: path to the picked dataset

    """
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)
    ####################################
    #        BUILDING THE MODEL        #
    ####################################

    rng = numpy.random.RandomState(123)

    ca = cA(numpy_rng=rng, input=x,
            n_visible=28 * 28, n_hidden=500, n_batchsize=batch_size)

    cost, updates = ca.get_cost_updates(contraction_level=contraction_level,
                                        learning_rate=learning_rate)

    train_ca = theano.function(
        [index],
        [T.mean(ca.L_rec), ca.L_jacob],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    start_time = time.clock()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through trainng set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_ca(batch_index))

        c_array = numpy.vstack(c)
        print 'Training epoch %d, reconstruction cost ' % epoch, numpy.mean(
            c_array[0]), ' jacobian norm ', numpy.mean(numpy.sqrt(c_array[1]))

    end_time = time.clock()

    training_time = (end_time - start_time)

    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((training_time) / 60.))
    image = Image.fromarray(tile_raster_images(
        X=ca.W.get_value(borrow=True).T,
        img_shape=(28, 28), tile_shape=(10, 10),
        tile_spacing=(1, 1)))

    image.save('cae_filters.png')

    os.chdir('../')


# Run the cA demo with its default arguments when executed as a script.
if __name__ == '__main__':
    test_cA()


================================================
FILE: DeepLearningTutorials/code/convolutional_mlp.py
================================================
"""This tutorial introduces the LeNet5 neural network architecture
using Theano.  LeNet5 is a convolutional neural network, good for
classifying images. This tutorial shows how to build the architecture,
and comes with all the hyper-parameters you need to reproduce the
paper's MNIST results.


This implementation simplifies the model in the following ways:

 - LeNetConvPool doesn't implement location-specific gain and bias parameters
 - LeNetConvPool doesn't implement pooling by average, it implements pooling
   by max.
 - Digit classification is implemented with a logistic regression rather than
   an RBF network
 - The original LeNet5 did not use fully-connected convolutions at the
   second layer

References:
 - Y. LeCun, L. Bottou, Y. Bengio and P. Haffner:
   Gradient-Based Learning Applied to Document
   Recognition, Proceedings of the IEEE, 86(11):2278-2324, November 1998.
   http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf

"""
import os
import sys
import time

import numpy

import theano
import theano.tensor as T
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv

from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer


class LeNetConvPoolLayer(object):
    """Convolution followed by max-pooling and a tanh non-linearity."""

    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
        """
        Build the layer and allocate its shared weight and bias variables.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)
        """

        # image and filters must agree on the number of input feature maps
        assert image_shape[1] == filter_shape[1]
        self.input = input

        n_filters = filter_shape[0]
        floatX = theano.config.floatX

        # every hidden unit sees
        # "num input feature maps * filter height * filter width" inputs
        fan_in = numpy.prod(filter_shape[1:])
        # every unit of the lower layer gets a gradient from
        # "num output feature maps * filter height * filter width" /
        #   pooling size
        fan_out = (n_filters * numpy.prod(filter_shape[2:]) /
                   numpy.prod(poolsize))

        # weights: uniform in [-W_bound, W_bound]
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        W_values = numpy.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=floatX
        )
        self.W = theano.shared(W_values, borrow=True)

        # biases: a 1D tensor holding one zero-initialised bias per output
        # feature map
        self.b = theano.shared(
            value=numpy.zeros((n_filters,), dtype=floatX),
            borrow=True
        )

        # convolution of the input feature maps with the filters
        convolved = conv.conv2d(
            input=input,
            filters=self.W,
            filter_shape=filter_shape,
            image_shape=image_shape
        )

        # max-pooling, applied to each feature map individually
        pooled = downsample.max_pool_2d(
            input=convolved,
            ds=poolsize,
            ignore_border=True
        )

        # the bias vector is reshaped to (1, n_filters, 1, 1) so that it is
        # broadcast across mini-batches and across feature map width & height
        broadcast_bias = self.b.dimshuffle('x', 0, 'x', 'x')
        self.output = T.tanh(pooled + broadcast_bias)

        # parameters of this layer
        self.params = [self.W, self.b]


def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
                    dataset='mnist.pkl.gz',
                    nkerns=[20, 50], batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: list of ints
    :param nkerns: number of kernels on each layer
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
    n_test_batches = test_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size
    n_valid_batches /= batch_size
    n_test_batches /= batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')   # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected sigmoidal layer
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activation=T.tanh
    )

    # classify the values of the fully-connected sigmoidal layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'
    # early-stopping parameters
    patience = 10000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            iter = (epoch - 1) * n_train_batches + minibatch_index

            if iter % 100 == 0:
                print 'training @ iter = ', iter
            cost_ij = train_model(minibatch_index)

            if (iter + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

# Run the LeNet5 demo with its default arguments when executed as a script.
if __name__ == '__main__':
    evaluate_lenet5()


def experiment(state, channel):
    """Run `evaluate_lenet5` with settings read from `state`.

    :param state: object exposing `learning_rate` and `dataset` attributes
    :param channel: not used by this function
                    (NOTE(review): presumably required by an external
                    experiment runner's calling convention -- confirm)
    """
    evaluate_lenet5(learning_rate=state.learning_rate,
                    dataset=state.dataset)


================================================
FILE: DeepLearningTutorials/code/dA.py
================================================
"""
 This tutorial introduces denoising auto-encoders (dA) using Theano.

 Denoising autoencoders are the building blocks for SdA.
 They are based on auto-encoders as the ones used in Bengio et al. 2007.
 An autoencoder takes an input x and first maps it to a hidden representation
 y = f_{\theta}(x) = s(Wx+b), parameterized by \theta={W,b}. The resulting
 latent representation y is then mapped back to a "reconstructed" vector
 z \in [0,1]^d in input space z = g_{\theta'}(y) = s(W'y + b').  The weight
 matrix W' can optionally be constrained such that W' = W^T, in which case
 the autoencoder is said to have tied weights. The network is trained to
 minimize the reconstruction error (the error between x and z).

 For the denoising autoencoder, during training, first x is corrupted into
 \tilde{x}, where \tilde{x} is a partially destroyed version of x by means
 of a stochastic mapping. Afterwards y is computed as before (using
 \tilde{x}), y = s(W\tilde{x} + b) and z as s(W'y + b'). The reconstruction
 error is now measured between z and the uncorrupted input x, which is
 computed as the cross-entropy :
      - \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)]


 References :
   - P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and
   Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103,
   2008
   - Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
   Training of Deep Networks, Advances in Neural Information Processing
   Systems 19, 2007

"""

import os
import sys
import time

import numpy

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

from logistic_sgd import load_data
from utils import tile_raster_images

try:
    import PIL.Image as Image
except ImportError:
    import Image


# start-snippet-1
class dA(object):
    r"""Denoising Auto-Encoder class (dA)

    A denoising autoencoder tries to reconstruct the input from a corrupted
    version of it by projecting it first in a latent space and reprojecting
    it afterwards back in the input space. Please refer to Vincent et al.,2008
    for more details. If x is the input then equation (1) computes a partially
    destroyed version of x by means of a stochastic mapping q_D. Equation (2)
    computes the projection of the input into the latent space. Equation (3)
    computes the reconstruction of the input, while equation (4) computes the
    reconstruction error.

    .. math::

        \tilde{x} ~ q_D(\tilde{x}|x)                                     (1)

        y = s(W \tilde{x} + b)                                           (2)

        z = s(W' y  + b')                                                (3)

        L(x,z) = -sum_{k=1}^d [x_k \log z_k + (1-x_k) \log( 1-z_k)]      (4)

    """

    def __init__(
        self,
        numpy_rng,
        theano_rng=None,
        input=None,
        n_visible=784,
        n_hidden=500,
        W=None,
        bhid=None,
        bvis=None
    ):
        """
        Initialize the dA class by specifying the number of visible units (the
        dimension d of the input ) and the number of hidden units ( the
        dimension d' of the latent or hidden space ). The corruption level is
        not a constructor argument; it is passed to `get_cost_updates`. The
        constructor also receives symbolic variables for the input, weights
        and bias. Such symbolic variables are useful when, for example, the
        input is the result of some computations, or when weights are shared
        between the dA and an MLP layer. When dealing with SdAs this always
        happens, the dA on layer 2 gets as input the output of the dA on
        layer 1, and the weights of the dA are used in the second stage of
        training to construct an MLP.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: random number generator used to generate weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                     generated based on a seed drawn from `numpy_rng`

        :type input: theano.tensor.TensorType
        :param input: a symbolic description of the input or None for
                      standalone dA

        :type n_visible: int
        :param n_visible: number of visible units

        :type n_hidden: int
        :param n_hidden:  number of hidden units

        :type W: theano.tensor.TensorType
        :param W: Theano variable pointing to a set of weights that should be
                  shared between the dA and another architecture; if dA should
                  be standalone set this to None

        :type bhid: theano.tensor.TensorType
        :param bhid: Theano variable pointing to a set of biases values (for
                     hidden units) that should be shared between dA and
                     another architecture; if dA should be standalone set
                     this to None

        :type bvis: theano.tensor.TensorType
        :param bvis: Theano variable pointing to a set of biases values (for
                     visible units) that should be shared between dA and
                     another architecture; if dA should be standalone set
                     this to None


        """
        self.n_visible = n_visible
        self.n_hidden = n_hidden

        # Optional arguments are compared against None explicitly: relying on
        # the truth value of a tensor-like object is fragile (array-likes
        # raise, and an object is not "missing" just because it is falsy).

        # create a Theano random generator that gives symbolic random values
        if theano_rng is None:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # note : W' was written as `W_prime` and b' as `b_prime`
        if W is None:
            # W is initialized with `initial_W` which is uniformly sampled
            # between -4*sqrt(6./(n_visible+n_hidden)) and
            # 4*sqrt(6./(n_hidden+n_visible)); the output of uniform is
            # converted using asarray to dtype theano.config.floatX so
            # that the code is runnable on GPU
            initial_W = numpy.asarray(
                numpy_rng.uniform(
                    low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                    high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                    size=(n_visible, n_hidden)
                ),
                dtype=theano.config.floatX
            )
            W = theano.shared(value=initial_W, name='W', borrow=True)

        if bvis is None:
            bvis = theano.shared(
                value=numpy.zeros(
                    n_visible,
                    dtype=theano.config.floatX
                ),
                borrow=True
            )

        if bhid is None:
            bhid = theano.shared(
                value=numpy.zeros(
                    n_hidden,
                    dtype=theano.config.floatX
                ),
                name='b',
                borrow=True
            )

        self.W = W
        # b corresponds to the bias of the hidden
        self.b = bhid
        # b_prime corresponds to the bias of the visible
        self.b_prime = bvis
        # tied weights, therefore W_prime is W transpose
        self.W_prime = self.W.T
        self.theano_rng = theano_rng
        # if no input is given, generate a variable representing the input
        if input is None:
            # we use a matrix because we expect a minibatch of several
            # examples, each example being a row
            self.x = T.dmatrix(name='input')
        else:
            self.x = input

        # parameters updated by gradient descent in `get_cost_updates`
        self.params = [self.W, self.b, self.b_prime]
    # end-snippet-1

    def get_corrupted_input(self, input, corruption_level):
        """This function keeps ``1-corruption_level`` entries of the inputs the
        same and zeroes out a randomly selected subset of size
        ``corruption_level``.

        Note : first argument of theano.rng.binomial is the shape(size) of
               random numbers that it should produce
               second argument is the number of trials
               third argument is the probability of success of any trial

                this will produce an array of 0s and 1s where 1 has a
                probability of 1 - ``corruption_level`` and 0 with
                ``corruption_level``

                The binomial function returns the int64 data type by
                default.  int64 multiplied by the input
                type(floatX) always returns float64.  To keep all data
                in floatX when floatX is float32, we set the dtype of
                the binomial to floatX. As in our case the value of
                the binomial is always 0 or 1, this doesn't change the
                result. This is needed to allow the gpu to work
                correctly as it only supports float32 for now.

        """
        return self.theano_rng.binomial(size=input.shape, n=1,
                                        p=1 - corruption_level,
                                        dtype=theano.config.floatX) * input

    def get_hidden_values(self, input):
        """ Computes the values of the hidden layer: sigmoid(input W + b) """
        return T.nnet.sigmoid(T.dot(input, self.W) + self.b)

    def get_reconstructed_input(self, hidden):
        """Computes the reconstructed input given the values of the
        hidden layer: sigmoid(hidden W' + b')

        """
        return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)

    def get_cost_updates(self, corruption_level, learning_rate):
        """ This function computes the cost and the updates for one training
        step of the dA

        :param corruption_level: probability with which each input entry is
                                 zeroed out before encoding

        :param learning_rate: step size of the gradient descent update

        :return: tuple ``(cost, updates)`` where ``cost`` is the mean
                 cross-entropy over the minibatch and ``updates`` is a list
                 of ``(param, new_value)`` pairs, one per entry of
                 ``self.params``
        """

        tilde_x = self.get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(tilde_x)
        z = self.get_reconstructed_input(y)
        # note : we sum over the size of a datapoint; if we are using
        #        minibatches, L will be a vector, with one entry per
        #        example in minibatch
        L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
        # note : L is now a vector, where each element is the
        #        cross-entropy cost of the reconstruction of the
        #        corresponding example of the minibatch. We need to
        #        compute the average of all these to get the cost of
        #        the minibatch
        cost = T.mean(L)

        # compute the gradients of the cost of the `dA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)
        # generate the list of updates
        updates = [
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ]

        return (cost, updates)


def test_dA(learning_rate=0.1, training_epochs=15,
            dataset='mnist.pkl.gz',
            batch_size=20, output_folder='dA_plots'):

    """
    This demo is tested on MNIST

    :type learning_rate: float
    :param learning_rate: learning rate used for training the DeNosing
                          AutoEncoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: path to the picked dataset

    """
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)
    ####################################
    # BUILDING THE MODEL NO CORRUPTION #
    ####################################

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=28 * 28,
        n_hidden=500
    )

    cost, updates = da.get_cost_updates(
        corruption_level=0.,
        learning_rate=learning_rate
    )

    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    start_time = time.clock()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through trainng set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))

        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)

    end_time = time.clock()

    training_time = (end_time - start_time)

    print >> sys.stderr, ('The no corruption code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((training_time) / 60.))
    image = Image.fromarray(
        tile_raster_images(X=da.W.get_value(borrow=True).T,
                           img_shape=(28, 28), tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('filters_corruption_0.png')

    #####################################
    # BUILDING THE MODEL CORRUPTION 30% #
    #####################################

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=28 * 28,
        n_hidden=500
    )

    cost, updates = da.get_cost_updates(
        corruption_level=0.3,
        learning_rate=learning_rate
    )

    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    start_time = time.clock()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through trainng set
        c = []
        for batch_index in xrange(n_train_batches):
            c.append(train_da(batch_index))

        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)

    end_time = time.clock()

    training_time = (end_time - start_time)

    print >> sys.stderr, ('The 30% corruption code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % (training_time / 60.))

    image = Image.fromarray(tile_raster_images(
        X=da.W.get_value(borrow=True).T,
        img_shape=(28, 28), tile_shape=(10, 10),
        tile_spacing=(1, 1)))
    image.save('filters_corruption_30.png')

    os.chdir('../')


# When this file is executed as a script, run the dA demo with its default
# arguments.
if __name__ == '__main__':
    test_dA()


================================================
FILE: DeepLearningTutorials/code/hmc/__init__.py
================================================


================================================
FILE: DeepLearningTutorials/code/hmc/hmc.py
================================================
"""
TODO
"""

import numpy

from theano import function, shared
from theano import tensor as TT
import theano

def sharedX(X, name):
    """Return a Theano shared variable named `name` holding `X`.

    `X` is first converted with `numpy.asarray` to the dtype given by
    `theano.config.floatX`.
    """
    return shared(numpy.asarray(X, dtype=theano.config.floatX), name=name)


def kinetic_energy(vel):
    """Returns the kinetic energy associated with the given velocity,
    assuming a mass of 1.

    Parameters
    ----------
    vel: theano matrix
        Symbolic matrix whose rows are velocity vectors.

    Returns
    -------
    return: theano vector
        Vector whose i-th entry is the kinetic energy associated with vel[i],
        i.e. half the sum of the squared entries of that row.

    """
    squared_speed = (vel ** 2).sum(axis=1)
    return 0.5 * squared_speed


def hamiltonian(pos, vel, energy_fn):
    """
    Returns the Hamiltonian (potential energy plus kinetic energy) of the
    given positions and velocities.

    Parameters
    ----------
    pos: theano matrix
        Symbolic matrix whose rows are position vectors.
    vel: theano matrix
        Symbolic matrix whose rows are velocity vectors.
    energy_fn: python function
        Python function, operating on symbolic theano variables, used to
        compute the potential energy at a given position.

    Returns
    -------
    return: theano vector
        Vector whose i-th entry is the Hamiltonian at position pos[i] and
        velocity vel[i].
    """
    potential = energy_fn(pos)
    # kinetic_energy assumes a mass of 1
    kinetic = kinetic_energy(vel)
    return potential + kinetic


def metropolis_hastings_accept(energy_prev, energy_next, s_rng):
    """
    Performs a Metropolis-Hastings accept-reject move.

    Parameters
    ----------
    energy_prev: theano vector
        Symbolic theano tensor which contains the energy associated with the
        configuration at time-step t.
    energy_next: theano vector
        Symbolic theano tensor which contains the energy associated with the
        proposed configuration at time-step t+1.
    s_rng: theano.tensor.shared_randomstreams.RandomStreams
        Theano shared random stream object used to generate the random number
        used in proposal.

    Returns
    -------
    return: symbolic comparison result, same shape as `energy_prev`
        An entry is true where exp(energy_prev - energy_next) is at least
        the matching draw from `s_rng.uniform` (move accepted), false
        otherwise.
    """
    ediff = energy_prev - energy_next
    acceptance = TT.exp(ediff)
    # one independent draw per entry of energy_prev
    draw = s_rng.uniform(size=energy_prev.shape)
    return (acceptance - draw) >= 0


def simulate_dynamics(initial_pos, initial_vel, stepsize, n_steps, energy_fn):
    """
    Return final (position, velocity) obtained after `n_steps` leapfrog
    updates, using Hamiltonian dynamics.

    Parameters
    ----------
    initial_pos: shared theano matrix
        Initial position at which to start the simulation
    initial_vel: theano matrix
        Initial velocity of particles
    stepsize: shared theano scalar
        Scalar value controlling amount by which to move
    n_steps: integer
        Total number of leapfrog steps. The first step is computed
        explicitly below; the remaining `n_steps - 1` are run by
        `theano.scan`.
    energy_fn: python function
        Python function, operating on symbolic theano variables, used to
        compute the potential energy at a given position.

    Returns
    -------
    rval1: theano matrix
        Final positions obtained after simulation
    rval2: theano matrix
        Final velocity obtained after simulation
    """

    def leapfrog(pos, vel, step):
        """
        Inside loop of Scan. Performs one step of leapfrog update, using
        Hamiltonian dynamics.

        Parameters
        ----------
        pos: theano matrix
            in leapfrog update equations, represents pos(t), position at time t
        vel: theano matrix
            in leapfrog update equations, represents vel(t - stepsize/2),
            velocity at time (t - stepsize/2)
        step: theano scalar
            scalar value controlling amount by which to move

        Returns
        -------
        rval1: [theano matrix, theano matrix]
            Symbolic theano matrices for new position pos(t + stepsize), and
            velocity vel(t + stepsize/2)
        rval2: dictionary
            Dictionary of updates for the Scan Op (always empty here)
        """
        # from pos(t) and vel(t-stepsize/2), compute vel(t+stepsize/2)
        dE_dpos = TT.grad(energy_fn(pos).sum(), pos)
        new_vel = vel - step * dE_dpos
        # from vel(t+stepsize/2) compute pos(t+stepsize)
        new_pos = pos + step * new_vel
        return [new_pos, new_vel], {}

    # compute velocity at time-step: t + stepsize/2
    initial_energy = energy_fn(initial_pos)
    dE_dpos = TT.grad(initial_energy.sum(), initial_pos)
    vel_half_step = initial_vel - 0.5 * stepsize * dE_dpos

    # compute position at time-step: t + stepsize
    pos_full_step = initial_pos + stepsize * vel_half_step

    # perform leapfrog updates: the scan op is used to repeatedly compute
    # vel(t + (m-1/2)*stepsize) and pos(t + m*stepsize) for m in [2,n_steps].
    (all_pos, all_vel), scan_updates = theano.scan(
        leapfrog,
        outputs_info=[
            dict(initial=pos_full_step),
            dict(initial=vel_half_step),
        ],
        non_sequences=[stepsize],
        n_steps=n_steps - 1)
    # only the state after the last scan iteration is needed
    final_pos = all_pos[-1]
    final_vel = all_vel[-1]
    # NOTE: Scan always returns an updates dictionary, in case the
    # scanned function draws samples from a RandomStream. These
    # updates must then be used when compiling the Theano function, to
    # avoid drawing the same random numbers each time the function is
    # called. In this case however, we consciously ignore
    # "scan_updates" because we know it is empty.
    assert not scan_updates

    # The last velocity returned by scan is vel(t +
    # (n_steps - 1 / 2) * stepsize) We therefore perform one more half-step
    # to return vel(t + n_steps * stepsize)
    energy = energy_fn(final_pos)
    final_vel = final_vel - 0.5 * stepsize * TT.grad(energy.sum(), final_pos)

    # return new proposal state
    return final_pos, final_vel


# start-snippet-1
def hmc_move(s_rng, positions, energy_fn, stepsize, n_steps):
    """
    Build the symbolic graph for one step of Hybrid Monte-Carlo sampling: a
    random velocity is drawn with `s_rng.normal`, `n_steps` leap-frog
    updates are simulated with Hamiltonian dynamics, and the resulting
    proposal goes through a Metropolis-Hastings accept-reject test.

    Parameters
    ----------
    s_rng: theano shared random stream
        Symbolic random number generator used to draw random velocity and
        perform accept-reject move.
    positions: shared theano matrix
        Symbolic matrix whose rows are position vectors.
    energy_fn: python function
        Python function, operating on symbolic theano variables, used to
        compute the potential energy at a given position.
    stepsize:  shared theano scalar
        Shared variable containing the stepsize to use for `n_steps` of HMC
        simulation steps.
    n_steps: integer
        Number of HMC steps to perform before proposing a new position.

    Returns
    -------
    rval1: theano variable
        Output of `metropolis_hastings_accept`: true where the move is
        accepted, false otherwise
    rval2: theano matrix
        Matrix whose rows contain the proposed "new position"
    """
    # end-snippet-1 start-snippet-2
    # draw a random starting velocity, same shape as the positions
    start_vel = s_rng.normal(size=positions.shape)
    # end-snippet-2 start-snippet-3
    # let the particles evolve under Hamiltonian dynamics
    final_pos, final_vel = simulate_dynamics(
        positions, start_vel, stepsize, n_steps, energy_fn)
    # end-snippet-3 start-snippet-4
    # compare the joint (position, velocity) energy before and after the
    # simulation to decide whether the proposal is kept
    energy_before = hamiltonian(positions, start_vel, energy_fn)
    energy_after = hamiltonian(final_pos, final_vel, energy_fn)
    accept = metropolis_hastings_accept(energy_before, energy_after, s_rng)
    # end-snippet-4
    return accept, final_pos


# start-snippet-5
def hmc_updates(positions, stepsize, avg_acceptance_rate, final_pos, accept,
                target_acceptance_rate, stepsize_inc, stepsize_dec,
                stepsize_min, stepsize_max, avg_acceptance_slowness):
    """This function is executed after `n_steps` of HMC sampling
    (`hmc_move` function). It creates the updates used by
    the `simulate` function. It takes care of updating: the position
    (if the move is accepted), the stepsize (to track a given target
    acceptance rate) and the average acceptance rate (computed as a
    moving average).

    Parameters
    ----------
    positions: shared variable, theano matrix
        Shared theano matrix whose rows contain the old position
    stepsize: shared variable, theano scalar
        Shared theano scalar containing current step size
    avg_acceptance_rate: shared variable, theano scalar
        Shared theano scalar containing the current average acceptance rate
    final_pos: theano matrix
        Theano matrix whose rows contain the new (proposed) position
    accept: theano variable
        Boolean-type variable telling whether the proposed HMC move should
        be accepted. Its first axis is kept and broadcast against the
        remaining axes of `final_pos`.
    target_acceptance_rate: float
        The stepsize is modified in order to track this target acceptance rate.
    stepsize_inc: float
        Factor by which stepsize is multiplied when acceptance rate is too
        high.
    stepsize_dec: float
        Factor by which stepsize is multiplied when acceptance rate is too
        low.
    stepsize_min: float
        Lower-bound on `stepsize`.
    stepsize_max: float
        Upper-bound on `stepsize`.
    avg_acceptance_slowness: float
        Average acceptance rate is computed as an exponential moving average.
        (1-avg_acceptance_slowness) is the weight given to the newest
        observation.

    Returns
    -------
    rval1: list of (shared variable, new value) pairs
        Updates to be used by the `HMC_Sampler.simulate`
        function.  The updates target the position, stepsize and average
        acceptance rate.

    """

    ## POSITION UPDATES ##
    # add broadcastable axes to `accept` so that it lines up with the
    # dimensions of final_pos.
    accept_matrix = accept.dimshuffle(0, *(('x',) * (final_pos.ndim - 1)))
    # if accept is True, update to `final_pos` else stay put
    new_positions = TT.switch(accept_matrix, final_pos, positions)
    # end-snippet-5 start-snippet-7
    ## STEPSIZE UPDATES ##
    # if acceptance rate is too low, our sampler is too "noisy" and we reduce
    # the stepsize. If it is too high, our sampler is too conservative, we can
    # get away with a larger stepsize (resulting in better mixing).
    _new_stepsize = TT.switch(avg_acceptance_rate > target_acceptance_rate,
                              stepsize * stepsize_inc, stepsize * stepsize_dec)
    # maintain stepsize in [stepsize_min, stepsize_max]
    new_stepsize = TT.clip(_new_stepsize, stepsize_min, stepsize_max)

    # end-snippet-7 start-snippet-6
    ## ACCEPT RATE UPDATES ##
    # perform exponential moving average; the mean of `accept` is computed in
    # a dtype wide enough for both `accept` and the running average
    mean_dtype = theano.scalar.upcast(accept.dtype, avg_acceptance_rate.dtype)
    new_acceptance_rate = TT.add(
        avg_acceptance_slowness * avg_acceptance_rate,
        (1.0 - avg_acceptance_slowness) * accept.mean(dtype=mean_dtype))
    # end-snippet-6 start-snippet-8
    return [(positions, new_positions),
            (stepsize, new_stepsize),
            (avg_acceptance_rate, new_acceptance_rate)]
    # end-snippet-8


class HMC_sampler(object):
    """
    Convenience wrapper for performing Hybrid Monte Carlo (HMC). It creates the
    symbolic graph for performing an HMC simulation (using `hmc_move` and
    `hmc_updates`). The graph is then compiled into the `simulate` function, a
    theano function which runs the simulation and updates the required shared
    variables.

    Users should interface with the sampler through the `draw` function which
    advances the markov chain and returns the current sample by calling
    `simulate` and then reading the `positions` shared variable.

    The hyper-parameters are the same as those used by Marc'Aurelio's
    'train_mcRBM.py' file (available on his personal home page).
    """

    def __init__(self, **kwargs):
        """Store every keyword argument as an attribute of the sampler."""
        self.__dict__.update(kwargs)

    @classmethod
    def new_from_shared_positions(
        cls,
        shared_positions,
        energy_fn,
        initial_stepsize=0.01,
        target_acceptance_rate=.9,
        n_steps=20,
        stepsize_dec=0.98,
        stepsize_min=0.001,
        stepsize_max=0.25,
        stepsize_inc=1.02,
        # used in geometric avg. 1.0 would be not moving at all
        avg_acceptance_slowness=0.9,
        seed=12345
    ):
        """
        Build a sampler whose chain state lives in `shared_positions`.

        :param shared_positions: theano ndarray shared var with
            many particle [initial] positions

        :param energy_fn:
            callable such that energy_fn(positions)
            returns theano vector of energies.
            The len of this vector is the batchsize.

            The sum of this energy vector must be differentiable (with
            theano.tensor.grad) with respect to the positions for HMC
            sampling to work.

        :param initial_stepsize: initial value of the shared `stepsize`

        :param target_acceptance_rate: acceptance rate tracked by the
            stepsize adaptation; also the initial value of the shared
            `avg_acceptance_rate`

        :param n_steps: number of leapfrog steps per HMC move

        :param stepsize_dec: forwarded to `hmc_updates`
        :param stepsize_min: forwarded to `hmc_updates`
        :param stepsize_max: forwarded to `hmc_updates`
        :param stepsize_inc: forwarded to `hmc_updates`
        :param avg_acceptance_slowness: forwarded to `hmc_updates`

        :param seed: seed of the `RandomStreams` used by the sampler

        :return: a `cls` instance exposing `positions`, `stepsize`,
            `stepsize_min`, `stepsize_max`, `avg_acceptance_rate`,
            `target_acceptance_rate`, `s_rng`, `_updates` and `simulate`

        """
        # allocate shared variables
        stepsize = sharedX(initial_stepsize, 'hmc_stepsize')
        avg_acceptance_rate = sharedX(target_acceptance_rate,
                                      'avg_acceptance_rate')
        s_rng = TT.shared_randomstreams.RandomStreams(seed)

        # define graph for an `n_steps` HMC simulation
        accept, final_pos = hmc_move(
            s_rng,
            shared_positions,
            energy_fn,
            stepsize,
            n_steps)

        # define the updates to apply on every `simulate` call
        simulate_updates = hmc_updates(
            shared_positions,
            stepsize,
            avg_acceptance_rate,
            final_pos=final_pos,
            accept=accept,
            stepsize_min=stepsize_min,
            stepsize_max=stepsize_max,
            stepsize_inc=stepsize_inc,
            stepsize_dec=stepsize_dec,
            target_acceptance_rate=target_acceptance_rate,
            avg_acceptance_slowness=avg_acceptance_slowness)

        # compile theano function: no inputs or outputs, only side effects
        # on the shared variables
        simulate = function([], [], updates=simulate_updates)

        # create HMC_sampler object with the following attributes ...
        return cls(
            positions=shared_positions,
            stepsize=stepsize,
            stepsize_min=stepsize_min,
            stepsize_max=stepsize_max,
            avg_acceptance_rate=avg_acceptance_rate,
            target_acceptance_rate=target_acceptance_rate,
            s_rng=s_rng,
            _updates=simulate_updates,
            simulate=simulate)

    def draw(self, **kwargs):
        """
        Returns a new position obtained after `n_steps` of HMC simulation.

        Parameters
        ----------
        kwargs: dictionary
            The `kwargs` dictionary is passed to the shared variable
            (self.positions) `get_value()` function.  For example, to avoid
            copying the shared variable value, consider passing `borrow=True`.
            When `borrow` is not given it defaults to False.

        Returns
        -------
        rval: numpy matrix
            Numpy matrix of dimensions similar to `initial_position`.
        """
        self.simulate()
        # Honour the documented contract: forward the caller's keyword
        # arguments instead of silently dropping them. `borrow` keeps its
        # previous hard-coded value unless the caller overrides it.
        kwargs.setdefault('borrow', False)
        return self.positions.get_value(**kwargs)


================================================
FILE: DeepLearningTutorials/code/hmc/test_hmc.py
================================================
import numpy
from scipy import linalg
import theano

from hmc import HMC_sampler


def sampler_on_nd_gaussian(sampler_cls, burnin, n_samples, dim=10):
    batchsize = 3

    rng = numpy.random.RandomState(123)

    # Define a covariance and mu for a gaussian
    mu = numpy.array(rng.rand(dim) * 10, dtype=theano.config.floatX)
    cov = numpy.array(rng.rand(dim, dim), dtype=theano.config.floatX)
    cov = (cov + cov.T) / 2.
    cov[numpy.arange(dim), numpy.arange(dim)] = 1.0
    cov_inv = linalg.inv(cov)

    # Define energy function for a multi-variate Gaussian
    def gaussian_energy(x):
        return 0.5 * (theano.tensor.dot((x - mu), cov_inv) *
                      (x - mu)).sum(axis=1)

    # Declared shared random variable for positions
    position = rng.randn(batchsize, dim).astype(theano.config.floatX)
    position = theano.shared(position)

    # Create HMC sampler
    sampler = sampler_cls(position, gaussian_energy,
                          initial_stepsize=1e-3, stepsize_max=0.5)

    # Start with a burn-in process
    garbage = [sampler.draw() for r in xrange(burnin)]  # burn-in Draw
    # `n_samples`: result is a 3D tensor of dim [n_samples, batchsize,
    # dim]
    _samples = numpy.asarray([sampler.draw() for r in xrange(n_samples)])
    # Flatten to [n_samples * batchsize, dim]
    samples = _samples.T.reshape(dim, -1).T

    print '****** TARGET VALUES ******'
    print 'target mean:', mu
    print 'target cov:\n', cov

    print '****** EMPIRICAL MEAN/COV USING HMC ******'
    print 'empirical mean: ', samples.mean(axis=0)
    print 'empirical_cov:\n', numpy.cov(samples.T)

    print '****** HMC INTERNALS ******'
    print 'final stepsize', sampler.stepsize.get_value()
    print 'final acceptance_rate', sampler.avg_acceptance_rate.get_value()

    return sampler


def test_hmc():
    """Run the HMC sampler on a 5-d Gaussian and check that the average
    acceptance rate ends within 0.1 of its target and that the stepsize
    stays inside its configured bounds."""
    sampler = sampler_on_nd_gaussian(HMC_sampler.new_from_shared_positions,
                                     burnin=1000, n_samples=1000, dim=5)

    rate_gap = abs(sampler.avg_acceptance_rate.get_value() -
                   sampler.target_acceptance_rate)
    assert rate_gap < .1

    final_stepsize = sampler.stepsize.get_value()
    assert final_stepsize >= sampler.stepsize_min
    assert final_stepsize <= sampler.stepsize_max


================================================
FILE: DeepLearningTutorials/code/imdb.py
================================================
import cPickle
import gzip
import os

import numpy
import theano


def prepare_data(seqs, labels, maxlen=None):
    """Create the matrices from the datasets.

    This pads each sequence to the same length: the length of the
    longest remaining sequence.

    If maxlen is set, every sequence whose length is not strictly
    smaller than maxlen is dropped (together with its label); sequences
    are not truncated.

    This swaps the axes: the returned arrays have shape
    (max sequence length, number of samples).

    :param seqs: list of sequences (each a list of word indices)
    :param labels: one label per sequence
    :param maxlen: None, or the exclusive upper bound on sequence length
    :return: tuple ``(x, x_mask, labels)`` where ``x`` is an int64 matrix
        of zero-padded sequences, ``x_mask`` is a floatX matrix holding 1.
        at the positions filled in ``x``, and ``labels`` are the labels of
        the kept sequences. ``(None, None, None)`` is returned when no
        sequence is left to pack.
    """
    # x: a list of sentences
    lengths = [len(s) for s in seqs]

    if maxlen is not None:
        kept = [(n, s, y) for n, s, y in zip(lengths, seqs, labels)
                if n < maxlen]
        lengths = [n for n, _, _ in kept]
        seqs = [s for _, s, _ in kept]
        labels = [y for _, _, y in kept]

    # Nothing to pack. This is checked whether or not maxlen was given, so
    # that empty input does not reach numpy.max (which raises on an empty
    # list).
    if len(lengths) < 1:
        return None, None, None

    n_samples = len(seqs)
    maxlen = numpy.max(lengths)

    x = numpy.zeros((maxlen, n_samples), dtype='int64')
    x_mask = numpy.zeros((maxlen, n_samples), dtype=theano.config.floatX)
    for idx, (seq, length) in enumerate(zip(seqs, lengths)):
        # one column per sample, padded with zeros at the end
        x[:length, idx] = seq
        x_mask[:length, idx] = 1.

    return x, x_mask, labels


def get_dataset_file(dataset, default_dataset, origin):
    '''Resolve ``dataset`` to a path, downloading the default file if needed.

    Lookup order:

    1. ``dataset`` is used as given when it contains a directory part or
       names an existing file.
    2. Otherwise ``<directory of this module>/../data/<dataset>`` is used
       when that file exists, or when ``dataset`` is the default file name
       (so that a later download lands in the data directory).

    If the resolved path does not exist and its file name equals
    ``default_dataset``, the file is downloaded from ``origin``.

    :param dataset: path or bare file name of the dataset
    :param default_dataset: file name that is allowed to be downloaded
    :param origin: URL the default dataset is fetched from
    :returns: the resolved path; it is NOT guaranteed to exist when
        ``dataset`` is not the default file name
    '''
    data_dir, data_file = os.path.split(dataset)
    is_default = data_file == default_dataset

    if data_dir == "" and not os.path.isfile(dataset):
        # Check if dataset is in the data directory.
        new_path = os.path.join(
            os.path.split(__file__)[0],
            "..",
            "data",
            dataset
        )
        if os.path.isfile(new_path) or is_default:
            dataset = new_path

    if is_default and not os.path.isfile(dataset):
        # urlretrieve lives in ``urllib`` on Python 2 and in
        # ``urllib.request`` on Python 3; support both.
        try:
            from urllib import urlretrieve
        except ImportError:
            from urllib.request import urlretrieve
        # single-argument parenthesised form prints identically on 2 and 3
        print('Downloading data from %s' % origin)
        urlretrieve(origin, dataset)
    return dataset


def load_data(path="imdb.pkl", n_words=100000, valid_portion=0.1, maxlen=None,
              sort_by_len=True):
    '''Load the IMDB dataset and split it into train, valid and test sets.

    :type path: String
    :param path: The path to the dataset (here IMDB).  It is resolved, and
        downloaded when missing, by ``get_dataset_file``.  A path ending
        in ``.gz`` is read through ``gzip``.
    :type n_words: int
    :param n_words: The number of words to keep in the vocabulary.
        Every word index ``>= n_words`` is replaced by 1 (unknown).
    :type valid_portion: float
    :param valid_portion: The proportion of the full train set used for
        the validation set.
    :type maxlen: None or positive int
    :param maxlen: When set, training sequences whose length is not
        strictly below ``maxlen`` are dropped before the train/valid
        split.  The test set is not filtered.
    :type sort_by_len: bool
    :param sort_by_len: Sort the train, valid and test sets by sequence
        length. This allows faster execution as it causes less padding
        per minibatch. Another mechanism must be used to shuffle the
        train set at each epoch.

    :returns: ``(train, valid, test)``; each is an ``(x, y)`` pair of
        lists.  The train/valid split is a random permutation drawn from
        numpy's global random state.
    '''

    #############
    # LOAD DATA #
    #############

    # Load the dataset
    path = get_dataset_file(
        path, "imdb.pkl",
        "http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl")

    if path.endswith(".gz"):
        f = gzip.open(path, 'rb')
    else:
        f = open(path, 'rb')

    # Close the handle even when unpickling fails.
    # NOTE(security): unpickling can execute arbitrary code; the file may
    # have been downloaded over plain http, so only use trusted sources.
    try:
        train_set = cPickle.load(f)
        test_set = cPickle.load(f)
    finally:
        f.close()

    if maxlen:
        kept = [(x, y) for x, y in zip(train_set[0], train_set[1])
                if len(x) < maxlen]
        train_set = ([x for x, _ in kept], [y for _, y in kept])
        del kept

    # split training set into validation set
    train_set_x, train_set_y = train_set
    n_samples = len(train_set_x)
    sidx = numpy.random.permutation(n_samples)
    n_train = int(numpy.round(n_samples * (1. - valid_portion)))
    valid_set_x = [train_set_x[s] for s in sidx[n_train:]]
    valid_set_y = [train_set_y[s] for s in sidx[n_train:]]
    train_set_x = [train_set_x[s] for s in sidx[:n_train]]
    train_set_y = [train_set_y[s] for s in sidx[:n_train]]

    test_set_x, test_set_y = test_set

    def remove_unk(x):
        # map every out-of-vocabulary index to 1 (unknown)
        return [[1 if w >= n_words else w for w in sen] for sen in x]

    train_set_x = remove_unk(train_set_x)
    valid_set_x = remove_unk(valid_set_x)
    test_set_x = remove_unk(test_set_x)

    def sort_by_length(xs, ys):
        # stable sort of both lists by the length of the sequences in xs
        order = sorted(range(len(xs)), key=lambda i: len(xs[i]))
        return [xs[i] for i in order], [ys[i] for i in order]

    if sort_by_len:
        test_set_x, test_set_y = sort_by_length(test_set_x, test_set_y)
        valid_set_x, valid_set_y = sort_by_length(valid_set_x, valid_set_y)
        train_set_x, train_set_y = sort_by_length(train_set_x, train_set_y)

    train = (train_set_x, train_set_y)
    valid = (valid_set_x, valid_set_y)
    test = (test_set_x, test_set_y)

    return train, valid, test


================================================
FILE: DeepLearningTutorials/code/imdb_preprocess.py
================================================
"""
This script created the pickled dataset.

1) You need to download this file and put it in the same directory as this file.
https://github.com/moses-smt/mosesdecoder/raw/master/scripts/tokenizer/tokenizer.perl . Give it execution permission.

2) Get the dataset from http://ai.stanford.edu/~amaas/data/sentiment/ and extract it in the current directory.

3) Then run this script.
"""

# Root directory of the extracted dataset; read by main().
# NOTE(review): this is a machine-specific absolute path -- adjust it to
# wherever the dataset was extracted.
dataset_path='/Tmp/bastienf/aclImdb/'

import numpy
import cPickle as pkl

from collections import OrderedDict

import glob
import os

from subprocess import Popen, PIPE

# tokenizer.perl is from Moses: https://github.com/moses-smt/mosesdecoder/tree/master/scripts/tokenizer
# Command line used by tokenize(); the script is looked up relative to the
# current working directory ('./tokenizer.perl').
tokenizer_cmd = ['./tokenizer.perl', '-l', 'en', '-q', '-']


def tokenize(sentences):

    print 'Tokenizing..',
    text = "\n".join(sentences)
    tokenizer = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE)
    tok_text, _ = tokenizer.communicate(text)
    toks = tok_text.split('\n')[:-1]
    print 'Done'

    return toks


def build_dict(path):
    sentences = []
    currdir = os.getcwd()
    os.chdir('%s/pos/' % path)
    for ff in glob.glob("*.txt"):
        with open(ff, 'r') as f:
            sentences.append(f.readline().strip())
    os.chdir('%s/neg/' % path)
    for ff in glob.glob("*.txt"):
        with open(ff, 'r') as f:
            sentences.append(f.readline().strip())
    os.chdir(currdir)

    sentences = tokenize(sentences)

    print 'Building dictionary..',
    wordcount = dict()
    for ss in sentences:
        words = ss.strip().lower().split()
        for w in words:
            if w not in wordcount:
                wordcount[w] = 1
            else:
                wordcount[w] += 1

    counts = wordcount.values()
    keys = wordcount.keys()

    sorted_idx = numpy.argsort(counts)[::-1]

    worddict = dict()

    for idx, ss in enumerate(sorted_idx):
        worddict[keys[ss]] = idx+2  # leave 0 and 1 (UNK)

    print numpy.sum(counts), ' total words ', len(keys), ' unique words'

    return worddict


def grab_data(path, dictionary):
    """Convert the reviews in ``path`` into lists of word indices.

    The first line of every ``*.txt`` file in ``path`` is read, tokenized
    with ``tokenize``, lower-cased and split on whitespace.  Each word is
    replaced by its index in ``dictionary``; words that are not in the
    dictionary are mapped to 1 (unknown).

    :param path: directory holding the ``*.txt`` review files
    :param dictionary: mapping from word to integer index
    :returns: list with one list of word indices per review
    """
    sentences = []
    currdir = os.getcwd()
    os.chdir(path)
    try:
        for ff in glob.glob("*.txt"):
            with open(ff, 'r') as f:
                sentences.append(f.readline().strip())
    finally:
        # restore the caller's working directory even if reading fails
        os.chdir(currdir)
    sentences = tokenize(sentences)

    return [[dictionary.get(w, 1) for w in ss.strip().lower().split()]
            for ss in sentences]


def main():
    """Build the IMDB pickles from the raw dataset under ``dataset_path``.

    Writes two files into the current working directory:

    * ``imdb.pkl`` -- two consecutive pickles, ``(train_x, train_y)``
      followed by ``(test_x, test_y)``; labels are 1 for positive and 0
      for negative reviews;
    * ``imdb.dict.pkl`` -- the word -> index dictionary.
    """
    # Get the dataset from http://ai.stanford.edu/~amaas/data/sentiment/
    path = dataset_path
    dictionary = build_dict(os.path.join(path, 'train'))

    # os.path.join (rather than string concatenation) keeps this working
    # whether or not dataset_path ends with a path separator.
    train_x_pos = grab_data(os.path.join(path, 'train', 'pos'), dictionary)
    train_x_neg = grab_data(os.path.join(path, 'train', 'neg'), dictionary)
    train_x = train_x_pos + train_x_neg
    train_y = [1] * len(train_x_pos) + [0] * len(train_x_neg)

    test_x_pos = grab_data(os.path.join(path, 'test', 'pos'), dictionary)
    test_x_neg = grab_data(os.path.join(path, 'test', 'neg'), dictionary)
    test_x = test_x_pos + test_x_neg
    test_y = [1] * len(test_x_pos) + [0] * len(test_x_neg)

    # ``with`` closes the files even if pickling fails
    with open('imdb.pkl', 'wb') as f:
        pkl.dump((train_x, train_y), f, -1)
        pkl.dump((test_x, test_y), f, -1)

    with open('imdb.dict.pkl', 'wb') as f:
        pkl.dump(dictionary, f, -1)

# Run the preprocessing only when this file is executed as a script.
if __name__ == '__main__':
    main()


================================================
FILE: DeepLearningTutorials/code/logistic_cg.py
================================================
"""
This tutorial introduces logistic regression using Theano and conjugate
gradient descent.

Logistic regression is a probabilistic, linear classifier. It is parametrized
by a weight matrix :math:`W` and a bias vector :math:`b`. Classification is
done by projecting data points onto a set of hyperplanes, the distance to
which is used to determine a class membership probability.

Mathematically, this can be written as:

.. math::
  P(Y=i|x, W,b) &= softmax_i(W x + b) \\
                &= \frac {e^{W_i x + b_i}} {\sum_j e^{W_j x + b_j}}


The output of the model or prediction is then done by taking the argmax of
the vector whose i'th element is P(Y=i|x).

.. math::

  y_{pred} = argmax_i P(Y=i|x,W,b)


This tutorial presents a conjugate gradient optimization method that is
suitable for smaller datasets.


References:

   - textbooks: "Pattern Recognition and Machine Learning" -
                 Christopher M. Bishop, section 4.3.2


"""
__docformat__ = 'restructedtext en'


import os
import sys
import time

import numpy

import theano
import theano.tensor as T

from logistic_sgd import load_data


class LogisticRegression(object):
    """Multi-class logistic regression stored in one flat parameter vector.

    The model is a weight matrix :math:`W` and a bias vector :math:`b`,
    both kept as slices of a single shared vector ``theta``.  Class
    membership probabilities are the softmax of ``input . W + b``.
    """

    def __init__(self, input, n_in, n_out):
        """Create the shared parameter vector and the symbolic outputs.

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoint lies

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the target lies

        """
        n_weights = n_in * n_out

        # theta = (W, b), initialised with zeros: the first n_in * n_out
        # entries are W (reshaped to (n_in, n_out)), the last n_out
        # entries are b
        self.theta = theano.shared(
            value=numpy.zeros(n_weights + n_out,
                              dtype=theano.config.floatX),
            name='theta',
            borrow=True
        )
        self.W = self.theta[0:n_weights].reshape((n_in, n_out))
        self.b = self.theta[n_weights:n_weights + n_out]

        # symbolic vector of class-membership probabilities
        activation = T.dot(input, self.W) + self.b
        self.p_y_given_x = T.nnet.softmax(activation)

        # symbolic prediction: the class with the highest probability
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)

    def negative_log_likelihood(self, y):
        r"""Return the mean negative log-likelihood of the targets ``y``.

        .. math::

            -\frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|-1}
                \log(P(Y=y^{(i)}|x^{(i)}, W,b))

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label
        """
        log_probs = T.log(self.p_y_given_x)
        # pick, for every example, the log-probability of its true class
        true_class_log_probs = log_probs[T.arange(y.shape[0]), y]
        return -T.mean(true_class_log_probs)

    def errors(self, y):
        """Return the fraction of examples in the minibatch that are
        misclassified.

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example
                  the correct label

        :raises TypeError: if ``y`` and ``y_pred`` differ in number of
                           dimensions
        :raises NotImplementedError: if ``y`` is not of an integer dtype
        """
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        if not y.dtype.startswith('int'):
            raise NotImplementedError()

        # T.neq yields 1 for every wrong prediction and 0 otherwise
        mistakes = T.neq(self.y_pred, y)
        return T.mean(mistakes)


def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'):
    """Demonstrate conjugate gradient optimization of a log-linear model

    This is demonstrated on MNIST.  The flat parameter vector ``theta`` of
    ``LogisticRegression`` is optimized with ``scipy.optimize.fmin_cg``.
    The optimizer's callback prints the validation error and, whenever it
    improves on the best value seen so far, records the test error.  A
    summary is printed at the end; nothing is returned.

    :type n_epochs: int
    :param n_epochs: number of epochs to run the optimizer (passed to
                     ``fmin_cg`` as ``maxiter``)

    :type mnist_pkl_gz: string
    :param mnist_pkl_gz: the path of the mnist training file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    """
    #############
    # LOAD DATA #
    #############
    datasets = load_data(mnist_pkl_gz)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    batch_size = 600    # size of the minibatch

    # number of whole minibatches per set; the results are used as
    # xrange() bounds below, so this relies on integer division
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    n_in = 28 * 28  # number of input units
    n_out = 10  # number of output units

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    minibatch_offset = T.lscalar()  # offset to the start of a [mini]batch
    x = T.matrix()   # the data is presented as rasterized images
    y = T.ivector()  # the labels are presented as 1D vector of
                     # [int] labels

    # construct the logistic regression class
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y).mean()

    # compile a theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        [minibatch_offset],
        classifier.errors(y),
        givens={
            x: test_set_x[minibatch_offset:minibatch_offset + batch_size],
            y: test_set_y[minibatch_offset:minibatch_offset + batch_size]
        },
        name="test"
    )

    # same as test_model, but evaluated on the validation set
    validate_model = theano.function(
        [minibatch_offset],
        classifier.errors(y),
        givens={
            x: valid_set_x[minibatch_offset: minibatch_offset + batch_size],
            y: valid_set_y[minibatch_offset: minibatch_offset + batch_size]
        },
        name="validate"
    )

    #  compile a theano function that returns the cost of a minibatch
    batch_cost = theano.function(
        [minibatch_offset],
        cost,
        givens={
            x: train_set_x[minibatch_offset: minibatch_offset + batch_size],
            y: train_set_y[minibatch_offset: minibatch_offset + batch_size]
        },
        name="batch_cost"
    )

    # compile a theano function that returns the gradient of the minibatch
    # with respect to theta
    batch_grad = theano.function(
        [minibatch_offset],
        T.grad(cost, classifier.theta),
        givens={
            x: train_set_x[minibatch_offset: minibatch_offset + batch_size],
            y: train_set_y[minibatch_offset: minibatch_offset + batch_size]
        },
        name="batch_grad"
    )

    # creates a function that computes the average cost on the training set;
    # it first writes the optimizer's current theta into the shared variable
    def train_fn(theta_value):
        classifier.theta.set_value(theta_value, borrow=True)
        train_losses = [batch_cost(i * batch_size)
                        for i in xrange(n_train_batches)]
        return numpy.mean(train_losses)

    # creates a function that computes the average gradient of cost with
    # respect to theta (sum of the per-minibatch gradients divided by the
    # number of minibatches)
    def train_fn_grad(theta_value):
        classifier.theta.set_value(theta_value, borrow=True)
        grad = batch_grad(0)
        for i in xrange(1, n_train_batches):
            grad += batch_grad(i * batch_size)
        return grad / n_train_batches

    # [best validation error so far, test error at that point]; kept in a
    # list so that the nested callback below can update it in place
    validation_scores = [numpy.inf, 0]

    # creates the validation function
    def callback(theta_value):
        classifier.theta.set_value(theta_value, borrow=True)
        #compute the validation loss
        validation_losses = [validate_model(i * batch_size)
                             for i in xrange(n_valid_batches)]
        this_validation_loss = numpy.mean(validation_losses)
        print('validation error %f %%' % (this_validation_loss * 100.,))

        # check if it is better than the best validation score got until now
        if this_validation_loss < validation_scores[0]:
            # if so, replace the old one, and compute the score on the
            # testing dataset
            validation_scores[0] = this_validation_loss
            test_losses = [test_model(i * batch_size)
                           for i in xrange(n_test_batches)]
            validation_scores[1] = numpy.mean(test_losses)

    ###############
    # TRAIN MODEL #
    ###############

    # using scipy conjugate gradient optimizer
    import scipy.optimize
    print ("Optimizing using scipy.optimize.fmin_cg...")
    start_time = time.clock()
    # x0 has (n_in + 1) * n_out entries, i.e. n_in * n_out weights plus
    # n_out biases -- the same length as classifier.theta
    best_w_b = scipy.optimize.fmin_cg(
        f=train_fn,
        x0=numpy.zeros((n_in + 1) * n_out, dtype=x.dtype),
        fprime=train_fn_grad,
        callback=callback,
        disp=0,
        maxiter=n_epochs
    )
    end_time = time.clock()
    print(
        (
            'Optimization complete with best validation score of %f %%, with '
            'test performance %f %%'
        )
        % (validation_scores[0] * 100., validation_scores[1] * 100.)
    )

    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))

# Run the conjugate-gradient demo only when executed as a script.
if __name__ == '__main__':
    cg_optimization_mnist()


================================================
FILE: DeepLearningTutorials/code/logistic_sgd.py
================================================
"""
This tutorial introduces logistic regression using Theano and stochastic
gradient descent.

Logistic regression is a probabilistic, linear classifier. It is parametrized
by a weight matrix :math:`W` and a bias vector :math:`b`. Classification is
done by projecting data points onto a set of hyperplanes, the distance to
which is used to determine a class membership probability.

Mathematically, this can be written as:

.. math::
  P(Y=i|x, W,b) &= softmax_i(W x + b) \\
                &= \frac {e^{W_i x + b_i}} {\sum_j e^{W_j x + b_j}}


The output of the model or prediction is then done by taking the argmax of
the vector whose i'th element is P(Y=i|x).

.. math::

  y_{pred} = argmax_i P(Y=i|x,W,b)


This tutorial presents a stochastic gradient descent optimization method
suitable for large datasets.


References:

    - textbooks: "Pattern Recognition and Machine Learning" -
                 Christopher M. Bishop, section 4.3.2

"""
__docformat__ = 'restructedtext en'

import cPickle
import gzip
import os
import sys
import time

import numpy

import theano
import theano.tensor as T


class LogisticRegression(object):
    """Multi-class logistic regression.

    The model consists of a weight matrix :math:`W` and a bias vector
    :math:`b`, each held in its own shared variable.  Class membership
    probabilities are the softmax of ``input . W + b``.
    """

    def __init__(self, input, n_in, n_out):
        """Create the zero-initialised parameters and the symbolic outputs.

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

        """
        # start-snippet-1
        # weights W: an (n_in, n_out) matrix of zeros
        initial_W = numpy.zeros((n_in, n_out), dtype=theano.config.floatX)
        self.W = theano.shared(value=initial_W, name='W', borrow=True)

        # biases b: a vector of n_out zeros
        initial_b = numpy.zeros((n_out,), dtype=theano.config.floatX)
        self.b = theano.shared(value=initial_b, name='b', borrow=True)

        # symbolic matrix of class-membership probabilities, where
        #   - row j of ``input`` is training sample j,
        #   - column k of W is the separating hyperplane for class k,
        #   - element k of b is the free parameter of hyperplane k
        activation = T.dot(input, self.W) + self.b
        self.p_y_given_x = T.nnet.softmax(activation)

        # symbolic prediction: the class whose probability is maximal
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        # end-snippet-1

        # parameters of the model
        self.params = [self.W, self.b]

    def negative_log_likelihood(self, y):
        r"""Return the mean negative log-likelihood of the targets ``y``.

        .. math::

            -\frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|-1}
                \log(P(Y=y^{(i)}|x^{(i)}, W,b))

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        Note: we use the mean instead of the sum so that
              the learning rate is less dependent on the batch size
        """
        # start-snippet-2
        # LP is the matrix of log-probabilities: one row per example, one
        # column per class.  With n = y.shape[0] examples in the minibatch,
        # LP[T.arange(n), y] is the vector
        # [LP[0, y[0]], LP[1, y[1]], ..., LP[n-1, y[n-1]]], i.e. the
        # log-probability of the correct class of every example; its mean
        # is the mean log-likelihood across the minibatch.
        log_probs = T.log(self.p_y_given_x)
        true_class_log_probs = log_probs[T.arange(y.shape[0]), y]
        return -T.mean(true_class_log_probs)
        # end-snippet-2

    def errors(self, y):
        """Return the zero-one loss averaged over the minibatch, i.e. the
        number of misclassified examples divided by the minibatch size.

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        :raises TypeError: if ``y`` and ``y_pred`` differ in number of
                           dimensions
        :raises NotImplementedError: if ``y`` is not of an integer dtype
        """
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        if not y.dtype.startswith('int'):
            raise NotImplementedError()

        # T.neq yields 1 for every wrong prediction and 0 otherwise
        mistakes = T.neq(self.y_pred, y)
        return T.mean(mistakes)


def load_data(dataset):
    ''' Loads the dataset

    A bare file name that does not exist in the current directory is
    looked up in ``<directory of this module>/../data``.  If the file is
    still missing and is named ``mnist.pkl.gz`` it is downloaded first.

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)

    :returns: ``[(train_x, train_y), (valid_x, valid_y), (test_x, test_y)]``
        where every ``*_x`` is a Theano shared variable of dtype floatX
        and every ``*_y`` is a symbolic int32 cast of a floatX shared
        variable
    '''

    #############
    # LOAD DATA #
    #############

    # Download the MNIST dataset if it is not present
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # Check if dataset is in the data directory.
        new_path = os.path.join(
            os.path.split(__file__)[0],
            "..",
            "data",
            dataset
        )
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        # urlretrieve lives in ``urllib`` on Python 2 and in
        # ``urllib.request`` on Python 3; support both.
        try:
            from urllib import urlretrieve
        except ImportError:
            from urllib.request import urlretrieve
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        )
        print('Downloading data from %s' % origin)
        urlretrieve(origin, dataset)

    print('... loading data')

    # Load the dataset; close the handle even when unpickling fails.
    # NOTE(security): unpickling can execute arbitrary code and the file
    # may have been downloaded over plain http -- only use trusted sources.
    f = gzip.open(dataset, 'rb')
    try:
        train_set, valid_set, test_set = cPickle.load(f)
    finally:
        f.close()
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix) in which each
    # row corresponds to an example. target is a numpy.ndarray of
    # 1 dimension (a vector) that has the same length as the number of
    # rows in the input. It gives the target of the example with the
    # same index in the input.

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch every
        time it is needed (the default behaviour if the data is not in a
        shared variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        # When storing data on the GPU it has to be stored as floats
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use labels as index, and if they are
        # floats it doesn't make sense) therefore instead of returning
        # ``shared_y`` we will have to cast it to int. This little hack
        # lets us get around this issue
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval


def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='mnist.pkl.gz',
                           batch_size=600):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    # construct the logistic regression class
    # Each MNIST image has size 28*28
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but in
    # the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-3

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000  # look as this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is
                                  # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                  # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatche before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    #improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss *  \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set

                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(
                        (
                            '     epoch %i, minibatch %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )

            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(
        (
            'Optimization complete with best validation score of %f %%,'
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., test_score * 100.)
    )
    print 'The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))

# Run the SGD demo only when executed as a script.
if __name__ == '__main__':
    sgd_optimization_mnist()


================================================
FILE: DeepLearningTutorials/code/lstm.py
================================================
'''
Build a tweet sentiment analyzer
'''
from collections import OrderedDict
import cPickle as pkl
import random
import sys
import time

import numpy
import theano
from theano import config
import theano.tensor as tensor
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

import imdb

# Registry mapping a dataset name to its (load_data, prepare_data) pair of
# functions; looked up by get_dataset().
datasets = {'imdb': (imdb.load_data, imdb.prepare_data)}


def numpy_floatX(data):
    """Return `data` as a numpy array whose dtype is Theano's config.floatX."""
    target_dtype = config.floatX
    return numpy.asarray(data, dtype=target_dtype)


def get_minibatches_idx(n, minibatch_size, shuffle=False):
    """
    Split the indices 0..n-1 into consecutive minibatches.

    :param n: number of examples to index.
    :param minibatch_size: number of indices per minibatch; the last
        minibatch holds whatever is left and may be smaller.
    :param shuffle: if True, the indices are shuffled (with the `random`
        module) before being cut into minibatches.
    :return: a list of (minibatch number, int32 index array) pairs.

    The result is always a concrete list: callers keep the returned value
    and loop over it several times (e.g. once per validation run), which
    would silently yield nothing on the second pass if a one-shot iterator
    (such as Python 3's ``zip``) were returned.
    """

    idx_list = numpy.arange(n, dtype="int32")

    if shuffle:
        random.shuffle(idx_list)

    minibatches = []
    minibatch_start = 0
    for _ in range(n // minibatch_size):
        minibatch_end = minibatch_start + minibatch_size
        minibatches.append(idx_list[minibatch_start:minibatch_end])
        minibatch_start = minibatch_end

    if minibatch_start != n:
        # Make a minibatch out of what is left
        minibatches.append(idx_list[minibatch_start:])

    return list(enumerate(minibatches))


def get_dataset(name):
    """
    Look up the dataset registered under `name` in the `datasets` registry.

    Returns the first two entries of ``datasets[name]`` as a tuple; for the
    registered 'imdb' entry these are its load_data and prepare_data
    functions.  An unknown name raises KeyError.
    """
    entry = datasets[name]
    return entry[0], entry[1]


def zipp(params, tparams):
    """
    Push the values of `params` into the matching entries of `tparams`.

    Used when a model is reloaded: for every key of `params`,
    ``tparams[key].set_value(value)`` is called.  Nothing is returned.
    """
    for name, value in params.iteritems():
        shared_var = tparams[name]
        shared_var.set_value(value)


def unzip(zipped):
    """
    Snapshot the current values held by a dict of shared variables.

    Used when the model is pickled/saved: returns a new OrderedDict with
    the same keys, in the same order, whose values are the result of
    calling ``get_value()`` on each entry of `zipped`.
    """
    return OrderedDict((name, shared_var.get_value())
                       for name, shared_var in zipped.iteritems())


def dropout_layer(state_before, use_noise, trng):
    """
    Build the symbolic dropout expression for `state_before`.

    Two branches are combined with ``tensor.switch`` on `use_noise`:
    the first multiplies `state_before` by a binomial (n=1, p=0.5) sample
    drawn from `trng` with the same shape and dtype; the second scales
    `state_before` by 0.5.
    """
    keep_mask = trng.binomial(state_before.shape,
                              p=0.5, n=1,
                              dtype=state_before.dtype)
    noisy_state = state_before * keep_mask
    scaled_state = state_before * 0.5
    return tensor.switch(use_noise, noisy_state, scaled_state)


def _p(pp, name):
    return '%s_%s' % (pp, name)


def init_params(options):
    """
    Create the initial (numpy) parameters of the whole model.

    Builds the global parameters -- the embedding 'Wemb' and the classifier
    'U' / 'b' -- and lets the encoder selected by ``options['encoder']`` add
    its own parameters in between.

    :param options: dict providing 'n_words', 'dim_proj', 'encoder' and
        'ydim'.
    :return: OrderedDict mapping parameter name to numpy array.
    """
    params = OrderedDict()

    # embedding: numpy.random.rand draws uniform samples in [0, 1), which
    # are then scaled by 0.01
    dim_proj = options['dim_proj']
    embedding = numpy.random.rand(options['n_words'], dim_proj)
    params['Wemb'] = (0.01 * embedding).astype(config.floatX)

    # encoder-specific parameters, created by the registered init function
    encoder_name = options['encoder']
    param_init = get_layer(encoder_name)[0]
    params = param_init(options, params, prefix=encoder_name)

    # classifier
    ydim = options['ydim']
    classifier_weights = numpy.random.randn(dim_proj, ydim)
    params['U'] = 0.01 * classifier_weights.astype(config.floatX)
    params['b'] = numpy.zeros((ydim,)).astype(config.floatX)

    return params


def load_params(path, params):
    """
    Overwrite the values of `params` with the arrays stored at `path`.

    :param path: archive to read with ``numpy.load`` (as written by
        ``numpy.savez``).
    :param params: dict of parameters; every one of its keys must be
        present in the archive.  It is updated in place.
    :return: the same `params` object.
    :raises Warning: if a key of `params` is missing from the archive.

    The archive is closed before returning (also when a key is missing) so
    the underlying file handle is not leaked.
    """
    archive = numpy.load(path)
    try:
        # Iterate over a snapshot of the keys since values are reassigned
        # while looping.
        for kk in list(params):
            if kk not in archive:
                raise Warning('%s is not in the archive' % kk)
            params[kk] = archive[kk]
    finally:
        archive.close()

    return params


def init_tparams(params):
    """
    Wrap every entry of `params` in a Theano shared variable.

    :param params: dict mapping parameter name to its initial value.
    :return: OrderedDict with the same keys, in the same order, whose values
        are ``theano.shared`` variables named after their key.
    """
    tparams = OrderedDict()
    for name, value in params.iteritems():
        tparams[name] = theano.shared(value, name=name)
    return tparams


def get_layer(name):
    """Return the entry registered under `name` in the `layers` registry.

    For the registered 'lstm' entry this is the pair
    (parameter-init function, layer-building function).
    """
    return layers[name]


def ortho_weight(ndim):
    """
    Return a random (ndim, ndim) orthogonal matrix cast to config.floatX.

    The matrix is the first factor returned by ``numpy.linalg.svd`` for a
    square matrix of standard-normal samples.
    """
    gaussian = numpy.random.randn(ndim, ndim)
    left_factor = numpy.linalg.svd(gaussian)[0]
    return left_factor.astype(config.floatX)


def param_init_lstm(options, params, prefix='lstm'):
    """
    Add the LSTM parameters to `params` and return it.

    Three entries are created, keyed by `prefix`:
    '<prefix>_W' and '<prefix>_U', each the concatenation along axis 1 of
    four independent ``ortho_weight(dim_proj)`` matrices, and '<prefix>_b',
    a zero vector of length 4 * dim_proj.

    :see: init_params
    """
    dim = options['dim_proj']

    input_weights = [ortho_weight(dim) for _ in range(4)]
    params[_p(prefix, 'W')] = numpy.concatenate(input_weights, axis=1)

    recurrent_weights = [ortho_weight(dim) for _ in range(4)]
    params[_p(prefix, 'U')] = numpy.concatenate(recurrent_weights, axis=1)

    bias = numpy.zeros((4 * dim,))
    params[_p(prefix, 'b')] = bias.astype(config.floatX)

    return params


def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None):
    """
    Build the symbolic LSTM recurrence over `state_below`.

    :param tparams: dict of shared variables; the entries '<prefix>_W',
        '<prefix>_U' and '<prefix>_b' are used.
    :param state_below: symbolic input sequence; its first axis is the one
        scanned over.  When it is 3-dimensional its second axis gives the
        number of samples, otherwise 1 sample is assumed.
    :param options: dict providing 'dim_proj'.
    :param prefix: prefix of the parameter names in `tparams`.
    :param mask: symbolic mask scanned alongside `state_below`; required
        (the default of None is rejected by an assert).
    :return: the first output of the scan, i.e. the hidden state `h` at
        every step.
    """
    nsteps = state_below.shape[0]
    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    assert mask is not None

    def _slice(_x, n, dim):
        # Take the n-th block of width `dim` along the last axis.
        if _x.ndim == 3:
            return _x[:, :, n * dim:(n + 1) * dim]
        return _x[:, n * dim:(n + 1) * dim]

    def _step(m_, x_, h_, c_):
        # m_, x_: current slices of the scanned sequences (mask, projected
        # input); h_, c_: hidden and cell state from the previous step.
        preact = tensor.dot(h_, tparams[_p(prefix, 'U')])
        preact += x_
        # NOTE(review): x_ already includes this bias (it is added when
        # state_below is projected below), so the bias appears to be applied
        # twice -- confirm whether that is intended.
        preact += tparams[_p(prefix, 'b')]

        # Blocks 0..3 of the pre-activation feed the i, f, o sigmoids and
        # the tanh candidate, in that order.
        i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj']))
        f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj']))
        o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj']))
        c = tensor.tanh(_slice(preact, 3, options['dim_proj']))

        c = f * c_ + i * c
        # Where the mask is 0 the previous cell state is carried over.
        c = m_[:, None] * c + (1. - m_)[:, None] * c_

        h = o * tensor.tanh(c)
        # Same masking for the hidden state.
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h, c

    # Project the whole input sequence once, outside of the scan.
    state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
                   tparams[_p(prefix, 'b')])

    dim_proj = options['dim_proj']
    # Both recurrent states (h and c) start as zeros of shape
    # (n_samples, dim_proj).
    rval, updates = theano.scan(_step,
                                sequences=[mask, state_below],
                                outputs_info=[tensor.alloc(numpy_floatX(0.),
                                                           n_samples,
                                                           dim_proj),
                                              tensor.alloc(numpy_floatX(0.),
                                                           n_samples,
                                                           dim_proj)],
                                name=_p(prefix, '_layers'),
                                n_steps=nsteps)
    # rval is [h sequence, c sequence]; only the hidden states are returned.
    return rval[0]


# Registry mapping an encoder name to its (parameter-init function,
# layer-building function) pair; looked up by get_layer().
# NOTE: an 'ff' (feed forward, normal neural net) layer -- only useful to put
#     after lstm, before the classifier -- is not registered here; 'lstm' is
#     the only available entry.
layers = {'lstm': (param_init_lstm, lstm_layer)}


def sgd(lr, tparams, grads, x, mask, y, cost):
    """ Stochastic Gradient Descent

    Returns two compiled Theano functions ``(f_grad_shared, f_update)``:
    ``f_grad_shared(x, mask, y)`` returns the cost and stores the gradients
    in shared buffers without touching the weights; ``f_update(lr)`` then
    applies ``p - lr * gradient`` to every parameter from those buffers.

    :note: This is a more complicated version of sgd than needed; it is
        split in two steps to share its interface with adadelta and rmsprop.

    """
    # One zero-initialised shared buffer per parameter, holding the gradient
    # of the current mini-batch.
    grad_buffers = [theano.shared(param.get_value() * 0.,
                                  name='%s_grad' % key)
                    for key, param in tparams.iteritems()]
    store_grads = list(zip(grad_buffers, grads))

    # Computes the cost and stores the gradients; the weights are untouched.
    f_grad_shared = theano.function([x, mask, y], cost, updates=store_grads,
                                    name='sgd_f_grad_shared')

    # Updates the weights from the previously stored gradients.
    apply_grads = [(param, param - lr * grad)
                   for param, grad in zip(tparams.values(), grad_buffers)]
    f_update = theano.function([lr], [], updates=apply_grads,
                               name='sgd_f_update')

    return f_grad_shared, f_update


def adadelta(lr, tparams, grads, x, mask, y, cost):
    """
    Build the two compiled functions of the adadelta optimizer.

    Returns ``(f_grad_shared, f_update)``: ``f_grad_shared(x, mask, y)``
    returns the cost while storing the gradients and updating the running
    average of their squares; ``f_update(lr)`` applies the parameter step
    and updates the running average of squared steps.  `lr` is accepted
    for interface compatibility but does not enter the update expressions.
    """
    def _zeros_like_params(name_template):
        # One zero-valued shared variable per parameter, named after it.
        return [theano.shared(param.get_value() * numpy_floatX(0.),
                              name=name_template % key)
                for key, param in tparams.iteritems()]

    stored_grads = _zeros_like_params('%s_grad')
    avg_sq_steps = _zeros_like_params('%s_rup2')
    avg_sq_grads = _zeros_like_params('%s_rgrad2')

    store_updates = list(zip(stored_grads, grads))
    avg_sq_grad_updates = [(avg, 0.95 * avg + 0.05 * (grad ** 2))
                           for avg, grad in zip(avg_sq_grads, grads)]

    f_grad_shared = theano.function([x, mask, y], cost,
                                    updates=store_updates + avg_sq_grad_updates,
                                    name='adadelta_f_grad_shared')

    steps = [-tensor.sqrt(sq_step + 1e-6) / tensor.sqrt(sq_grad + 1e-6) * grad
             for grad, sq_step, sq_grad in zip(stored_grads,
                                               avg_sq_steps,
                                               avg_sq_grads)]
    avg_sq_step_updates = [(avg, 0.95 * avg + 0.05 * (step ** 2))
                           for avg, step in zip(avg_sq_steps, steps)]
    param_updates = [(param, param + step)
                     for param, step in zip(tparams.values(), steps)]

    f_update = theano.function([lr], [],
                               updates=avg_sq_step_updates + param_updates,
                               on_unused_input='ignore',
                               name='adadelta_f_update')

    return f_grad_shared, f_update


def rmsprop(lr, tparams, grads, x, mask, y, cost):
    """
    Build the two compiled functions of the rmsprop optimizer.

    Returns ``(f_grad_shared, f_update)``: ``f_grad_shared(x, mask, y)``
    returns the cost while storing the gradients and updating running
    averages of the gradients and of their squares; ``f_update(lr)``
    updates a per-parameter step buffer and adds the new step to each
    parameter.  `lr` is accepted for interface compatibility but does not
    enter the update expressions (the step size 1e-4 is hard-coded).
    """
    def _zeros_like_params(name_template):
        # One zero-valued shared variable per parameter, named after it.
        return [theano.shared(param.get_value() * numpy_floatX(0.),
                              name=name_template % key)
                for key, param in tparams.iteritems()]

    stored_grads = _zeros_like_params('%s_grad')
    avg_grads = _zeros_like_params('%s_rgrad')
    avg_sq_grads = _zeros_like_params('%s_rgrad2')

    store_updates = list(zip(stored_grads, grads))
    avg_grad_updates = [(avg, 0.95 * avg + 0.05 * grad)
                        for avg, grad in zip(avg_grads, grads)]
    avg_sq_grad_updates = [(avg, 0.95 * avg + 0.05 * (grad ** 2))
                           for avg, grad in zip(avg_sq_grads, grads)]

    f_grad_shared = theano.function(
        [x, mask, y], cost,
        updates=store_updates + avg_grad_updates + avg_sq_grad_updates,
        name='rmsprop_f_grad_shared')

    step_buffers = _zeros_like_params('%s_updir')
    step_updates = [
        (step, 0.9 * step - 1e-4 * grad / tensor.sqrt(sq - avg ** 2 + 1e-4))
        for step, grad, avg, sq in zip(step_buffers, stored_grads,
                                       avg_grads, avg_sq_grads)]
    # Each parameter moves by the *new* value of its step buffer.
    param_updates = [(param, param + new_step)
                     for param, (_, new_step) in zip(tparams.values(),
                                                     step_updates)]
    f_update = theano.function([lr], [],
                               updates=step_updates + param_updates,
                               on_unused_input='ignore',
                               name='rmsprop_f_update')

    return f_grad_shared, f_update


def build_model(tparams, options):
    """
    Build the symbolic graph of the classifier.

    :param tparams: dict of shared variables; 'Wemb', 'U', 'b' and the
        encoder's own parameters are used.
    :param options: dict providing 'dim_proj', 'encoder' and 'use_dropout'.
    :return: the tuple ``(use_noise, x, mask, y, f_pred_prob, f_pred, cost)``
        where `use_noise` is the shared flag handed to the dropout layer,
        `x`, `mask`, `y` are the symbolic inputs, `f_pred_prob` / `f_pred`
        are compiled functions of ``(x, mask)`` returning the softmax
        output and its argmax, and `cost` is the symbolic mean negative
        log-probability of the targets `y`.
    """
    trng = RandomStreams(1234)

    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))

    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype=config.floatX)
    y = tensor.vector('y', dtype='int64')

    n_timesteps, n_samples = x.shape[0], x.shape[1]

    # Look up one embedding row per entry of x, then restore the
    # (n_timesteps, n_samples) layout with the embedding as last axis.
    flat_embeddings = tparams['Wemb'][x.flatten()]
    emb = flat_embeddings.reshape([n_timesteps,
                                   n_samples,
                                   options['dim_proj']])

    encoder_name = options['encoder']
    encode = get_layer(encoder_name)[1]
    proj = encode(tparams, emb, options, prefix=encoder_name, mask=mask)
    if encoder_name == 'lstm':
        # Masked mean of the encoder output over the first axis.
        masked_sum = (proj * mask[:, :, None]).sum(axis=0)
        proj = masked_sum / mask.sum(axis=0)[:, None]
    if options['use_dropout']:
        proj = dropout_layer(proj, use_noise, trng)

    scores = tensor.dot(proj, tparams['U']) + tparams['b']
    pred = tensor.nnet.softmax(scores)

    f_pred_prob = theano.function([x, mask], pred, name='f_pred_prob')
    f_pred = theano.function([x, mask], pred.argmax(axis=1), name='f_pred')

    # 1e-8 keeps the log away from log(0).
    target_probs = pred[tensor.arange(n_samples), y]
    cost = -tensor.log(target_probs + 1e-8).mean()

    return use_noise, x, mask, y, f_pred_prob, f_pred, cost


def pred_probs(f_pred_prob, prepare_data, data, iterator, verbose=False):
    """ If you want to use a trained model, this is useful to compute
    the probabilities of new examples.

    :param f_pred_prob: function of ``(x, mask)`` returning one row of
        class probabilities per example of the minibatch.
    :param prepare_data: the dataset's prepare_data function; called as
        ``prepare_data(sequences, labels, maxlen=None)`` and expected to
        return ``(x, mask, y)``.
    :param data: pair ``(sequences, labels)``.
    :param iterator: iterable of ``(minibatch number, index array)`` pairs,
        e.g. the result of get_minibatches_idx.
    :param verbose: if True, print the progress after every minibatch.
    :return: array of dtype config.floatX with one row per example of
        `data`; rows not covered by `iterator` stay at zero.

    The number of columns follows the width of what `f_pred_prob` returns
    (it is no longer fixed to 2 classes); when `iterator` yields nothing the
    historical shape ``(n_samples, 2)`` is returned.
    """
    n_samples = len(data[0])
    probs = None

    n_done = 0

    for _, valid_index in iterator:
        x, mask, y = prepare_data([data[0][t] for t in valid_index],
                                  numpy.array(data[1])[valid_index],
                                  maxlen=None)
        batch_probs = numpy.asarray(f_pred_prob(x, mask))
        if probs is None:
            # Size the output from the first prediction so that models with
            # any number of classes are supported.
            n_classes = batch_probs.shape[1]
            probs = numpy.zeros((n_samples, n_classes)).astype(config.floatX)
        probs[valid_index, :] = batch_probs

        n_done += len(valid_index)
        if verbose:
            print('%d/%d samples classified' % (n_done, n_samples))

    if probs is None:
        probs = numpy.zeros((n_samples, 2)).astype(config.floatX)

    return probs


def pred_error(f_pred, prepare_data, data, iterator, verbose=False):
    """
    Compute the classification error rate on `data`.

    f_pred: Theano fct computing the prediction from (x, mask)
    prepare_data: usual prepare_data for that dataset.
    data: pair (sequences, labels).
    iterator: iterable of (minibatch number, index array) pairs.
    verbose: accepted but not used.

    Returns 1 minus the number of correct predictions divided by
    len(data[0]).
    """
    n_correct = 0
    for _, batch_index in iterator:
        batch_sequences = [data[0][t] for t in batch_index]
        batch_labels = numpy.array(data[1])[batch_index]
        x, mask, y = prepare_data(batch_sequences, batch_labels, maxlen=None)
        preds = f_pred(x, mask)
        targets = numpy.array(data[1])[batch_index]
        n_correct += (preds == targets).sum()

    return 1. - numpy_floatX(n_correct) / len(data[0])


def train_lstm(
    dim_proj=128,  # word embeding dimension and LSTM number of hidden units.
    patience=10,  # Number of epoch to wait before early stop if no progress
    max_epochs=5000,  # The maximum number of epoch to run
    dispFreq=10,  # Display to stdout the training progress every N updates
    decay_c=0.,  # Weight decay for the classifier applied to the U weights.
    lrate=0.0001,  # Learning rate for sgd (not used for adadelta and rmsprop)
    n_words=10000,  # Vocabulary size
    optimizer=adadelta,  # sgd, adadelta and rmsprop available, sgd very hard to use, not recommanded (probably need momentum and decaying learning rate).
    encoder='lstm',  # TODO: can be removed must be lstm.
    saveto='lstm_model.npz',  # The best model will be saved there
    validFreq=370,  # Compute the validation error after this number of update.
    saveFreq=1110,  # Save the parameters after every saveFreq updates
    maxlen=100,  # Sequence longer then this get ignored
    batch_size=16,  # The batch size during training.
    valid_batch_size=64,  # The batch size used for validation/test set.
    dataset='imdb',

    # Parameter for extra option
    noise_std=0.,
    use_dropout=True,  # if False slightly faster, but worst test error
                       # This frequently need a bigger model.
    reload_model="",  # Path to a saved model we want to start from.
    test_size=-1,  # If >0, we keep only this number of test example.
):

    # Model options
    model_options = locals().copy()
    print "model options", model_options

    load_data, prepare_data = get_dataset(dataset)

    print 'Loading data'
    train, valid, test = load_data(n_words=n_words, valid_portion=0.05,
                                   maxlen=maxlen)
    if test_size > 0:
        # The test set is sorted by size, but we want to keep random
        # size example.  So we must select a random selection of the
        # examples.
        idx = numpy.arange(len(test[0]))
        random.shuffle(idx)
        idx = idx[:test_size]
        test = ([test[0][n] for n in idx], [test[1][n] for n in idx])

    ydim = numpy.max(train[1]) + 1

    model_options['ydim'] = ydim

    print 'Building model'
    # This create the initial parameters as numpy ndarrays.
    # Dict name (string) -> numpy ndarray
    params = init_params(model_options)

    if reload_model:
        load_params('lstm_model.npz', params)

    # This create Theano Shared Variable from the parameters.
    # Dict name (string) -> Theano Tensor Shared Variable
    # params and tparams have different copy of the weights.
    tparams = init_tparams(params)

    # use_noise is for dropout
    (use_noise, x, mask,
     y, f_pred_prob, f_pred, cost) = build_model(tparams, model_options)

    if decay_c > 0.:
        decay_c = theano.shared(numpy_floatX(decay_c), name='decay_c')
        weight_decay = 0.
        weight_decay += (tparams['U'] ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    f_cost = theano.function([x, mask, y], cost, name='f_cost')

    grads = tensor.grad(cost, wrt=tparams.values())
    f_grad = theano.function([x, mask, y], grads, name='f_grad')

    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = optimizer(lr, tparams, grads,
                                        x, mask, y, cost)

    print 'Optimization'

    kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
    kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)

    print "%d train examples" % len(train[0])
    print "%d valid examples" % len(valid[0])
    print "%d test examples" % len(test[0])
    history_errs = []
    best_p = None
    bad_count = 0

    if validFreq == -1:
        validFreq = len(train[0]) / batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size

    uidx = 0  # the number of update done
    estop = False  # early stop
    start_time = time.clock()
    try:
        for eidx in xrange(max_epochs):
            n_samples = 0

            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)

            for _, train_index in kf:
                uidx += 1
                use_noise.set_value(1.)

                # Select the random examples for this minibatch
                y = [train[1][t] for t in train_index]
                x = [train[0][t]for t in train_index]

                # Get the data in numpy.ndarray format
                # This swap the axis!
                # Return something of shape (minibatch maxlen, n samples)
                x, mask, y = prepare_data(x, y)
                n_samples += x.shape[1]

                cost = f_grad_shared(x, mask, y)
                f_update(lrate)

                if numpy.isnan(cost) or numpy.isinf(cost):
                    print 'NaN detected'
                    return 1., 1., 1.

                if numpy.mod(uidx, dispFreq) == 0:
                    print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost

                if saveto and numpy.mod(uidx, saveFreq) == 0:
                    print 'Saving...',

                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    numpy.savez(saveto, history_errs=history_errs, **params)
                    pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
                    print 'Done'

                if numpy.mod(uidx, validFreq) == 0:
                    use_noise.set_value(0.)
                    train_err = pred_error(f_pred, prepare_data, train, kf)
                    valid_err = pred_error(f_pred, prepare_data, valid,
                                           kf_valid)
                    test_err = pred_error(f_pred, prepare_data, test, kf_test)

                    history_errs.append([valid_err, test_err])

                    if (uidx == 0 or
                        valid_err <= numpy.array(history_errs)[:,
                                                               0].min()):

                        best_p = unzip(tparams)
                        bad_counter = 0

                    print ('Train ', train_err, 'Valid ', valid_err,
                           'Test ', test_err)

                    if (len(history_errs) > patience and
                        valid_err >= numpy.array(history_errs)[:-patience,
                                                               0].min()):
                        bad_counter += 1
                        if bad_counter > patience:
                            print 'Early Stop!'
                            estop = True
                            break

            print 'Seen %d samples' % n_samples

            if estop:
                break

    except KeyboardInterrupt:
        print "Training interupted"

    end_time = time.clock()
    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)

    use_noise.set_value(0.)
    kf_train_sorted = get_minibatches_idx(len(train[0]), batch_size)
    train_err = pred_error(f_pred, prepare_data, train, kf_train_sorted)
    valid_err = pred_error(f_pred, prepare_data, valid, kf_valid)
    test_err = pred_error(f_pred, prepare_data, test, kf_test)

    print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err
    if saveto:
        numpy.savez(saveto, train_err=train_err,
                    valid_err=valid_err, test_err=test_err,
                    history_errs=history_errs, **best_p)
    print 'The code run for %d epochs, with %f sec/epochs' % (
        (eidx + 1), (end_time - start_time) / (1. * (eidx + 1)))
    print >> sys.stderr, ('Training took %.1fs' %
                          (end_time - start_time))
    return train_err, valid_err, test_err


# Script entry point: train with the default options except for a shorter
# run (100 epochs at most) and a test set reduced to 500 examples.
if __name__ == '__main__':
    # See train_lstm for all the available parameters and their definition.
    train_lstm(
        # reload_model="lstm_model.npz",
        max_epochs=100,
        test_size=500,
    )


================================================
FILE: DeepLearningTutorials/code/mlp.py
================================================
"""
This tutorial introduces the multilayer perceptron using Theano.

A multilayer perceptron is a logistic regressor where, instead of
feeding the input directly to the logistic regression, you insert an
intermediate layer, called the hidden layer, that has a nonlinear
activation function (usually tanh or sigmoid). One can use many such
hidden layers, making the architecture deep. The tutorial will also tackle
the problem of MNIST digit classification.

.. math::

    f(x) = G( b^{(2)} + W^{(2)}( s( b^{(1)} + W^{(1)} x))),

References:

    - textbooks: "Pattern Recognition and Machine Learning" -
                 Christopher M. Bishop, section 5

"""
__docformat__ = 'restructedtext en'


import os
import sys
import time

import numpy

import theano
import theano.tensor as T


from logistic_sgd import LogisticRegression, load_data


# start-snippet-1
class HiddenLayer(object):
    def __init__(self, rng, input, n_in, n_out, W=None, b=None,
                 activation=T.tanh):
        """
        Fully-connected hidden layer of a MLP.

        The layer output is ``activation(dot(input, W) + b)``, or the plain
        linear term when `activation` is None.  The weight matrix W has
        shape (n_in, n_out) and the bias vector b has shape (n_out,).

        NOTE : The default nonlinearity is tanh

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dmatrix
        :param input: a symbolic tensor of shape (n_examples, n_in)

        :type n_in: int
        :param n_in: dimensionality of input

        :type n_out: int
        :param n_out: number of hidden units

        :param W: shared weight matrix to reuse; a new one is created and
                  randomly initialized when None

        :param b: shared bias vector to reuse; a new zero vector is created
                  when None

        :type activation: theano.Op or function
        :param activation: Non linearity to be applied in the hidden
                           layer
        """
        self.input = input
        # end-snippet-1

        # When no `W` is given, its initial values are sampled uniformly
        # between -sqrt(6./(n_in+n_out)) and sqrt(6./(n_in+n_out)), the
        # interval used for the tanh activation function.
        # The samples are converted with asarray to dtype
        # theano.config.floatX so that the code is runnable on GPU.
        # Note : optimal initialization of weights is dependent on the
        #        activation function used (among other things).
        #        For example, results presented in [Xavier10] suggest that you
        #        should use 4 times larger initial weights for sigmoid
        #        compared to tanh
        #        We have no info for other function, so we use the same as
        #        tanh.
        if W is None:
            bound = numpy.sqrt(6. / (n_in + n_out))
            initial_W = numpy.asarray(
                rng.uniform(low=-bound, high=bound, size=(n_in, n_out)),
                dtype=theano.config.floatX
            )
            if activation == theano.tensor.nnet.sigmoid:
                initial_W *= 4

            W = theano.shared(value=initial_W, name='W', borrow=True)

        if b is None:
            b = theano.shared(
                value=numpy.zeros((n_out,), dtype=theano.config.floatX),
                name='b',
                borrow=True
            )

        self.W = W
        self.b = b

        pre_activation = T.dot(input, self.W) + self.b
        if activation is None:
            self.output = pre_activation
        else:
            self.output = activation(pre_activation)

        # parameters of the model
        self.params = [self.W, self.b]


# start-snippet-2
class MLP(object):
    """Multi-Layer Perceptron Class

    A multilayer perceptron is a feedforward artificial neural network model
    with one or more layers of hidden units and nonlinear activations.
    Here a single tanh hidden layer (a ``HiddenLayer``) feeds a top layer
    defined by the ``LogisticRegression`` class.
    """

    def __init__(self, rng, input, n_in, n_hidden, n_out):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
        architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type n_hidden: int
        :param n_hidden: number of hidden units

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie

        """

        # One hidden layer with a tanh activation, connected to the
        # LogisticRegression layer; the activation function can be replaced
        # by sigmoid or any other nonlinear function
        hidden = HiddenLayer(
            rng=rng,
            input=input,
            n_in=n_in,
            n_out=n_hidden,
            activation=T.tanh
        )
        self.hiddenLayer = hidden

        # The logistic regression layer reads the output of the hidden layer
        classifier = LogisticRegression(
            input=hidden.output,
            n_in=n_hidden,
            n_out=n_out
        )
        self.logRegressionLayer = classifier
        # end-snippet-2 start-snippet-3
        # L1 norm of the two weight matrices; one regularization option is
        # to enforce that it stays small
        self.L1 = abs(hidden.W).sum() + abs(classifier.W).sum()

        # square of the L2 norm of the two weight matrices; another
        # regularization option is to enforce that it stays small
        self.L2_sqr = (hidden.W ** 2).sum() + (classifier.W ** 2).sum()

        # the negative log likelihood and the error count of the MLP are
        # the ones computed by the logistic regression layer
        self.negative_log_likelihood = classifier.negative_log_likelihood
        self.errors = classifier.errors

        # the parameters of the model are those of its two layers
        self.params = hidden.params + classifier.params
        # end-snippet-3


def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
    """
    Demonstrate stochastic gradient descent optimization for a multilayer
    perceptron

    This is demonstrated on MNIST.  The function trains with early
    stopping, prints its progress and returns nothing.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
    gradient)

    :type L1_reg: float
    :param L1_reg: L1-norm's weight when added to the cost (see
    regularization)

    :type L2_reg: float
    :param L2_reg: L2-norm's weight when added to the cost (see
    regularization)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type n_hidden: int
    :param n_hidden: number of hidden units of the MLP

    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    rng = numpy.random.RandomState(1234)

    # construct the MLP class
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=28 * 28,
        n_hidden=n_hidden,
        n_out=10
    )

    # start-snippet-4
    # the cost we minimize during training is the negative log likelihood of
    # the model plus the regularization terms (L1 and L2); cost is expressed
    # here symbolically
    cost = (
        classifier.negative_log_likelihood(y)
        + L1_reg * classifier.L1
        + L2_reg * classifier.L2_sqr
    )
    # end-snippet-4

    # compiling a Theano function that computes the mistakes that are made
    # by the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size:(index + 1) * batch_size],
            y: test_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # same as test_model, but reading from the validation set
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size:(index + 1) * batch_size],
            y: valid_set_y[index * batch_size:(index + 1) * batch_size]
        }
    )

    # start-snippet-5
    # compute the gradient of cost with respect to theta (stored in params)
    # the resulting gradients will be stored in a list gparams
    gparams = [T.grad(cost, param) for param in classifier.params]

    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs

    # given two list the zip A = [a1, a2, a3, a4] and B = [b1, b2, b3, b4] of
    # same length, zip generates a list C of same size, where each element
    # is a pair formed from the two lists :
    #    C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    # compiling a Theano function `train_model` that returns the cost, but
    # in the same time updates the parameter of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-5

    ###############
    # TRAIN MODEL #
    ###############
    print '... training'

    # early-stopping parameters
    patience = 10000  # look at this many minibatches regardless
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; with
                                  # n_train_batches as the minimum this
                                  # means checking once per epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number (0-based count of minibatches seen so far)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    # test it on the test set
                    test_losses = [test_model(i) for i
                                   in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(('     epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            # out of patience: leave the minibatch loop and, through
            # done_looping, the epoch loop as well
            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(('Optimization complete. Best validation score of %f %% '
           'obtained at iteration %i, with test performance %f %%') %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))


# Run the MLP demo with its default arguments when this file is executed as
# a script (and not when it is imported).
if __name__ == '__main__':
    test_mlp()


================================================
FILE: DeepLearningTutorials/code/rbm.py
================================================
"""This tutorial introduces restricted boltzmann machines (RBM) using Theano.

Boltzmann Machines (BMs) are a particular form of energy-based model which
contain hidden variables. Restricted Boltzmann Machines further restrict BMs
to those without visible-visible and hidden-hidden connections.
"""
import time

try:
    import PIL.Image as Image
except ImportError:
    import Image

import numpy

import theano
import theano.tensor as T
import os

from theano.tensor.shared_randomstreams import RandomStreams

from utils import tile_raster_images
from logistic_sgd import load_data


# start-snippet-1
class RBM(object):
    """Restricted Boltzmann Machine (RBM)  """
    def __init__(
        self,
        input=None,
        n_visible=784,
        n_hidden=500,
        W=None,
        hbias=None,
        vbias=None,
        numpy_rng=None,
        theano_rng=None
    ):
        """
        RBM constructor. Defines the parameters of the model along with
        basic operations for inferring hidden from visible (and vice-versa),
        as well as for performing CD updates.

        :param input: None for standalone RBMs or symbolic variable if RBM is
        part of a larger graph.

        :param n_visible: number of visible units

        :param n_hidden: number of hidden units

        :param W: None for standalone RBMs or symbolic variable pointing to a
        shared weight matrix in case RBM is part of a DBN network; in a DBN,
        the weights are shared between RBMs and layers of a MLP

        :param hbias: None for standalone RBMs or symbolic variable pointing
        to a shared hidden units bias vector in case RBM is part of a
        different network

        :param vbias: None for standalone RBMs or a symbolic variable
        pointing to a shared visible units bias
        """

        self.n_visible = n_visible
        self.n_hidden = n_hidden

        if numpy_rng is None:
            # create a number generator
            numpy_rng = numpy.random.RandomState(1234)

        if theano_rng is None:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        if W is None:
            # W is initialized with `initial_W`, which is uniformly
            # sampled between -4*sqrt(6./(n_visible+n_hidden)) and
            # 4*sqrt(6./(n_hidden+n_visible)). The output of uniform is
            # converted using asarray to dtype theano.config.floatX so
            # that the code is runnable on GPU.
            initial_W = numpy.asarray(
                numpy_rng.uniform(
                    low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                    high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                    size=(n_visible, n_hidden)
                ),
                dtype=theano.config.floatX
            )
            # theano shared variables for weights and biases
            W = theano.shared(value=initial_W, name='W', borrow=True)

        if hbias i
Download .txt
gitextract_7ps85ir2/

├── .gitignore
├── DBN.py
├── DeepLearningTutorials/
│   ├── .gitignore
│   ├── .hgignore
│   ├── .travis.yml
│   ├── README.rst
│   ├── __init__.py
│   ├── code/
│   │   ├── DBN.py
│   │   ├── SdA.py
│   │   ├── __init__.py
│   │   ├── cA.py
│   │   ├── convolutional_mlp.py
│   │   ├── dA.py
│   │   ├── hmc/
│   │   │   ├── __init__.py
│   │   │   ├── hmc.py
│   │   │   └── test_hmc.py
│   │   ├── imdb.py
│   │   ├── imdb_preprocess.py
│   │   ├── logistic_cg.py
│   │   ├── logistic_sgd.py
│   │   ├── lstm.py
│   │   ├── mlp.py
│   │   ├── rbm.py
│   │   ├── rnnrbm.py
│   │   ├── rnnslu.py
│   │   ├── test.py
│   │   └── utils.py
│   ├── data/
│   │   ├── download.sh
│   │   └── training_colorpatches_16x16_demo.mat
│   ├── doc/
│   │   ├── .templates/
│   │   │   └── layout.html
│   │   ├── DBN.txt
│   │   ├── LICENSE.txt
│   │   ├── Makefile
│   │   ├── SdA.txt
│   │   ├── conf.py
│   │   ├── contents.txt
│   │   ├── dA.txt
│   │   ├── deep.txt
│   │   ├── gettingstarted.txt
│   │   ├── hmc.txt
│   │   ├── index.txt
│   │   ├── lenet.txt
│   │   ├── logreg.txt
│   │   ├── lstm.txt
│   │   ├── mlp.txt
│   │   ├── rbm.txt
│   │   ├── references.txt
│   │   ├── rnnrbm.txt
│   │   ├── rnnslu.txt
│   │   ├── scripts/
│   │   │   └── docgen.py
│   │   └── utilities.txt
│   ├── issues_closed/
│   │   └── 2_RBM_cost_fn.txt
│   ├── issues_open/
│   │   ├── 1_SdA_performance.txt
│   │   ├── 3_RBM_scan_GPU.txt
│   │   ├── 4_RBM_scan.txt
│   │   ├── 5_results.txt
│   │   └── 6_benchmarking_pybrain.txt
│   └── misc/
│       └── do_nightly_build
├── README.md
├── joplin/
│   ├── alabama.xml
│   ├── cleopha.xml
│   ├── entertainer.xml
│   ├── maple_leaf.xml
│   ├── searchlight.xml
│   ├── strenous.xml
│   ├── syncopations.xml
│   ├── winners.xml
│   └── winners_2.xml
├── joplin-model.pickle
├── joplin_data.pickle
├── midi/
│   ├── DataTypeConverters.py
│   ├── EventDispatcher.py
│   ├── Icon_
│   ├── MidiFileParser.py
│   ├── MidiInFile.py
│   ├── MidiInStream.py
│   ├── MidiOutFile.py
│   ├── MidiOutStream.py
│   ├── MidiToText.py
│   ├── RawInstreamFile.py
│   ├── RawOutstreamFile.py
│   ├── __init__.py
│   ├── changes.txt
│   ├── constants.py
│   ├── example_mimimal_type0.py
│   ├── example_print_channel_0.py
│   ├── example_print_events.py
│   ├── example_print_file.py
│   ├── example_transpose_octave.py
│   ├── files.txt
│   ├── hallelujah.mid
│   ├── license.txt
│   ├── readme
│   ├── readme.txt
│   ├── utils.py
│   └── version.txt
├── myparser.py
└── neural-plugin/
    ├── DoubleTime.js
    ├── neural-plugin.js
    ├── neural-plugin.ui
    └── output-window.ui
Download .txt
SYMBOL INDEX (360 symbols across 37 files)

FILE: DBN.py
  class AutoencodingDBN (line 33) | class AutoencodingDBN(object):
    method __init__ (line 39) | def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
    method dump_params (line 182) | def dump_params(self, outLoc):
    method pretraining_functions (line 196) | def pretraining_functions(self, train_set_x, batch_size, k):
    method build_finetune_functions (line 246) | def build_finetune_functions(self, train_set_x, batch_size, learning_r...
    method build_generative_finetune_fns (line 304) | def build_generative_finetune_fns(self, train_set_outputs, train_set_l...
    method generate (line 356) | def generate(self, top_level):
    method label (line 370) | def label(self, to_label, x_mask, learning_rate):
    method train_dbn (line 391) | def train_dbn(self, data_file, finetune_lr=0.01, pretraining_epochs=100,
    method sample (line 534) | def sample(self, top_level=None, rootLoc='./', save=True, threshold=0.5,
    method label_from_file (line 559) | def label_from_file(self, rootLoc, fileLoc, learn_rate, n_iters, thres...
  function melody_blocker (line 604) | def melody_blocker(snippet):
  function load_from_dump (line 623) | def load_from_dump(inLoc):

FILE: DeepLearningTutorials/code/DBN.py
  class DBN (line 19) | class DBN(object):
    method __init__ (line 30) | def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
    method pretraining_functions (line 139) | def pretraining_functions(self, train_set_x, batch_size, k):
    method build_finetune_functions (line 189) | def build_finetune_functions(self, datasets, batch_size, learning_rate):
  function test_DBN (line 279) | def test_DBN(finetune_lr=0.1, pretraining_epochs=100,

FILE: DeepLearningTutorials/code/SdA.py
  class SdA (line 48) | class SdA(object):
    method __init__ (line 59) | def __init__(
    method pretraining_functions (line 180) | def pretraining_functions(self, train_set_x, batch_size):
    method build_finetune_functions (line 231) | def build_finetune_functions(self, datasets, batch_size, learning_rate):
  function test_SdA (line 326) | def test_SdA(finetune_lr=0.1, pretraining_epochs=15,

FILE: DeepLearningTutorials/code/cA.py
  class cA (line 50) | class cA(object):
    method __init__ (line 79) | def __init__(self, numpy_rng, input=None, n_visible=784, n_hidden=100,
    method get_hidden_values (line 172) | def get_hidden_values(self, input):
    method get_jacobian (line 176) | def get_jacobian(self, hidden, W):
    method get_reconstructed_input (line 186) | def get_reconstructed_input(self, hidden):
    method get_cost_updates (line 193) | def get_cost_updates(self, contraction_level, learning_rate):
  function test_cA (line 228) | def test_cA(learning_rate=0.01, training_epochs=20,

FILE: DeepLearningTutorials/code/convolutional_mlp.py
  class LeNetConvPoolLayer (line 39) | class LeNetConvPoolLayer(object):
    method __init__ (line 42) | def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,...
  function evaluate_lenet5 (line 114) | def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
  function experiment (line 342) | def experiment(state, channel):

FILE: DeepLearningTutorials/code/dA.py
  class dA (line 53) | class dA(object):
    method __init__ (line 77) | def __init__(
    method get_corrupted_input (line 196) | def get_corrupted_input(self, input, corruption_level):
    method get_hidden_values (line 222) | def get_hidden_values(self, input):
    method get_reconstructed_input (line 226) | def get_reconstructed_input(self, hidden):
    method get_cost_updates (line 233) | def get_cost_updates(self, corruption_level, learning_rate):
  function test_dA (line 263) | def test_dA(learning_rate=0.1, training_epochs=15,

FILE: DeepLearningTutorials/code/hmc/hmc.py
  function kinetic_energy (line 15) | def kinetic_energy(vel):
  function hamiltonian (line 33) | def hamiltonian(pos, vel, energy_fn):
  function metropolis_hastings_accept (line 58) | def metropolis_hastings_accept(energy_prev, energy_next, s_rng):
  function simulate_dynamics (line 83) | def simulate_dynamics(initial_pos, initial_vel, stepsize, n_steps, energ...
  function hmc_move (line 177) | def hmc_move(s_rng, positions, energy_fn, stepsize, n_steps):
  function hmc_updates (line 231) | def hmc_updates(positions, stepsize, avg_acceptance_rate, final_pos, acc...
  class HMC_sampler (line 308) | class HMC_sampler(object):
    method __init__ (line 324) | def __init__(self, **kwargs):
    method new_from_shared_positions (line 328) | def new_from_shared_positions(
    method draw (line 400) | def draw(self, **kwargs):

FILE: DeepLearningTutorials/code/hmc/test_hmc.py
  function sampler_on_nd_gaussian (line 8) | def sampler_on_nd_gaussian(sampler_cls, burnin, n_samples, dim=10):
  function test_hmc (line 56) | def test_hmc():

FILE: DeepLearningTutorials/code/imdb.py
  function prepare_data (line 9) | def prepare_data(seqs, labels, maxlen=None):
  function get_dataset_file (line 51) | def get_dataset_file(dataset, default_dataset, origin):
  function load_data (line 77) | def load_data(path="imdb.pkl", n_words=100000, valid_portion=0.1, maxlen...

FILE: DeepLearningTutorials/code/imdb_preprocess.py
  function tokenize (line 28) | def tokenize(sentences):
  function build_dict (line 40) | def build_dict(path):
  function grab_data (line 80) | def grab_data(path, dictionary):
  function main (line 98) | def main():

FILE: DeepLearningTutorials/code/logistic_cg.py
  class LogisticRegression (line 51) | class LogisticRegression(object):
    method __init__ (line 60) | def __init__(self, input, n_in, n_out):
    method negative_log_likelihood (line 100) | def negative_log_likelihood(self, y):
    method errors (line 117) | def errors(self, y):
  function cg_optimization_mnist (line 141) | def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'):

FILE: DeepLearningTutorials/code/logistic_sgd.py
  class LogisticRegression (line 49) | class LogisticRegression(object):
    method __init__ (line 58) | def __init__(self, input, n_in, n_out):
    method negative_log_likelihood (line 112) | def negative_log_likelihood(self, y):
    method errors (line 144) | def errors(self, y):
  function load_data (line 169) | def load_data(dataset):
  function sgd_optimization_mnist (line 248) | def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,

FILE: DeepLearningTutorials/code/lstm.py
  function numpy_floatX (line 21) | def numpy_floatX(data):
  function get_minibatches_idx (line 25) | def get_minibatches_idx(n, minibatch_size, shuffle=False):
  function get_dataset (line 49) | def get_dataset(name):
  function zipp (line 53) | def zipp(params, tparams):
  function unzip (line 61) | def unzip(zipped):
  function dropout_layer (line 71) | def dropout_layer(state_before, use_noise, trng):
  function _p (line 81) | def _p(pp, name):
  function init_params (line 85) | def init_params(options):
  function load_params (line 105) | def load_params(path, params):
  function init_tparams (line 115) | def init_tparams(params):
  function get_layer (line 122) | def get_layer(name):
  function ortho_weight (line 127) | def ortho_weight(ndim):
  function param_init_lstm (line 133) | def param_init_lstm(options, params, prefix='lstm'):
  function lstm_layer (line 155) | def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None):
  function sgd (line 209) | def sgd(lr, tparams, grads, x, mask, y, cost):
  function adadelta (line 237) | def adadelta(lr, tparams, grads, x, mask, y, cost):
  function rmsprop (line 270) | def rmsprop(lr, tparams, grads, x, mask, y, cost):
  function build_model (line 305) | def build_model(tparams, options):
  function pred_probs (line 340) | def pred_probs(f_pred_prob, prepare_data, data, iterator, verbose=False):
  function pred_error (line 363) | def pred_error(f_pred, prepare_data, data, iterator, verbose=False):
  function train_lstm (line 382) | def train_lstm(

FILE: DeepLearningTutorials/code/mlp.py
  class HiddenLayer (line 38) | class HiddenLayer(object):
    method __init__ (line 39) | def __init__(self, rng, input, n_in, n_out, W=None, b=None,
  class MLP (line 112) | class MLP(object):
    method __init__ (line 123) | def __init__(self, rng, input, n_in, n_hidden, n_out):
  function test_mlp (line 195) | def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,

FILE: DeepLearningTutorials/code/rbm.py
  class RBM (line 27) | class RBM(object):
    method __init__ (line 29) | def __init__(
    method free_energy (line 127) | def free_energy(self, v_sample):
    method propup (line 134) | def propup(self, vis):
    method sample_h_given_v (line 148) | def sample_h_given_v(self, v0_sample):
    method propdown (line 162) | def propdown(self, hid):
    method sample_v_given_h (line 176) | def sample_v_given_h(self, h0_sample):
    method gibbs_hvh (line 189) | def gibbs_hvh(self, h0_sample):
    method gibbs_vhv (line 197) | def gibbs_vhv(self, v0_sample):
    method get_cost_updates (line 206) | def get_cost_updates(self, lr=0.1, persistent=None, k=1):
    method get_pseudo_likelihood_cost (line 289) | def get_pseudo_likelihood_cost(self, updates):
    method get_reconstruction_cost (line 318) | def get_reconstruction_cost(self, updates, pre_sigmoid_nv):
  function test_rbm (line 359) | def test_rbm(learning_rate=0.1, training_epochs=15,

FILE: DeepLearningTutorials/code/rnnrbm.py
  function build_rbm (line 30) | def build_rbm(v, W, bv, bh, k):
  function shared_normal (line 81) | def shared_normal(num_rows, num_cols, scale=1):
  function shared_zeros (line 88) | def shared_zeros(*shape):
  function build_rnnrbm (line 93) | def build_rnnrbm(n_visible, n_hidden, n_hidden_recurrent):
  class RnnRbm (line 177) | class RnnRbm:
    method __init__ (line 181) | def __init__(
    method train (line 231) | def train(self, files, batch_size=100, num_epochs=200):
    method generate (line 267) | def generate(self, filename, show=True):
  function test_rnnrbm (line 289) | def test_rnnrbm(batch_size=100, num_epochs=200):

FILE: DeepLearningTutorials/code/rnnslu.py
  function shuffle (line 29) | def shuffle(lol, seed):
  function contextwin (line 42) | def contextwin(l, win):
  function atisfold (line 65) | def atisfold(fold):
  function conlleval (line 74) | def conlleval(p, g, w, filename, script_path):
  function download (line 105) | def download(origin, destination):
  function get_perf (line 114) | def get_perf(filename, folder):
  class RNNSLU (line 142) | class RNNSLU(object):
    method __init__ (line 144) | def __init__(self, nh, nc, ne, de, cs):
    method train (line 234) | def train(self, x, y, window_size, learning_rate):
    method save (line 243) | def save(self, folder):
    method load (line 248) | def load(self, folder):
  function main (line 254) | def main(param=None):

FILE: DeepLearningTutorials/code/test.py
  function test_rnnslu (line 18) | def test_rnnslu():
  function test_logistic_sgd (line 22) | def test_logistic_sgd():
  function test_logistic_cg (line 26) | def test_logistic_cg():
  function test_mlp (line 36) | def test_mlp():
  function test_convolutional_mlp (line 40) | def test_convolutional_mlp():
  function test_dA (line 44) | def test_dA():
  function test_SdA (line 48) | def test_SdA():
  function test_dbn (line 52) | def test_dbn():
  function test_rbm (line 56) | def test_rbm():
  function test_rnnrbm (line 61) | def test_rnnrbm():
  function test_lstm (line 65) | def test_lstm():
  function speed (line 69) | def speed():

FILE: DeepLearningTutorials/code/utils.py
  function scale_to_unit_interval (line 13) | def scale_to_unit_interval(ndar, eps=1e-8):
  function tile_raster_images (line 21) | def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),

FILE: DeepLearningTutorials/doc/scripts/docgen.py
  function mkdir (line 26) | def mkdir(path):

FILE: midi/DataTypeConverters.py
  function getNibbles (line 18) | def getNibbles(byte):
  function setNibbles (line 35) | def setNibbles(hiNibble, loNibble):
  function readBew (line 53) | def readBew(value):
  function writeBew (line 64) | def writeBew(value, length):
  function readVar (line 85) | def readVar(value):
  function varLen (line 104) | def varLen(value):
  function writeVar (line 119) | def writeVar(value):
  function to_n_bits (line 127) | def to_n_bits(value, length=1, nbits=7):
  function toBytes (line 134) | def toBytes(value):
  function fromBytes (line 139) | def fromBytes(value):

FILE: midi/EventDispatcher.py
  class EventDispatcher (line 14) | class EventDispatcher:
    method __init__ (line 17) | def __init__(self, outstream):
    method header (line 50) | def header(self, format, nTracks, division):
    method start_of_track (line 55) | def start_of_track(self, current_track):
    method sysex_event (line 65) | def sysex_event(self, data):
    method eof (line 70) | def eof(self):
    method update_time (line 75) | def update_time(self, new_time=0, relative=1):
    method reset_time (line 80) | def reset_time(self):
    method channel_messages (line 88) | def channel_messages(self, hi_nible, channel, data):
    method continuous_controllers (line 139) | def continuous_controllers(self, channel, controller, value):
    method system_commons (line 154) | def system_commons(self, common_type, common_data):
    method meta_event (line 182) | def meta_event(self, meta_type, data):

FILE: midi/MidiFileParser.py
  class MidiFileParser (line 12) | class MidiFileParser:
    method __init__ (line 21) | def __init__(self, raw_in, outstream):
    method parseMThdChunk (line 37) | def parseMThdChunk(self):
    method parseMTrkChunk (line 66) | def parseMTrkChunk(self):
    method parseMTrkChunks (line 165) | def parseMTrkChunks(self):

FILE: midi/MidiInFile.py
  class MidiInFile (line 7) | class MidiInFile:
    method __init__ (line 38) | def __init__(self, outStream, infile):
    method read (line 44) | def read(self):
    method setData (line 51) | def setData(self, data=''):

FILE: midi/MidiInStream.py
  class MidiInStream (line 5) | class MidiInStream:
    method __init__ (line 12) | def __init__(self, midiOutStream, device):
    method close (line 27) | def close(self):
    method read (line 34) | def read(self, time=0):
    method resetTimer (line 45) | def resetTimer(self, time=0):

FILE: midi/MidiOutFile.py
  class MidiOutFile (line 9) | class MidiOutFile(MidiOutStream):
    method __init__ (line 17) | def __init__(self, raw_out=''):
    method write (line 23) | def write(self):
    method event_slice (line 27) | def event_slice(self, slc):
    method note_on (line 41) | def note_on(self, channel=0, note=0x40, velocity=0x40):
    method note_off (line 51) | def note_off(self, channel=0, note=0x40, velocity=0x40):
    method aftertouch (line 61) | def aftertouch(self, channel=0, note=0x40, velocity=0x40):
    method continuous_controller (line 71) | def continuous_controller(self, channel, controller, value):
    method patch_change (line 83) | def patch_change(self, channel, patch):
    method channel_pressure (line 93) | def channel_pressure(self, channel, pressure):
    method pitch_bend (line 103) | def pitch_bend(self, channel, value):
    method system_exclusive (line 125) | def system_exclusive(self, data):
    method midi_time_code (line 137) | def midi_time_code(self, msg_type, values):
    method song_position_pointer (line 146) | def song_position_pointer(self, value):
    method song_select (line 156) | def song_select(self, songNumber):
    method tuning_request (line 164) | def tuning_request(self):
    method header (line 175) | def header(self, format=0, nTracks=1, division=96):
    method eof (line 192) | def eof(self):
    method meta_slice (line 205) | def meta_slice(self, meta_type, data_slice):
    method meta_event (line 212) | def meta_event(self, meta_type, data):
    method start_of_track (line 219) | def start_of_track(self, n_track=0):
    method end_of_track (line 228) | def end_of_track(self):
    method sequence_number (line 244) | def sequence_number(self, value):
    method text (line 252) | def text(self, text):
    method copyright (line 260) | def copyright(self, text):
    method sequence_name (line 269) | def sequence_name(self, text):
    method instrument_name (line 277) | def instrument_name(self, text):
    method lyric (line 285) | def lyric(self, text):
    method marker (line 293) | def marker(self, text):
    method cuepoint (line 301) | def cuepoint(self, text):
    method midi_ch_prefix (line 309) | def midi_ch_prefix(self, channel):
    method midi_port (line 318) | def midi_port(self, value):
    method tempo (line 326) | def tempo(self, value):
    method smtp_offset (line 337) | def smtp_offset(self, hour, minute, second, frame, framePart):
    method time_signature (line 357) | def time_signature(self, nn, dd, cc, bb):
    method key_signature (line 373) | def key_signature(self, sf, mi):
    method sequencer_specific (line 385) | def sequencer_specific(self, data):

FILE: midi/MidiOutStream.py
  class MidiOutStream (line 3) | class MidiOutStream:
    method __init__ (line 22) | def __init__(self):
    method update_time (line 35) | def update_time(self, new_time=0, relative=1):
    method reset_time (line 47) | def reset_time(self):
    method rel_time (line 54) | def rel_time(self):
    method abs_time (line 58) | def abs_time(self):
    method reset_run_stat (line 64) | def reset_run_stat(self):
    method set_run_stat (line 68) | def set_run_stat(self, new_status):
    method get_run_stat (line 72) | def get_run_stat(self):
    method set_current_track (line 78) | def set_current_track(self, new_track):
    method get_current_track (line 82) | def get_current_track(self):
    method channel_message (line 91) | def channel_message(self, message_type, channel, data):
    method note_on (line 96) | def note_on(self, channel=0, note=0x40, velocity=0x40):
    method note_off (line 105) | def note_off(self, channel=0, note=0x40, velocity=0x40):
    method aftertouch (line 114) | def aftertouch(self, channel=0, note=0x40, velocity=0x40):
    method continuous_controller (line 123) | def continuous_controller(self, channel, controller, value):
    method patch_change (line 132) | def patch_change(self, channel, patch):
    method channel_pressure (line 141) | def channel_pressure(self, channel, pressure):
    method pitch_bend (line 150) | def pitch_bend(self, channel, value):
    method system_exclusive (line 165) | def system_exclusive(self, data):
    method song_position_pointer (line 176) | def song_position_pointer(self, value):
    method song_select (line 184) | def song_select(self, songNumber):
    method tuning_request (line 192) | def tuning_request(self):
    method midi_time_code (line 200) | def midi_time_code(self, msg_type, values):
    method header (line 211) | def header(self, format=0, nTracks=1, division=96):
    method eof (line 221) | def eof(self):
    method meta_event (line 233) | def meta_event(self, meta_type, data):
    method start_of_track (line 241) | def start_of_track(self, n_track=0):
    method end_of_track (line 249) | def end_of_track(self):
    method sequence_number (line 257) | def sequence_number(self, value):
    method text (line 265) | def text(self, text):
    method copyright (line 274) | def copyright(self, text):
    method sequence_name (line 283) | def sequence_name(self, text):
    method instrument_name (line 292) | def instrument_name(self, text):
    method lyric (line 300) | def lyric(self, text):
    method marker (line 308) | def marker(self, text):
    method cuepoint (line 316) | def cuepoint(self, text):
    method midi_ch_prefix (line 324) | def midi_ch_prefix(self, channel):
    method midi_port (line 332) | def midi_port(self, value):
    method tempo (line 340) | def tempo(self, value):
    method smtp_offset (line 350) | def smtp_offset(self, hour, minute, second, frame, framePart):
    method time_signature (line 370) | def time_signature(self, nn, dd, cc, bb):
    method key_signature (line 385) | def key_signature(self, sf, mi):
    method sequencer_specific (line 397) | def sequencer_specific(self, data):
    method timing_clock (line 410) | def timing_clock(self):
    method song_start (line 419) | def song_start(self):
    method song_stop (line 428) | def song_stop(self):
    method song_continue (line 437) | def song_continue(self):
    method active_sensing (line 446) | def active_sensing(self):
    method system_reset (line 455) | def system_reset(self):

FILE: midi/MidiToText.py
  class MidiToText (line 4) | class MidiToText(MidiOutStream):
    method channel_message (line 15) | def channel_message(self, message_type, channel, data):
    method note_on (line 20) | def note_on(self, channel=0, note=0x40, velocity=0x40):
    method note_off (line 23) | def note_off(self, channel=0, note=0x40, velocity=0x40):
    method aftertouch (line 26) | def aftertouch(self, channel=0, note=0x40, velocity=0x40):
    method continuous_controller (line 30) | def continuous_controller(self, channel, controller, value):
    method patch_change (line 34) | def patch_change(self, channel, patch):
    method channel_pressure (line 38) | def channel_pressure(self, channel, pressure):
    method pitch_bend (line 42) | def pitch_bend(self, channel, value):
    method system_exclusive (line 51) | def system_exclusive(self, data):
    method song_position_pointer (line 55) | def song_position_pointer(self, value):
    method song_select (line 59) | def song_select(self, songNumber):
    method tuning_request (line 63) | def tuning_request(self):
    method midi_time_code (line 67) | def midi_time_code(self, msg_type, values):
    method header (line 75) | def header(self, format=0, nTracks=1, division=96):
    method eof (line 80) | def eof(self):
    method start_of_track (line 84) | def start_of_track(self, n_track=0):
    method end_of_track (line 88) | def end_of_track(self):
    method sysex_event (line 97) | def sysex_event(self, data):
    method meta_event (line 104) | def meta_event(self, meta_type, data):
    method sequence_number (line 108) | def sequence_number(self, value):
    method text (line 112) | def text(self, text):
    method copyright (line 116) | def copyright(self, text):
    method sequence_name (line 120) | def sequence_name(self, text):
    method instrument_name (line 124) | def instrument_name(self, text):
    method lyric (line 128) | def lyric(self, text):
    method marker (line 132) | def marker(self, text):
    method cuepoint (line 136) | def cuepoint(self, text):
    method midi_ch_prefix (line 140) | def midi_ch_prefix(self, channel):
    method midi_port (line 144) | def midi_port(self, value):
    method tempo (line 148) | def tempo(self, value):
    method smtp_offset (line 152) | def smtp_offset(self, hour, minute, second, frame, framePart):
    method time_signature (line 156) | def time_signature(self, nn, dd, cc, bb):
    method key_signature (line 160) | def key_signature(self, sf, mi):
    method sequencer_specific (line 164) | def sequencer_specific(self, data):

FILE: midi/RawInstreamFile.py
  class RawInstreamFile (line 11) | class RawInstreamFile:
    method __init__ (line 21) | def __init__(self, infile=''):
    method setData (line 46) | def setData(self, data=''):
    method setCursor (line 52) | def setCursor(self, position=0):
    method getCursor (line 57) | def getCursor(self):
    method moveCursor (line 62) | def moveCursor(self, relative_position=0):
    method nextSlice (line 68) | def nextSlice(self, length, move_cursor=1):
    method readBew (line 77) | def readBew(self, n_bytes=1, move_cursor=1):
    method readVarLen (line 85) | def readVarLen(self):

FILE: midi/RawOutstreamFile.py
  class RawOutstreamFile (line 12) | class RawOutstreamFile:
    method __init__ (line 20) | def __init__(self, outfile=''):
    method writeSlice (line 28) | def writeSlice(self, str_slice):
    method writeBew (line 33) | def writeBew(self, value, length=1):
    method writeVarLen (line 38) | def writeVarLen(self, value):
    method write (line 43) | def write(self):
    method getvalue (line 55) | def getvalue(self):

FILE: midi/constants.py
  function is_status (line 207) | def is_status(byte):

FILE: midi/example_print_channel_0.py
  class Transposer (line 9) | class Transposer(MidiOutStream):
    method note_on (line 13) | def note_on(self, channel=0, note=0x40, velocity=0x40):

FILE: midi/example_transpose_octave.py
  class Transposer (line 10) | class Transposer(MidiOutFile):
    method _transp (line 14) | def _transp(self, ch, note):
    method note_on (line 22) | def note_on(self, channel=0, note=0x40, velocity=0x40):
    method note_off (line 27) | def note_off(self, channel=0, note=0x40, velocity=0x40):

FILE: midi/utils.py
  class midiread (line 15) | class midiread(MidiOutStream):
    method __init__ (line 16) | def __init__(self, filename, r=(21, 109), dt=0.2):
    method abs_time_in_seconds (line 31) | def abs_time_in_seconds(self):
    method tempo (line 34) | def tempo(self, value):
    method header (line 39) | def header(self, format=0, nTracks=1, division=96):
    method note_on (line 42) | def note_on(self, channel=0, note=0x40, velocity=0x40):
    method note_off (line 45) | def note_off(self, channel=0, note=0x40, velocity=0x40):
    method sysex_event (line 52) | def sysex_event(*args):
    method device_name (line 55) | def device_name(*args):
  function midiwrite (line 59) | def midiwrite(filename, piano_roll, r=(21, 109), dt=32, patch=0):

FILE: myparser.py
  function read (line 9) | def read(filename, noteAdder, speed=1.0):
  class CountingNoteAdder (line 93) | class CountingNoteAdder(object):
    method __init__ (line 94) | def __init__(self):
    method handle (line 97) | def handle(self, time, pitch, dur):
  class LegatoNoteAdder (line 100) | class LegatoNoteAdder(object):
    method __init__ (line 101) | def __init__(self, maxLen, transpose=0):
    method handle (line 106) | def handle(self, time, pitch, dur):
  function pitchGetter (line 114) | def pitchGetter(letter, octave, offset):
  function fileToData (line 126) | def fileToData(path, transpose=0, windowSize=4):
  function fileToSerialData (line 146) | def fileToSerialData(path):
  function main (line 155) | def main():
  function make_kaldi (line 183) | def make_kaldi(filename, offset):
  function make_keras (line 224) | def make_keras():

FILE: neural-plugin/DoubleTime.js
  function init (line 8) | function init()
  function addChord (line 13) | function addChord(cursor, duration)
  function addNote (line 22) | function addNote(chord, pitch)
  function addRest (line 29) | function addRest(cursor, duration)
  function run (line 37) | function run()

FILE: neural-plugin/neural-plugin.js
  function init (line 32) | function init() {
  function copyChord (line 39) | function copyChord(oldChord) {
  function copyThing (line 51) | function copyThing(source, target) {
  function run (line 64) | function run() {
  function accept (line 82) | function accept() {
Condensed preview — 101 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (9,344K chars).
[
  {
    "path": ".gitignore",
    "chars": 5,
    "preview": "*.pyc"
  },
  {
    "path": "DBN.py",
    "chars": 26167,
    "preview": "\"\"\"\n\"\"\"\nimport cPickle\nimport os\nimport sys\nimport time\nimport os.path as path\nimport copy\n\nimport numpy\n\nimport theano\n"
  },
  {
    "path": "DeepLearningTutorials/.gitignore",
    "chars": 179,
    "preview": "code/*.pyc\ncode/*_plots\ncode/tmp*\ncode/midi\ncode/rnnslu\ndata/atis.*\ndata/mnist.pkl.gz\ndata/mnist_py3k.pkl.gz\ndata/Nottin"
  },
  {
    "path": "DeepLearningTutorials/.hgignore",
    "chars": 28,
    "preview": "syntax: glob\n*.pyc\n*.png\n*~\n"
  },
  {
    "path": "DeepLearningTutorials/.travis.yml",
    "chars": 3150,
    "preview": "# After changing this file, check it on:\n# http://lint.travis-ci.org/\n\n#We can't get scipy installed with the python lan"
  },
  {
    "path": "DeepLearningTutorials/README.rst",
    "chars": 1539,
    "preview": "Deep Learning Tutorials\n=======================\n\nDeep Learning is a new area of Machine Learning research, which has bee"
  },
  {
    "path": "DeepLearningTutorials/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "DeepLearningTutorials/code/DBN.py",
    "chars": 17050,
    "preview": "\"\"\"\n\"\"\"\nimport os\nimport sys\nimport time\n\nimport numpy\n\nimport theano\nimport theano.tensor as T\nfrom theano.tensor.share"
  },
  {
    "path": "DeepLearningTutorials/code/SdA.py",
    "chars": 18933,
    "preview": "\"\"\"\n This tutorial introduces stacked denoising auto-encoders (SdA) using Theano.\n\n Denoising autoencoders are the build"
  },
  {
    "path": "DeepLearningTutorials/code/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "DeepLearningTutorials/code/cA.py",
    "chars": 11850,
    "preview": "\"\"\"This tutorial introduces Contractive auto-encoders (cA) using Theano.\n\n They are based on auto-encoders as the ones u"
  },
  {
    "path": "DeepLearningTutorials/code/convolutional_mlp.py",
    "chars": 12643,
    "preview": "\"\"\"This tutorial introduces the LeNet5 neural network architecture\nusing Theano.  LeNet5 is a convolutional neural netwo"
  },
  {
    "path": "DeepLearningTutorials/code/dA.py",
    "chars": 14605,
    "preview": "\"\"\"\n This tutorial introduces denoising auto-encoders (dA) using Theano.\n\n Denoising autoencoders are the building block"
  },
  {
    "path": "DeepLearningTutorials/code/hmc/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "DeepLearningTutorials/code/hmc/hmc.py",
    "chars": 15113,
    "preview": "\"\"\"\nTODO\n\"\"\"\n\nimport numpy\n\nfrom theano import function, shared\nfrom theano import tensor as TT\nimport theano\n\nsharedX ="
  },
  {
    "path": "DeepLearningTutorials/code/hmc/test_hmc.py",
    "chars": 2218,
    "preview": "import numpy\nfrom scipy import linalg\nimport theano\n\nfrom hmc import HMC_sampler\n\n\ndef sampler_on_nd_gaussian(sampler_cl"
  },
  {
    "path": "DeepLearningTutorials/code/imdb.py",
    "chars": 5285,
    "preview": "import cPickle\nimport gzip\nimport os\n\nimport numpy\nimport theano\n\n\ndef prepare_data(seqs, labels, maxlen=None):\n    \"\"\"C"
  },
  {
    "path": "DeepLearningTutorials/code/imdb_preprocess.py",
    "chars": 3350,
    "preview": "\"\"\"\nThis script is what created the dataset pickled.\n\n1) You need to download this file and put it in the same directory"
  },
  {
    "path": "DeepLearningTutorials/code/logistic_cg.py",
    "chars": 10304,
    "preview": "\"\"\"\nThis tutorial introduces logistic regression using Theano and conjugate\ngradient descent.\n\nLogistic regression is a "
  },
  {
    "path": "DeepLearningTutorials/code/logistic_sgd.py",
    "chars": 16081,
    "preview": "\"\"\"\nThis tutorial introduces logistic regression using Theano and stochastic\ngradient descent.\n\nLogistic regression is a"
  },
  {
    "path": "DeepLearningTutorials/code/lstm.py",
    "chars": 21138,
    "preview": "'''\nBuild a tweet sentiment analyzer\n'''\nfrom collections import OrderedDict\nimport cPickle as pkl\nimport random\nimport "
  },
  {
    "path": "DeepLearningTutorials/code/mlp.py",
    "chars": 14183,
    "preview": "\"\"\"\nThis tutorial introduces the multilayer perceptron using Theano.\n\n A multilayer perceptron is a logistic regressor w"
  },
  {
    "path": "DeepLearningTutorials/code/rbm.py",
    "chars": 20606,
    "preview": "\"\"\"This tutorial introduces restricted boltzmann machines (RBM) using Theano.\n\nBoltzmann Machines (BMs) are a particular"
  },
  {
    "path": "DeepLearningTutorials/code/rnnrbm.py",
    "chars": 11626,
    "preview": "# Author: Nicolas Boulanger-Lewandowski\n# University of Montreal (2012)\n# RNN-RBM deep learning tutorial\n# More informat"
  },
  {
    "path": "DeepLearningTutorials/code/rnnslu.py",
    "chars": 13350,
    "preview": "from collections import OrderedDict\nimport copy\nimport cPickle\nimport gzip\nimport os\nimport urllib\nimport random\nimport "
  },
  {
    "path": "DeepLearningTutorials/code/test.py",
    "chars": 10880,
    "preview": "import sys\n\nimport numpy\n\nimport convolutional_mlp\nimport dA\nimport DBN\nimport logistic_cg\nimport logistic_sgd\nimport ml"
  },
  {
    "path": "DeepLearningTutorials/code/utils.py",
    "chars": 5101,
    "preview": "\"\"\" This file contains different utility functions that are not connected\nin anyway to the networks presented in the tut"
  },
  {
    "path": "DeepLearningTutorials/data/download.sh",
    "chars": 1099,
    "preview": "#!/bin/sh\n\nwhich wget >/dev/null 2>&1\nWGET=$?\nwhich curl >/dev/null 2>&1\nCURL=$?\nif [ \"$WGET\" -eq 0 ]; then\n    DL_CMD=\""
  },
  {
    "path": "DeepLearningTutorials/doc/.templates/layout.html",
    "chars": 617,
    "preview": "{% extends \"!layout.html\" %}\n\n{%- block extrahead %}\n{{ super() }}\n<script type=\"text/javascript\">\n  var _gaq = _gaq || "
  },
  {
    "path": "DeepLearningTutorials/doc/DBN.txt",
    "chars": 12034,
    "preview": ".. _DBN:\n\nDeep Belief Networks\n====================\n\n.. note::\n  This section assumes the reader has already read throug"
  },
  {
    "path": "DeepLearningTutorials/doc/LICENSE.txt",
    "chars": 1524,
    "preview": ".. _license:\n\nLICENSE\n=======\n\nCopyright (c) 2008--2013, Theano Development Team\nAll rights reserved.\n\nRedistribution an"
  },
  {
    "path": "DeepLearningTutorials/doc/Makefile",
    "chars": 31,
    "preview": "all:\n\tpython scripts/docgen.py\n"
  },
  {
    "path": "DeepLearningTutorials/doc/SdA.txt",
    "chars": 8611,
    "preview": ".. _SdA:\n\nStacked Denoising Autoencoders (SdA)\n====================================\n\n.. note::\n  This section assumes yo"
  },
  {
    "path": "DeepLearningTutorials/doc/conf.py",
    "chars": 7072,
    "preview": "# -*- coding: utf-8 -*-\n#\n# theano documentation build configuration file, created by\n# sphinx-quickstart on Tue Oct  7 "
  },
  {
    "path": "DeepLearningTutorials/doc/contents.txt",
    "chars": 227,
    "preview": "\n.. _contents:\n\n========\nContents\n========\n\n.. toctree::\n   :maxdepth: 2\n\n   LICENSE\n   index\n   gettingstarted\n   logre"
  },
  {
    "path": "DeepLearningTutorials/doc/dA.txt",
    "chars": 21832,
    "preview": ".. _daa:\n\nDenoising Autoencoders (dA)\n===========================\n\n.. note::\n  This section assumes the reader has alrea"
  },
  {
    "path": "DeepLearningTutorials/doc/deep.txt",
    "chars": 4374,
    "preview": ".. _deep:\n\nDeep Learning\n=============\n\nThe breakthrough to effective training strategies for deep architectures came in"
  },
  {
    "path": "DeepLearningTutorials/doc/gettingstarted.txt",
    "chars": 29586,
    "preview": ".. _gettingstarted:\n\n\n===============\nGetting Started\n===============\n\nThese tutorials do not attempt to make up for a g"
  },
  {
    "path": "DeepLearningTutorials/doc/hmc.txt",
    "chars": 16068,
    "preview": ".. _HMC:\n\nHybrid Monte-Carlo Sampling\n===========================\n\n\n.. note::\n  This is an advanced tutorial, which show"
  },
  {
    "path": "DeepLearningTutorials/doc/index.txt",
    "chars": 4130,
    "preview": "=======================\nDeep Learning Tutorials\n=======================\n\nDeep Learning is a new area of Machine Learning"
  },
  {
    "path": "DeepLearningTutorials/doc/lenet.txt",
    "chars": 23520,
    "preview": ".. _lenet:\n\nConvolutional Neural Networks (LeNet)\n=====================================\n\n.. note::\n    This section assu"
  },
  {
    "path": "DeepLearningTutorials/doc/logreg.txt",
    "chars": 11478,
    "preview": ".. index:: Logistic Regression\n\n.. _logreg :\n\n\nClassifying MNIST digits using Logistic Regression\n======================"
  },
  {
    "path": "DeepLearningTutorials/doc/lstm.txt",
    "chars": 10507,
    "preview": ".. _lstm:\n\nLSTM Networks for Sentiment Analysis\n**********************************************\n\nSummary\n+++++++\n\nThis tu"
  },
  {
    "path": "DeepLearningTutorials/doc/mlp.txt",
    "chars": 13161,
    "preview": ".. index:: Multilayer Perceptron\n\n.. _mlp:\n\n\nMultilayer Perceptron\n=====================\n\n.. note::\n    This section ass"
  },
  {
    "path": "DeepLearningTutorials/doc/rbm.txt",
    "chars": 23850,
    "preview": ".. _RBM:\n\nRestricted Boltzmann Machines (RBM)\n===================================\n\n\n.. note::\n  This section assumes the"
  },
  {
    "path": "DeepLearningTutorials/doc/references.txt",
    "chars": 3704,
    "preview": ".. _references:\n\n==========\nReferences\n==========\n\n.. [Bengio07] Y. Bengio, P. Lamblin, D. Popovici and H. Larochelle, `"
  },
  {
    "path": "DeepLearningTutorials/doc/rnnrbm.txt",
    "chars": 7325,
    "preview": ".. _rnnrbm:\n\nModeling and generating sequences of polyphonic music with the RNN-RBM\n===================================="
  },
  {
    "path": "DeepLearningTutorials/doc/rnnslu.txt",
    "chars": 21994,
    "preview": ".. _rnnslu:\n\nRecurrent Neural Networks with Word Embeddings\n**********************************************\n\nSummary\n++++"
  },
  {
    "path": "DeepLearningTutorials/doc/scripts/docgen.py",
    "chars": 2062,
    "preview": "from __future__ import print_function\nimport sys\nimport os\nimport shutil\n\nimport getopt\nfrom collections import defaultd"
  },
  {
    "path": "DeepLearningTutorials/doc/utilities.txt",
    "chars": 6144,
    "preview": "=============\nMiscellaneous\n=============\n\n.. _how-to-plot:\n\nPlotting Samples and Filters\n++++++++++++++++++++++++++++\n\n"
  },
  {
    "path": "DeepLearningTutorials/issues_closed/2_RBM_cost_fn.txt",
    "chars": 233,
    "preview": "Reported by : Razvan\n\nCost function (delta of free energy) has a reversed sign (i.e. free_energy(positive) - free_energy"
  },
  {
    "path": "DeepLearningTutorials/issues_open/1_SdA_performance.txt",
    "chars": 242,
    "preview": "Reported by : Razvan\n\nBest performance for SdA float64 CPU : 1.23%\n                         float32 CPU : 1.30%\ntarget :"
  },
  {
    "path": "DeepLearningTutorials/issues_open/3_RBM_scan_GPU.txt",
    "chars": 164,
    "preview": "Reported by : Razvan\n\nScan is not GPU ready.. making RBM tutorial slow on GPU (not tested yet).\nQuick fix is a optimizat"
  },
  {
    "path": "DeepLearningTutorials/issues_open/4_RBM_scan.txt",
    "chars": 419,
    "preview": "Reported by : Razvan\n\nThe bug can be reproduced if you do : \n z = scan(..)\n c = f(z[-1])\n gp = T.grad(c, p, consider_con"
  },
  {
    "path": "DeepLearningTutorials/issues_open/5_results.txt",
    "chars": 226,
    "preview": "Reported by : Razvan\n\nWe should produce results + time for CPU float32 / CPU float64 / GPU . We should also \nspecify the"
  },
  {
    "path": "DeepLearningTutorials/issues_open/6_benchmarking_pybrain.txt",
    "chars": 3426,
    "preview": "Reported by : Razvan\n\nObservations : \n\n    1.  First thing, working with their dataset model is a pain ! Either I had \n "
  },
  {
    "path": "DeepLearningTutorials/misc/do_nightly_build",
    "chars": 1582,
    "preview": "#!/bin/bash\n#we set the compiledir to the /Tmp dir to make the test faster by bypassing the nfs network.\ndate\nROOT_CWD=/"
  },
  {
    "path": "README.md",
    "chars": 805,
    "preview": "# neuralnetmusic\nFelix's project for composing music using neural nets.\n\nThis is not in a state fit for public release r"
  },
  {
    "path": "joplin/alabama.xml",
    "chars": 1178495,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE score-partwise PUBLIC \"-//Recordare//DTD MusicXML 2.0 Partwise//EN\" \"ht"
  },
  {
    "path": "joplin/cleopha.xml",
    "chars": 1082025,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE score-partwise PUBLIC \"-//Recordare//DTD MusicXML 2.0 Partwise//EN\" \"ht"
  },
  {
    "path": "joplin/entertainer.xml",
    "chars": 1059919,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE score-partwise PUBLIC \"-//Recordare//DTD MusicXML 2.0 Partwise//EN\" \"ht"
  },
  {
    "path": "joplin/maple_leaf.xml",
    "chars": 1011301,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE score-partwise PUBLIC \"-//Recordare//DTD MusicXML 2.0 Partwise//EN\" \"ht"
  },
  {
    "path": "joplin/searchlight.xml",
    "chars": 1214672,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE score-partwise PUBLIC \"-//Recordare//DTD MusicXML 2.0 Partwise//EN\" \"ht"
  },
  {
    "path": "joplin/strenous.xml",
    "chars": 971063,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE score-partwise PUBLIC \"-//Recordare//DTD MusicXML 2.0 Partwise//EN\" \"ht"
  },
  {
    "path": "joplin/syncopations.xml",
    "chars": 878247,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE score-partwise PUBLIC \"-//Recordare//DTD MusicXML 2.0 Partwise//EN\" \"ht"
  },
  {
    "path": "joplin/winners.xml",
    "chars": 577147,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE score-partwise PUBLIC \"-//Recordare//DTD MusicXML 2.0 Partwise//EN\" \"ht"
  },
  {
    "path": "joplin/winners_2.xml",
    "chars": 269447,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE score-partwise PUBLIC \"-//Recordare//DTD MusicXML 2.0 Partwise//EN\" \"ht"
  },
  {
    "path": "midi/DataTypeConverters.py",
    "chars": 5516,
    "preview": "# -*- coding: ISO-8859-1 -*-\n\nfrom struct import pack, unpack\n\n\"\"\"\nThis module contains functions for reading and writin"
  },
  {
    "path": "midi/EventDispatcher.py",
    "chars": 9042,
    "preview": "# -*- coding: ISO-8859-1 -*-\n\n# std library\nfrom struct import unpack\n\n# custom\nfrom DataTypeConverters import readBew, "
  },
  {
    "path": "midi/Icon_",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "midi/MidiFileParser.py",
    "chars": 6432,
    "preview": "# -*- coding: ISO-8859-1 -*-\n\n# std library\nfrom struct import unpack\n\n# uhh I don't really like this, but there are so "
  },
  {
    "path": "midi/MidiInFile.py",
    "chars": 1490,
    "preview": "# -*- coding: ISO-8859-1 -*-\n\nfrom RawInstreamFile import RawInstreamFile\nfrom MidiFileParser import MidiFileParser\n\n\ncl"
  },
  {
    "path": "midi/MidiInStream.py",
    "chars": 905,
    "preview": "# -*- coding: ISO-8859-1 -*-\n\nfrom MidiOutStream import MidiOutStream\n\nclass MidiInStream:\n\n    \"\"\"\n    Takes midi event"
  },
  {
    "path": "midi/MidiOutFile.py",
    "chars": 10460,
    "preview": "# -*- coding: ISO-8859-1 -*-\n\nfrom MidiOutStream import MidiOutStream\nfrom RawOutstreamFile import RawOutstreamFile\n\nfro"
  },
  {
    "path": "midi/MidiOutStream.py",
    "chars": 8889,
    "preview": "# -*- coding: ISO-8859-1 -*-\n\nclass MidiOutStream:\n\n\n    \"\"\"\n\n    MidiOutstream is Basically an eventhandler. It is the "
  },
  {
    "path": "midi/MidiToText.py",
    "chars": 4137,
    "preview": "# -*- coding: ISO-8859-1 -*-\n\nfrom MidiOutStream import MidiOutStream\nclass MidiToText(MidiOutStream):\n\n\n    \"\"\"\n    Thi"
  },
  {
    "path": "midi/RawInstreamFile.py",
    "chars": 3005,
    "preview": "# -*- coding: ISO-8859-1 -*-\n\n# standard library imports\nfrom types import StringType\nfrom struct import unpack\n\n# custo"
  },
  {
    "path": "midi/RawOutstreamFile.py",
    "chars": 1641,
    "preview": "# -*- coding: ISO-8859-1 -*-\n\n# standard library imports\nimport sys\nfrom types import StringType\nfrom struct import unpa"
  },
  {
    "path": "midi/__init__.py",
    "chars": 110,
    "preview": "# -*- coding: ISO-8859-1 -*-\n\n#import MidiOutStream\n#import MidiInStream\n#import MidiInFile\n#import MidiToText"
  },
  {
    "path": "midi/changes.txt",
    "chars": 1880,
    "preview": "------------------------------------------------------------------------\nr409 | maxm | 2006-01-05 16:37:29 +0100 (to, 05"
  },
  {
    "path": "midi/constants.py",
    "chars": 6365,
    "preview": "# -*- coding: ISO-8859-1 -*-\n\n###################################################\n## Definitions of the different midi e"
  },
  {
    "path": "midi/example_mimimal_type0.py",
    "chars": 512,
    "preview": "from MidiOutFile import MidiOutFile\n\n\"\"\"\nThis is an example of the smallest possible type 0 midi file, where \nall the mi"
  },
  {
    "path": "midi/example_print_channel_0.py",
    "chars": 491,
    "preview": "from MidiOutStream import MidiOutStream\nfrom MidiInFile import MidiInFile\n\n\"\"\"\nThis prints all note on events on midi ch"
  },
  {
    "path": "midi/example_print_events.py",
    "chars": 495,
    "preview": "from MidiToText import MidiToText\n\n\"\"\"\nThis is an example that uses the MidiToText eventhandler. When an \nevent is trigg"
  },
  {
    "path": "midi/example_print_file.py",
    "chars": 492,
    "preview": "\"\"\"\nThis is an example that uses the MidiToText eventhandler. When an \nevent is triggered on it, it prints the event to "
  },
  {
    "path": "midi/example_transpose_octave.py",
    "chars": 1036,
    "preview": "from MidiOutFile import MidiOutFile\nfrom MidiInFile import MidiInFile\n\n\"\"\"\nThis is an example of the smallest possible t"
  },
  {
    "path": "midi/files.txt",
    "chars": 14861,
    "preview": "Midi file name\tBWV\tK\tB\tEMB\tR\n000106b_.mid\t1.6\t378\t\t375\t\n000206b_.mid\t2.6\t7\t262\t5\t262\n000306b_.mid\t3.6\t8\t156\t8\t156\n000306"
  },
  {
    "path": "midi/license.txt",
    "chars": 692,
    "preview": "Modified Python MIDI package\nCopyright (C) 2013  Nicolas Boulanger-Lewandowski\n\nThis program is free software: you can r"
  },
  {
    "path": "midi/readme",
    "chars": 2855,
    "preview": "JSBChorales.net: ReadMe\rMargaret Greentree\r\r\rThis is a copy of the README found with the midi files of the chorales.\r\rHe"
  },
  {
    "path": "midi/readme.txt",
    "chars": 1316,
    "preview": "\n-------------------------------------------------------------------------------------\nNicolas Boulanger-Lewandowski (Ja"
  },
  {
    "path": "midi/utils.py",
    "chars": 2444,
    "preview": "# Author: Nicolas Boulanger-Lewandowski\n# University of Montreal (2013)\n# RNN-RBM deep learning tutorial\n#\n# Implements "
  },
  {
    "path": "midi/version.txt",
    "chars": 5,
    "preview": "0.1.4"
  },
  {
    "path": "myparser.py",
    "chars": 8782,
    "preview": "import xml.etree.ElementTree as ET\nimport math\nimport numpy as np\nimport subprocess\nfrom PIL import Image\nimport cPickle"
  },
  {
    "path": "neural-plugin/DoubleTime.js",
    "chars": 2800,
    "preview": "//=============================================================================\n//  HalfTime plugin\n//\n//  This plugin c"
  },
  {
    "path": "neural-plugin/neural-plugin.js",
    "chars": 5435,
    "preview": "//=============================================================================\n//  MuseScore\n//  Linux Music Score Edit"
  },
  {
    "path": "neural-plugin/neural-plugin.ui",
    "chars": 1700,
    "preview": "<ui version=\"4.0\" >\n <class>Dialog</class>\n <widget class=\"QDialog\" name=\"Dialog\" >\n  <property name=\"geometry\" >\n   <re"
  },
  {
    "path": "neural-plugin/output-window.ui",
    "chars": 523,
    "preview": "<ui version=\"4.0\" >\n <class>Dialog</class>\n <widget class=\"QDialog\" name=\"Dialog\" >\n  <property name=\"geometry\" >\n   <re"
  }
]

// ... and 4 more files (download for full content)

About this extraction

This page contains the full source code of the fephsun/neuralnetmusic GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 101 files (56.2 MB), approximately 2.2M tokens, and a symbol index with 360 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!