Showing preview only (8,858K chars total). Download the full file or copy to clipboard to get everything.
Repository: fephsun/neuralnetmusic
Branch: master
Commit: 1b559a25bcfb
Files: 101
Total size: 56.2 MB
Directory structure:
gitextract_7ps85ir2/
├── .gitignore
├── DBN.py
├── DeepLearningTutorials/
│ ├── .gitignore
│ ├── .hgignore
│ ├── .travis.yml
│ ├── README.rst
│ ├── __init__.py
│ ├── code/
│ │ ├── DBN.py
│ │ ├── SdA.py
│ │ ├── __init__.py
│ │ ├── cA.py
│ │ ├── convolutional_mlp.py
│ │ ├── dA.py
│ │ ├── hmc/
│ │ │ ├── __init__.py
│ │ │ ├── hmc.py
│ │ │ └── test_hmc.py
│ │ ├── imdb.py
│ │ ├── imdb_preprocess.py
│ │ ├── logistic_cg.py
│ │ ├── logistic_sgd.py
│ │ ├── lstm.py
│ │ ├── mlp.py
│ │ ├── rbm.py
│ │ ├── rnnrbm.py
│ │ ├── rnnslu.py
│ │ ├── test.py
│ │ └── utils.py
│ ├── data/
│ │ ├── download.sh
│ │ └── training_colorpatches_16x16_demo.mat
│ ├── doc/
│ │ ├── .templates/
│ │ │ └── layout.html
│ │ ├── DBN.txt
│ │ ├── LICENSE.txt
│ │ ├── Makefile
│ │ ├── SdA.txt
│ │ ├── conf.py
│ │ ├── contents.txt
│ │ ├── dA.txt
│ │ ├── deep.txt
│ │ ├── gettingstarted.txt
│ │ ├── hmc.txt
│ │ ├── index.txt
│ │ ├── lenet.txt
│ │ ├── logreg.txt
│ │ ├── lstm.txt
│ │ ├── mlp.txt
│ │ ├── rbm.txt
│ │ ├── references.txt
│ │ ├── rnnrbm.txt
│ │ ├── rnnslu.txt
│ │ ├── scripts/
│ │ │ └── docgen.py
│ │ └── utilities.txt
│ ├── issues_closed/
│ │ └── 2_RBM_cost_fn.txt
│ ├── issues_open/
│ │ ├── 1_SdA_performance.txt
│ │ ├── 3_RBM_scan_GPU.txt
│ │ ├── 4_RBM_scan.txt
│ │ ├── 5_results.txt
│ │ └── 6_benchmarking_pybrain.txt
│ └── misc/
│ └── do_nightly_build
├── README.md
├── joplin/
│ ├── alabama.xml
│ ├── cleopha.xml
│ ├── entertainer.xml
│ ├── maple_leaf.xml
│ ├── searchlight.xml
│ ├── strenous.xml
│ ├── syncopations.xml
│ ├── winners.xml
│ └── winners_2.xml
├── joplin-model.pickle
├── joplin_data.pickle
├── midi/
│ ├── DataTypeConverters.py
│ ├── EventDispatcher.py
│ ├── Icon_
│ ├── MidiFileParser.py
│ ├── MidiInFile.py
│ ├── MidiInStream.py
│ ├── MidiOutFile.py
│ ├── MidiOutStream.py
│ ├── MidiToText.py
│ ├── RawInstreamFile.py
│ ├── RawOutstreamFile.py
│ ├── __init__.py
│ ├── changes.txt
│ ├── constants.py
│ ├── example_mimimal_type0.py
│ ├── example_print_channel_0.py
│ ├── example_print_events.py
│ ├── example_print_file.py
│ ├── example_transpose_octave.py
│ ├── files.txt
│ ├── hallelujah.mid
│ ├── license.txt
│ ├── readme
│ ├── readme.txt
│ ├── utils.py
│ └── version.txt
├── myparser.py
└── neural-plugin/
├── DoubleTime.js
├── neural-plugin.js
├── neural-plugin.ui
└── output-window.ui
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
*.pyc
================================================
FILE: DBN.py
================================================
"""
"""
import cPickle
import os
import sys
import time
import os.path as path
import copy
import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from DeepLearningTutorials.code.mlp import HiddenLayer
from DeepLearningTutorials.code.rbm import RBM
from PIL import Image
import myparser
from midi.utils import midiwrite
# compute_test_value is 'off' by default, meaning this feature is inactive
theano.config.compute_test_value = 'off' # Use 'warn' to activate this feature
# For switching between 32 and 64 bit systems, because Theano is a little silly
# like that.
NUMPY_DTYPE = numpy.float64
# start-snippet-1
class AutoencodingDBN(object):
    """
    An autoencoding Deep Belief Network, based on the classifying DBN in the
    Theano tutorial. (Most of the code is copied over.)

    A stack of sigmoid layers, each sharing its weights with an RBM, encodes
    the input.  A mirrored stack of "reverse" layers, built from the
    transposed weights and the RBM visible biases, decodes it again.
    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[1000, 1000, 1000]):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                          weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                                    at least one value
        """
        self.sigmoid_layers = []
        self.rbm_layers = []
        self.params = []
        # Work on a private copy: the default argument is a shared mutable
        # list and the caller's list must not be aliased by self.layer_sizes.
        hidden_layers_sizes = list(hidden_layers_sizes)
        self.n_layers = len(hidden_layers_sizes)
        self.layer_sizes = hidden_layers_sizes

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.x_mask = T.matrix('x_mask')  # For partial information.

        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer.  During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well).  During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.
        for i in xrange(self.n_layers):
            # The first layer reads the network input; every other layer
            # reads the activation of the hidden layer below it.
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            self.sigmoid_layers.append(sigmoid_layer)

            # Construct an RBM that shares weights with this layer
            rbm_layer = RBM(numpy_rng=numpy_rng,
                            theano_rng=theano_rng,
                            input=layer_input,
                            n_visible=input_size,
                            n_hidden=hidden_layers_sizes[i],
                            W=sigmoid_layer.W,
                            hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)
            self.params.extend(rbm_layer.params)

        # And build the upside-down network.  This shares parameters with the
        # forward network, except that the weights are transposed and the
        # biases are the RBM visible biases.
        # The "isolated" layers let you run only the upside-down part of the
        # network, for generation.  The non-isolated layers are connected to
        # the forward, compressing part of the network, and are used for
        # training.
        reverse_input = self.sigmoid_layers[-1].output
        # Top-level activations fed to the isolated decoder; 10 rows is the
        # batch size assumed by sample() and label_from_file().
        self.isolated_reverse_input = theano.shared(
            numpy.zeros([10, hidden_layers_sizes[-1]], dtype=NUMPY_DTYPE))
        isolated_input = self.isolated_reverse_input
        self.reverse_layers = [None] * self.n_layers
        self.isolated_reverse = [None] * self.n_layers
        for i in reversed(xrange(self.n_layers)):
            if i == 0:
                out_size = n_ins
            else:
                out_size = hidden_layers_sizes[i - 1]
            reverse_sigmoid = HiddenLayer(rng=numpy_rng,
                                          input=reverse_input,
                                          n_in=hidden_layers_sizes[i],
                                          n_out=out_size,
                                          W=self.sigmoid_layers[i].W.T,
                                          b=self.rbm_layers[i].vbias,
                                          activation=T.nnet.sigmoid)
            isolated_sigmoid = HiddenLayer(rng=numpy_rng,
                                           input=isolated_input,
                                           n_in=hidden_layers_sizes[i],
                                           n_out=out_size,
                                           W=self.sigmoid_layers[i].W.T,
                                           b=self.rbm_layers[i].vbias,
                                           activation=T.nnet.sigmoid)
            reverse_input = reverse_sigmoid.output
            isolated_input = isolated_sigmoid.output
            self.reverse_layers[i] = reverse_sigmoid
            self.isolated_reverse[i] = isolated_sigmoid

        # The fine-tune cost is the reconstruction error of the entire net.
        self.finetune_cost = (
            (self.x - self.reverse_layers[0].output) ** 2).sum()

        # The cost for training the generative net - in this case, self.x is
        # completely disconnected, and we feed a pattern into the reverse net.
        self.generative_cost = (
            (self.x - self.isolated_reverse[0].output) ** 2).sum()

        # The l1 cost is for generating constrained samples of the input
        # (aka harmonizing a melody).  Given a melody in self.x and a mask
        # self.x_mask of which parts of self.x actually matter, it computes
        # the error between the generated sample and the melody.
        self.l1_cost = (
            ((self.x - self.isolated_reverse[0].output) * self.x_mask) ** 2
        ).sum()

    def dump_params(self, outLoc):
        """
        Takes all of the weights, and stores them as numpy arrays.

        This is so the params are portable between GPU machines and CPU
        machines.  To load the params, you need to call load_from_dump, which
        re-makes your DBN.

        :type outLoc: str
        :param outLoc: path of the pickle file to write.  The pickle holds a
                       dict keyed by (layer, kind) with kind 0 = W,
                       1 = hidden bias, 2 = reverse-layer (visible) bias.
        """
        dump = {}
        for layer in range(self.n_layers):
            forward = self.sigmoid_layers[layer]
            dump[(layer, 0)] = numpy.array(forward.W.get_value())
            dump[(layer, 1)] = numpy.array(forward.b.get_value())
            dump[(layer, 2)] = numpy.array(
                self.reverse_layers[layer].b.get_value())
        # Close the file deterministically so the dump is flushed to disk.
        with open(outLoc, 'wb') as out_file:
            cPickle.dump(dump, out_file, protocol=cPickle.HIGHEST_PROTOCOL)

    def pretraining_functions(self, train_set_x, batch_size, k):
        '''Generates a list of functions, for performing one step of
        gradient descent at a given layer.  The function will require
        as input the minibatch index, and to train an RBM you just
        need to iterate, calling the corresponding function on all
        minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared var. that contains all datapoints used
                            for training the RBM

        :type batch_size: int
        :param batch_size: size of a [mini]batch

        :param k: number of Gibbs steps to do in CD-k / PCD-k

        :return: one compiled function per RBM, callable as
                 ``fn(index=..., lr=...)`` and returning the RBM cost
        '''
        index = T.lscalar('index')  # index to a minibatch
        learning_rate = T.scalar('lr')  # learning rate to use

        # symbolic bounds of the minibatch selected by `index`
        batch_begin = index * batch_size
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:
            # using CD-k here (persistent=None) for training each RBM.
            # TODO: change cost function to reconstruction error
            cost, updates = rbm.get_cost_updates(learning_rate,
                                                 persistent=None, k=k)
            fn = theano.function(
                inputs=[index, theano.Param(learning_rate, default=0.1)],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_set_x[batch_begin:batch_end]
                }
            )
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, train_set_x, batch_size, learning_rate):
        '''Generates a function `train_fn` that performs one step of
        autoencoder fine-tuning on a minibatch, and a function `test_score`
        that returns the reconstruction cost of every minibatch.

        Both functions are evaluated on `train_set_x`; no separate
        validation or test set is used.

        :type train_set_x: theano shared variable
        :param train_set_x: shared variable holding all training datapoints

        :type batch_size: int
        :param batch_size: size of a minibatch

        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage

        :return: tuple ``(train_fn, test_score)``
        '''
        index = T.lscalar('index')  # index to a [mini]batch
        n_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
        minibatch = train_set_x[index * batch_size: (index + 1) * batch_size]

        # plain gradient descent on every model parameter
        gparams = T.grad(self.finetune_cost, self.params)
        updates = [(param, param - gparam * learning_rate)
                   for param, gparam in zip(self.params, gparams)]

        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={self.x: minibatch}
        )

        test_score_i = theano.function(
            [index],
            self.finetune_cost,
            givens={self.x: minibatch}
        )

        # Create a function that scans the entire data set
        def test_score():
            return [test_score_i(i) for i in xrange(n_batches)]

        return train_fn, test_score

    def build_generative_finetune_fns(self, train_set_outputs, train_set_labels,
                                      batch_size, learning_rate):
        """
        Builds the training and scoring functions for the isolated decoder.

        Each row of `train_set_labels` is fed in as top-level activations and
        the decoder is trained to reproduce the matching row of
        `train_set_outputs`.

        :param train_set_outputs: shared variable with the target piano rolls
        :param train_set_labels: shared variable with one top-level pattern
                                 per target row
        :type batch_size: int
        :param batch_size: size of a minibatch
        :type learning_rate: float
        :param learning_rate: gradient descent step size

        :return: tuple ``(train_fn, test_score)``
        """
        index = T.lscalar('index')  # index to a [mini]batch
        n_batches = (train_set_outputs.get_value(borrow=True).shape[0]
                     // batch_size)
        batch_outputs = train_set_outputs[
            index * batch_size: (index + 1) * batch_size]
        batch_labels = train_set_labels[
            index * batch_size: (index + 1) * batch_size]

        # Only the visible biases and the weights are parameters of the
        # generative (decoder-only) model; the hidden biases are not used.
        gen_params = []
        for i in range(self.n_layers):
            gen_params.append(self.rbm_layers[i].vbias)
            gen_params.append(self.rbm_layers[i].W)
        gparams = T.grad(self.generative_cost, gen_params)

        updates = [(param, param - gparam * learning_rate)
                   for param, gparam in zip(gen_params, gparams)]

        train_fn = theano.function(
            inputs=[index],
            outputs=self.generative_cost,
            updates=updates,
            givens={
                self.x: batch_outputs,
                self.isolated_reverse[-1].input: batch_labels,
            }
        )

        test_score_i = theano.function(
            [index],
            self.generative_cost,
            givens={
                self.x: batch_outputs,
                self.isolated_reverse[-1].input: batch_labels,
            }
        )

        # Create a function that scans the entire data set
        def test_score():
            return [test_score_i(i) for i in xrange(n_batches)]

        return train_fn, test_score

    def generate(self, top_level):
        """
        Make a new piano roll, given top level values.  (Uses the backwards
        section of the network to make a sample.)

        `top_level` replaces the input of the topmost reverse layer.  A new
        Theano function is compiled on every call.
        """
        generator = theano.function(
            [],
            self.reverse_layers[0].output,
            givens={
                self.reverse_layers[-1].input: top_level
            }
        )
        return generator()

    def label(self, to_label, x_mask, learning_rate):
        """
        Estimate the top layer, given an incomplete layer 1.

        Returns a compiled function.  Each call performs one gradient step on
        `self.isolated_reverse_input` and returns the masked reconstruction
        cost.

        :param to_label: target values substituted for self.x
        :param x_mask: weights multiplied into the reconstruction error;
                       entries that are 0 do not contribute to the cost
        :param learning_rate: gradient descent step size
        """
        grad = T.grad(self.l1_cost, self.isolated_reverse_input)
        update = (self.isolated_reverse_input,
                  self.isolated_reverse_input - grad * learning_rate)

        train_fn = theano.function(
            inputs=[],
            outputs=self.l1_cost,
            updates=[update],
            givens={
                self.x: to_label,
                self.x_mask: x_mask,
            }
        )
        return train_fn

    def train_dbn(self, data_file, finetune_lr=0.01, pretraining_epochs=100,
                  pretrain_lr=0.05, k=1, training_epochs=1000, batch_size=10):
        """
        Pre-trains and fine-tunes the network on the pickled data set.

        Side effects: writes 'initial-model.pickle' after pre-training and
        './my-model.pickle' (via dump_params) after fine-tuning, and prints
        progress to stdout/stderr.

        :param data_file: path of a pickle holding the training array
        :param finetune_lr: learning rate for the fine-tuning stage
        :param pretraining_epochs: epochs of CD-k per pre-trained layer
        :param pretrain_lr: learning rate for the pre-training stage
        :param k: number of Gibbs steps in CD-k
        :param training_epochs: maximum number of fine-tuning epochs
        :param batch_size: minibatch size
        """
        # NOTE(security): pickle files must come from a trusted source.
        with open(data_file, 'rb') as data_in:
            raw_x = cPickle.load(data_in).astype(dtype=NUMPY_DTYPE)
        train_set_x = theano.shared(raw_x)

        # number of whole minibatches in the training set
        n_train_batches = (train_set_x.get_value(borrow=True).shape[0]
                           // batch_size)
        print(n_train_batches)

        #########################
        # PRETRAINING THE MODEL #
        #########################
        print('... getting the pretraining functions')
        pretraining_fns = self.pretraining_functions(train_set_x=train_set_x,
                                                     batch_size=batch_size,
                                                     k=k)

        print('... pre-training the model')
        start_time = time.clock()
        # Pre-train layer-wise.  Only the first n_layers - 1 RBMs are trained
        # here; the top RBM is skipped.
        # NOTE(review): presumably intentional, since the top layer is driven
        # by random labels during generative fine-tuning - confirm.
        for i in xrange(self.n_layers - 1):
            for epoch in xrange(pretraining_epochs):
                # go through the training set
                c = []
                for batch_index in xrange(n_train_batches):
                    c.append(pretraining_fns[i](index=batch_index,
                                                lr=pretrain_lr))
                # Two spaces after "cost" reproduce the output of the former
                # pair of Python 2 print statements.
                print('Pre-training layer %i, epoch %d, cost  %s'
                      % (i, epoch, numpy.mean(c)))

        end_time = time.clock()
        sys.stderr.write('The pretraining code for file ' +
                         os.path.split(__file__)[1] +
                         ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                         '\n')

        # If you'd like to try out different parameters for the fine-tuner
        # only, you can cache the initial model state, so you don't have to
        # pre-train every time.
        with open('initial-model.pickle', 'wb') as model_out:
            cPickle.dump(self, model_out, protocol=cPickle.HIGHEST_PROTOCOL)

        ########################
        # FINETUNING THE MODEL #
        ########################
        print('... getting the finetuning functions')
        use_autoencoder = False
        if use_autoencoder:
            train_fn, test_model = self.build_finetune_functions(
                train_set_x=train_set_x,
                batch_size=batch_size,
                learning_rate=finetune_lr
            )
        else:
            # One random binary top-level pattern per training example.
            raw_labels = numpy.random.randint(
                2, size=[raw_x.shape[0], self.layer_sizes[-1]]
            ).astype(dtype=NUMPY_DTYPE)
            labels = theano.shared(raw_labels)
            train_fn, test_model = self.build_generative_finetune_fns(
                train_set_outputs=train_set_x,
                train_set_labels=labels,
                batch_size=batch_size,
                learning_rate=finetune_lr
            )

        print('... finetuning the model')
        # early-stopping parameters
        patience = 4 * n_train_batches  # look at this many minibatches
                                        # regardless
        patience_increase = 2.    # wait this much longer when a new best is
                                  # found
        improvement_threshold = 0.995  # a relative improvement of this much
                                       # is considered significant
        # go through this many minibatches before checking the network; in
        # this case we check every epoch
        validation_frequency = min(n_train_batches, patience // 2)

        best_validation_loss = numpy.inf
        # Defined up front so the summary below cannot hit an unbound name
        # when no validation step ever runs.
        best_iter = -1
        start_time = time.clock()

        done_looping = False
        epoch = 0

        while (epoch < training_epochs) and (not done_looping):
            epoch = epoch + 1
            for minibatch_index in xrange(n_train_batches):
                train_fn(minibatch_index)
                iteration = (epoch - 1) * n_train_batches + minibatch_index

                if (iteration + 1) % validation_frequency == 0:
                    validation_losses = test_model()
                    this_validation_loss = numpy.mean(validation_losses)
                    print(
                        'epoch %i, minibatch %i/%i, validation error %f %%'
                        % (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            this_validation_loss * 100.
                        )
                    )

                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        # improve patience if loss improvement is good enough
                        if (
                            this_validation_loss < best_validation_loss *
                            improvement_threshold
                        ):
                            patience = max(patience,
                                           iteration * patience_increase)

                        # save best validation score and iteration number
                        best_validation_loss = this_validation_loss
                        best_iter = iteration

                if patience <= iteration:
                    done_looping = True
                    break

        end_time = time.clock()
        print(
            (
                'Optimization complete with best validation score of %f, '
                'obtained at iteration %i, '
            ) % (best_validation_loss, best_iter + 1)
        )
        sys.stderr.write('The fine tuning code for file ' +
                         os.path.split(__file__)[1] +
                         ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                         '\n')
        self.dump_params('./my-model.pickle')

    def sample(self, top_level=None, rootLoc='./', save=True, threshold=0.5,
               filename='test.midi'):
        """
        Generates a sample from the trained neural net.

        top_level is a 10 x [size of top layer] matrix whose rows contain
        values for the top layer.  Most of the time, I only use the first
        row, but you can only process data in increments of batch_size.
        If top_level is None, a random binary matrix is used.

        Always writes 'test.png' (a picture of the piano roll) into rootLoc;
        writes the MIDI file `filename` there as well when `save` is true.

        :param threshold: if not None, the returned roll is binarized:
                          1 where the output exceeds it, 0 elsewhere
        :return: the first generated piano roll, as an 88 x 64 array
        """
        if top_level is None:
            top_level_size = self.layer_sizes[-1]
            top_level = numpy.random.randint(
                2, size=[10, top_level_size]).astype(dtype=NUMPY_DTYPE)

        output = self.generate(top_level)
        output = output.reshape([10, 88*64])
        firstIm = output[0, :].reshape([88, 64])

        # Makes a little picture of the piano roll.
        outIm = Image.fromarray((firstIm*255).astype('uint8'))
        outIm.save(path.join(rootLoc, 'test.png'))

        if threshold is not None:
            # Compare once and write the result back.  Assigning 1 first and
            # then re-testing "<= threshold" would wipe the freshly written
            # ones whenever threshold >= 1.
            firstIm[:] = firstIm > threshold
        if save:
            midiwrite(path.join(rootLoc, filename), firstIm.T,
                      r=(12, 109), dt=64)
        return firstIm

    def label_from_file(self, rootLoc, fileLoc, learn_rate, n_iters, threshold):
        """
        Given a xml file at fileLoc, harmonizes the melody in the xml file, by
        doing gradient descent on the top hidden layer of the network.  This
        gives us an estimate of the top layer activations that might generate
        the melody.  We then run the decoder to get the entire harmony from
        the top level activations that we estimate.

        Writes 'test.midi' into rootLoc and returns the final piano roll.

        :param rootLoc: directory for the generated files
        :param fileLoc: path of the xml file holding the melody
        :param learn_rate: step size of the gradient descent
        :param n_iters: number of gradient descent steps
        :param threshold: binarization threshold passed on to sample()
        """
        noteReader = myparser.LegatoNoteAdder(64)
        myparser.read(fileLoc, noteReader.handle)
        snippet = noteReader.mtx
        mask = melody_blocker(snippet)

        # Every row of the 10-row batch carries the same melody and mask.
        in_data = numpy.zeros([10, 88*64])
        x_mask = numpy.zeros([10, 88*64])
        in_data[:] = snippet.reshape([88*64])
        x_mask[:] = mask.reshape([88*64])

        # Start the search from random top-level activations.
        self.isolated_reverse_input.set_value(
            numpy.random.sample(
                [10, self.layer_sizes[-1]]).astype(NUMPY_DTYPE))

        # Do gradient descent to estimate the top-level activations.
        trainer = self.label(in_data, x_mask, learn_rate)
        for i in range(n_iters):
            print(trainer())

        # Then, generate using it.  This must go through `self`: the former
        # reference to a module-level `dbn` only resolved when this file was
        # run as a script.
        result = self.sample(self.isolated_reverse_input, rootLoc=rootLoc,
                             save=False, threshold=threshold)

        # Add the melody back onto the snippet.
        final = result * (1.0 - mask)
        final = final + snippet
        final[final > 0.5] = 1
        midiwrite(path.join(rootLoc, 'test.midi'), final.T,
                  r=(12, 109), dt=64)
        return final
def melody_blocker(snippet):
    """
    Makes a mask where anything above the top line of the snippet is 1.  Also
    enforces empty space a major 2nd above and below the melody.  (This means
    the optimizer will consider any note above the top line of the melody, or
    too close to the melody, wrong.)

    :param snippet: 2-D array indexed as [pitch, time]; non-zero entries are
                    sounding notes.  It is not modified.
    :return: a copy of `snippet` in which, for every time step that has at
             least one note, all pitches from the highest note upwards and
             all pitches within two rows of any note are set to 1
    """
    envelope = numpy.copy(snippet)
    _, length = snippet.shape
    for i in range(length):
        # Row indices of the sounding notes, in ascending order.
        occupied = numpy.flatnonzero(snippet[:, i])
        if occupied.size == 0:
            continue
        top = occupied[-1]
        envelope[top:, i] = 1
        for pitch in occupied:
            # Clamp at row 0: a negative start would wrap around to the end
            # of the axis and turn the slice into an empty one, leaving the
            # two lowest pitches without any padding.
            envelope[max(pitch - 2, 0):pitch + 3, i] = 1
    return envelope
def load_from_dump(inLoc):
    """
    Loads data from dumped state (generated by AutoencodingDBN.dump_params),
    and creates a new DBN.

    :param inLoc: path of the pickle written by dump_params.  It holds a dict
                  keyed by (layer, kind) with kind 0 = W, 1 = hidden bias,
                  2 = reverse-layer bias.
    :return: a new AutoencodingDBN carrying the stored parameters
    """
    # NOTE(security): pickle files must come from a trusted source.
    with open(inLoc, 'rb') as dump_file:
        dump = cPickle.load(dump_file)

    # Get the number of layers (the keys are 0-based layer indices).
    n_layers = max([layer for layer, _ in dump] + [0]) + 1

    # Get the size of each layer from the length of its hidden bias.
    layer_sizes = [len(dump[(layer, 1)]) for layer in range(n_layers)]

    # For now, the size of the input is fixed at 88x64, but you can read that
    # out of the dump, as well.
    dbn = AutoencodingDBN(numpy_rng=numpy.random.RandomState(),
                          n_ins=88*64,
                          hidden_layers_sizes=layer_sizes)
    for layer in range(n_layers):
        dbn.sigmoid_layers[layer].W.set_value(dump[(layer, 0)])
        dbn.sigmoid_layers[layer].b.set_value(dump[(layer, 1)])
        dbn.reverse_layers[layer].b.set_value(dump[(layer, 2)])
    return dbn
if __name__ == '__main__':
    # Usage: python DBN.py train|sample|harmonize
    # A missing argument is reported like an unknown one instead of raising
    # IndexError.
    command = sys.argv[1] if len(sys.argv) > 1 else None
    if command == 'train':
        dbn = AutoencodingDBN(numpy_rng=numpy.random.RandomState(),
                              n_ins=88*64,
                              hidden_layers_sizes=[1024, 256, 64])
        # The data set in the repository is named with an underscore.
        dbn.train_dbn('./joplin_data.pickle')
    elif command == 'sample':
        dbn = load_from_dump('./joplin-model.pickle')
        dbn.sample(threshold=0.5)
    elif command == 'harmonize':
        dbn = load_from_dump('./joplin-model.pickle')
        dbn.label_from_file(path.dirname(sys.argv[0]), './12-days.xml',
                            0.01, 500, 0.4)
    else:
        # Checked before the model is loaded, so a typo fails fast.
        print("invalid command")
================================================
FILE: DeepLearningTutorials/.gitignore
================================================
code/*.pyc
code/*_plots
code/tmp*
code/midi
code/rnnslu
data/atis.*
data/mnist.pkl.gz
data/mnist_py3k.pkl.gz
data/Nottingham.zip
data/Nottingham
data/midi.zip
html
*.pyc
*~
*.swp
================================================
FILE: DeepLearningTutorials/.hgignore
================================================
syntax: glob
*.pyc
*.png
*~
================================================
FILE: DeepLearningTutorials/.travis.yml
================================================
# After changing this file, check it on:
# http://lint.travis-ci.org/
#We can't get scipy installed with the python language
#So we will use the system python from the c language.
language: c
#language: python
#python:
# - "2.5"
# - "2.7"
# - "3.2"
# command to install dependencies
before_install:
#zlib1g-dev is needed to allow PIL to uncompress the dataset.
- sudo apt-get update
- sudo apt-get install -qq libatlas3gf-base libatlas-dev zlib1g-dev zip unzip zlibc libzip-dev libjpeg8 libjpeg62-dev libfreetype6 libfreetype6-dev python-numpy python-scipy python-pip python-nose python-yaml pyflakes python-imaging
install:
# - "pip install -q numpy --use-mirrors"
# Use Pillow instead of PIL as it is better packaged
# - "pip install -q Pillow --use-mirrors"
#If we don't install numpy before SciPy 0.10.1, the SciPy installations fails.
# - "pip install -q scipy --use-mirrors"
- "sudo pip install --no-deps git+git://github.com/Theano/Theano.git"
env:
- PART="test.py:test_logistic_sgd test.py:test_logistic_cg test.py:test_mlp test.py:test_convolutional_mlp test.py:test_dA"
- PART="test.py:test_SdA"
- PART="test.py:test_dbn"
- PART="test.py:test_rbm test.py:test_rnnrbm"
- PART="-e test.py"
#i7-2600K CPU @ 3.40GHz
#166.572s #8 test.test_rbm OK
#155.114s #7 test.test_dbn OK
#152.365s #9 test.test_rnnrbm OK
#127.286s #6 test.test_SdA OK
#39.252s #5 test.test_dA OK
#27.56s #4 test.test_convolutional_mlp OK
#15.454s #3 test.test_mlp OK
#12.732s #1 test.test_logistic_sgd OK
#12.638s #2 test.test_logistic_cg OK
#i7-920
#296.475s #7 code.test.test_dbn OK
#257.272s #6 code.test.test_SdA OK
#234.776s #9 code.test.test_rnnrbm OK
#233.896s #8 code.test.test_rbm OK
#65.737s #5 code.test.test_dA OK
#37.658s #4 code.test.test_convolutional_mlp OK
#24.172s #3 code.test.test_mlp OK
#20.401s #1 code.test.test_logistic_sgd OK
#17.546s #2 code.test.test_logistic_cg OK
# On Core2 duo E8500 with MRG
#308.004s #7 code.test.test_dbn OK
#277.268s #6 code.test.test_SdA OK
#126.102s #8 code.test.test_rbm OK
#123.652s #9 code.test.test_rnnrbm OK
#77.101s #5 code.test.test_dA OK
#39.75s #4 code.test.test_convolutional_mlp OK
#30.406s #3 code.test.test_mlp OK
#21.132s #2 code.test.test_logistic_cg OK
#17.945s #1 code.test.test_logistic_sgd OK
# Unknown computer with older version of Theano
#569.882s #9 code.test.test_rbm OK
#298.992s #8 code.test.test_dbn OK
#268.901s #7 code.test.test_SdA OK
#67.292s #6 code.test.test_dA OK
#27.485s #4 code.test.test_mlp OK
#26.204s #5 code.test.test_convolutional_mlp OK
#14.676s #3 code.test.test_logistic_cg OK
#10.66s #2 code.test.test_logistic_sgd OK
#5.795s #1 code.hmc.test_hmc.test_hmc OK
script:
- cd data
- ./download.sh
- ls
- cd ../code
- pwd
- ls
- export THEANO_FLAGS=warn.ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise
- python --version
- nosetests $PART
================================================
FILE: DeepLearningTutorials/README.rst
================================================
Deep Learning Tutorials
=======================
Deep Learning is a new area of Machine Learning research, which has been
introduced with the objective of moving Machine Learning closer to one of its
original goals: Artificial Intelligence. Deep Learning is about learning
multiple levels of representation and abstraction that help to make sense of
data such as images, sound, and text. The tutorials presented here will
introduce you to some of the most important deep learning algorithms and will
also show you how to run them using Theano. Theano is a python library that
makes writing deep learning models easy, and gives the option of training them
on a GPU.
The easiest way to follow the tutorials is to `browse them online
<http://deeplearning.net/tutorial/>`_.
`Main development <http://github.com/lisa-lab/DeepLearningTutorials>`_
of this project.
.. image:: https://secure.travis-ci.org/lisa-lab/DeepLearningTutorials.png
:target: http://travis-ci.org/lisa-lab/DeepLearningTutorials
Project Layout
--------------
Subdirectories:
- code - Python files corresponding to each tutorial
- data - data and scripts to download data that is used by the tutorials
- doc - restructured text used by Sphinx to build the tutorial website
- html - built automatically by doc/Makefile, contains tutorial website
- issues_closed - issue tracking
- issues_open - issue tracking
- misc - administrative scripts
Build instructions
------------------
To build the html version of the tutorials, install sphinx and run doc/Makefile
================================================
FILE: DeepLearningTutorials/__init__.py
================================================
================================================
FILE: DeepLearningTutorials/code/DBN.py
================================================
"""
"""
import os
import sys
import time
import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer
from rbm import RBM
# start-snippet-1
class DBN(object):
"""Deep Belief Network
A deep belief network is obtained by stacking several RBMs on top of each
other. The hidden layer of the RBM at layer `i` becomes the input of the
RBM at layer `i+1`. The first layer RBM gets as input the input of the
network, and the hidden layer of the last RBM represents the output. When
used for classification, the DBN is treated as a MLP, by adding a logistic
regression layer on top.
"""
def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
             hidden_layers_sizes=[500, 500], n_outs=10):
    """Build the stacked sigmoid/RBM layers and the logistic output layer.

    :type numpy_rng: numpy.random.RandomState
    :param numpy_rng: numpy random number generator used to draw initial
                      weights

    :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
    :param theano_rng: Theano random generator; if None is given one is
                       generated based on a seed drawn from `numpy_rng`

    :type n_ins: int
    :param n_ins: dimension of the input to the DBN

    :type hidden_layers_sizes: list of ints
    :param hidden_layers_sizes: intermediate layers size, must contain
                                at least one value

    :type n_outs: int
    :param n_outs: dimension of the output of the network
    """
    self.sigmoid_layers = []
    self.rbm_layers = []
    self.params = []
    self.n_layers = len(hidden_layers_sizes)

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # Symbolic inputs: a matrix of rasterized images and a vector of
    # integer labels.
    self.x = T.matrix('x')
    self.y = T.ivector('y')

    # The DBN is an MLP whose intermediate layers each share their weights
    # with an RBM.  Pretraining updates the RBMs (and therefore the MLP
    # weights); finetuning then runs stochastic gradient descent on the
    # MLP as a whole.
    for depth in xrange(self.n_layers):
        # The bottom layer is wired to the network input; every other
        # layer is wired to the output of the layer directly beneath it.
        if depth == 0:
            fan_in = n_ins
            below = self.x
        else:
            fan_in = hidden_layers_sizes[depth - 1]
            below = self.sigmoid_layers[-1].output
        fan_out = hidden_layers_sizes[depth]

        hidden = HiddenLayer(rng=numpy_rng,
                             input=below,
                             n_in=fan_in,
                             n_out=fan_out,
                             activation=T.nnet.sigmoid)
        self.sigmoid_layers.append(hidden)

        # Only the sigmoid layers' parameters count as parameters of the
        # DBN.  The visible biases of the RBMs belong to those RBMs, not
        # to the DBN.
        self.params.extend(hidden.params)

        # The RBM reuses the sigmoid layer's weight matrix and bias.
        tied_rbm = RBM(numpy_rng=numpy_rng,
                       theano_rng=theano_rng,
                       input=below,
                       n_visible=fan_in,
                       n_hidden=fan_out,
                       W=hidden.W,
                       hbias=hidden.b)
        self.rbm_layers.append(tied_rbm)

    # Logistic regression layer on top of the MLP.
    self.logLayer = LogisticRegression(
        input=self.sigmoid_layers[-1].output,
        n_in=hidden_layers_sizes[-1],
        n_out=n_outs)
    self.params.extend(self.logLayer.params)

    # Cost for the second phase of training: the negative log likelihood
    # of the logistic regression (output) layer.
    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

    # Symbolic number of errors made on the minibatch given by self.x and
    # self.y.
    self.errors = self.logLayer.errors(self.y)
def pretraining_functions(self, train_set_x, batch_size, k):
    '''Generates a list of functions, one per RBM layer, each performing
    one step of gradient descent on that layer.

    Every returned function takes the minibatch index and, optionally,
    the learning rate `lr` (default 0.1), and returns the cost produced
    by the layer's `get_cost_updates`. To train an RBM you just need to
    iterate, calling the corresponding function on all minibatch
    indexes.

    :type train_set_x: theano.tensor.TensorType
    :param train_set_x: Shared var. that contains all datapoints used
                        for training the RBM

    :type batch_size: int
    :param batch_size: size of a [mini]batch

    :type k: int
    :param k: number of Gibbs steps to do in CD-k / PCD-k

    :return: list of compiled functions, in the same order as
             `self.rbm_layers`
    '''
    index = T.lscalar('index')  # index to a [mini]batch
    learning_rate = T.scalar('lr')  # learning rate to use

    # symbolic bounds of the minibatch selected by `index`
    batch_begin = index * batch_size
    batch_end = batch_begin + batch_size

    pretrain_fns = []
    for rbm in self.rbm_layers:
        # get the cost and the updates list
        # using CD-k here (persistent=None) for training each RBM.
        # TODO: change cost function to reconstruction error
        cost, updates = rbm.get_cost_updates(learning_rate,
                                             persistent=None, k=k)

        # compile the theano function; `self.x` is substituted by the
        # selected slice of the training data
        fn = theano.function(
            inputs=[index, theano.Param(learning_rate, default=0.1)],
            outputs=cost,
            updates=updates,
            givens={
                self.x: train_set_x[batch_begin:batch_end]
            }
        )
        pretrain_fns.append(fn)

    return pretrain_fns
def build_finetune_functions(self, datasets, batch_size, learning_rate):
    '''Generates a function `train` that implements one step of
    finetuning, a function `validate` that computes the error on every
    minibatch of the validation set, and a function `test` that
    computes the error on every minibatch of the testing set

    :type datasets: list of pairs of theano.tensor.TensorType
    :param datasets: It is a list that contain all the datasets;
                    the has to contain three pairs, `train`,
                    `valid`, `test` in this order, where each pair
                    is formed of two Theano variables, one for the
                    datapoints, the other for the labels

    :type batch_size: int
    :param batch_size: size of a minibatch

    :type learning_rate: float
    :param learning_rate: learning rate used during finetune stage

    :return: tuple `(train_fn, valid_score, test_score)`.
             `train_fn(i)` applies one gradient step on minibatch `i`
             and returns the finetuning cost; `valid_score()` and
             `test_score()` take no argument and return the list of
             per-minibatch errors over the whole corresponding set
    '''
    (train_set_x, train_set_y) = datasets[0]
    (valid_set_x, valid_set_y) = datasets[1]
    (test_set_x, test_set_y) = datasets[2]

    # number of whole minibatches for validation and testing; floor
    # division keeps the counts integral so they can be fed to xrange
    n_valid_batches = (
        valid_set_x.get_value(borrow=True).shape[0] // batch_size
    )
    n_test_batches = (
        test_set_x.get_value(borrow=True).shape[0] // batch_size
    )

    index = T.lscalar('index')  # index to a [mini]batch

    def batch_givens(set_x, set_y):
        # substitute the symbolic inputs by minibatch `index` of a dataset
        return {
            self.x: set_x[index * batch_size: (index + 1) * batch_size],
            self.y: set_y[index * batch_size: (index + 1) * batch_size]
        }

    # compute the gradients with respect to the model parameters
    gparams = T.grad(self.finetune_cost, self.params)

    # plain SGD: move every parameter against its gradient
    updates = [
        (param, param - gparam * learning_rate)
        for param, gparam in zip(self.params, gparams)
    ]

    train_fn = theano.function(
        inputs=[index],
        outputs=self.finetune_cost,
        updates=updates,
        givens=batch_givens(train_set_x, train_set_y)
    )

    test_score_i = theano.function(
        [index],
        self.errors,
        givens=batch_givens(test_set_x, test_set_y)
    )

    valid_score_i = theano.function(
        [index],
        self.errors,
        givens=batch_givens(valid_set_x, valid_set_y)
    )

    # Create a function that scans the entire validation set
    def valid_score():
        return [valid_score_i(i) for i in xrange(n_valid_batches)]

    # Create a function that scans the entire test set
    def test_score():
        return [test_score_i(i) for i in xrange(n_test_batches)]

    return train_fn, valid_score, test_score
def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
             pretrain_lr=0.01, k=1, training_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=10):
    """
    Demonstrates how to train and test a Deep Belief Network.

    This is demonstrated on MNIST. The network is first pre-trained
    layer by layer, then fine-tuned with early stopping driven by the
    validation error. Progress goes to stdout, timings to stderr.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type k: int
    :param k: number of Gibbs steps in CD/PCD

    :type training_epochs: int
    :param training_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the pickled dataset

    :type batch_size: int
    :param batch_size: the size of a minibatch
    """
    datasets = load_data(dataset)

    # only the training inputs are used directly here; the complete
    # `datasets` list is handed to build_finetune_functions below
    train_set_x, _ = datasets[0]

    # number of whole minibatches in the training set
    n_train_batches = (
        train_set_x.get_value(borrow=True).shape[0] // batch_size
    )

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print('... building the model')
    # construct the Deep Belief Network
    dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28,
              hidden_layers_sizes=[1000, 1000, 1000],
              n_outs=10)

    # start-snippet-2
    #########################
    # PRETRAINING THE MODEL #
    #########################
    print('... getting the pretraining functions')
    pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size,
                                                k=k)

    print('... pre-training the model')
    start_time = time.clock()
    # Pre-train layer-wise
    for i in xrange(dbn.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = [pretraining_fns[i](index=batch_index, lr=pretrain_lr)
                 for batch_index in xrange(n_train_batches)]
            # one line per epoch; the two spaces in front of the value
            # keep the established output format
            print('Pre-training layer %i, epoch %d, cost ' % (i, epoch) +
                  ' ' + str(numpy.mean(c)))

    end_time = time.clock()
    # end-snippet-2
    sys.stderr.write('The pretraining code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')

    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print('... getting the finetuning functions')
    train_fn, validate_model, test_model = dbn.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_lr
    )

    print('... finetuning the model')
    # early-stopping parameters
    patience = 4 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.    # wait this much longer when a new best is
                              # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on
    # the validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    # initialised up front so the final report cannot hit an unbound
    # name when no validation pass ever runs (e.g. training_epochs=0)
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            train_fn(minibatch_index)
            # global count of minibatches processed before this one
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:

                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%'
                    % (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            # stop once the patience budget is used up
            if patience <= iteration:
                done_looping = True
                break

    end_time = time.clock()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'obtained at iteration %i, '
            'with test performance %f %%'
        ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
    )
    sys.stderr.write('The fine tuning code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
# Run the DBN demonstration with its default settings when this module is
# executed as a script.
if __name__ == '__main__':
    test_DBN()
================================================
FILE: DeepLearningTutorials/code/SdA.py
================================================
"""
This tutorial introduces stacked denoising auto-encoders (SdA) using Theano.
Denoising autoencoders are the building blocks for SdA.
They are based on auto-encoders as the ones used in Bengio et al. 2007.
An autoencoder takes an input x and first maps it to a hidden representation
y = f_{\theta}(x) = s(Wx+b), parameterized by \theta={W,b}. The resulting
latent representation y is then mapped back to a "reconstructed" vector
z \in [0,1]^d in input space z = g_{\theta'}(y) = s(W'y + b'). The weight
matrix W' can optionally be constrained such that W' = W^T, in which case
the autoencoder is said to have tied weights. The network is trained to
minimize the reconstruction error (the error between x and z).
For the denoising autoencoder, during training, first x is corrupted into
\tilde{x}, where \tilde{x} is a partially destroyed version of x by means
of a stochastic mapping. Afterwards y is computed as before (using
\tilde{x}), y = s(W\tilde{x} + b) and z as s(W'y + b'). The reconstruction
error is now measured between z and the uncorrupted input x, which is
computed as the cross-entropy :
- \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)]
References :
- P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and
Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103,
2008
- Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
Training of Deep Networks, Advances in Neural Information Processing
Systems 19, 2007
"""
import os
import sys
import time
import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer
from dA import dA
# start-snippet-1
class SdA(object):
    """Stacked denoising auto-encoder class (SdA)

    A stacked denoising autoencoder model is obtained by stacking several
    dAs. The hidden layer of the dA at layer `i` becomes the input of
    the dA at layer `i+1`. The first layer dA gets as input the input of
    the SdA, and the hidden layer of the last dA represents the output.
    Note that after pretraining, the SdA is dealt with as a normal MLP,
    the dAs are only used to initialize the weights.
    """

    def __init__(
        self,
        numpy_rng,
        theano_rng=None,
        n_ins=784,
        hidden_layers_sizes=(500, 500),
        n_outs=10,
        corruption_levels=(0.1, 0.1)
    ):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `numpy_rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: sequence of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: sequence of float
        :param corruption_levels: amount of corruption to use for each
                                  layer. Not read by the constructor: the
                                  corruption level is supplied on each
                                  call of the pretraining functions.
        """

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        # test against None explicitly rather than relying on the
        # truthiness of a random-stream object
        if theano_rng is None:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
                                 # [int] labels
        # end-snippet-1

        # The SdA is an MLP, for which all weights of intermediate layers
        # are shared with a different denoising autoencoders
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer
        # During pretraining we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well)
        # During finetuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP

        # start-snippet-2
        for i in xrange(self.n_layers):
            # the first layer reads the SdA input; every other layer reads
            # the activation of the hidden layer below it, whose number of
            # units is therefore its input size
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.sigmoid_layers[-1].output

            # construct the sigmoidal layer
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            # its arguably a philosophical question...
            # but we are going to only declare that the parameters of the
            # sigmoid_layers are parameters of the StackedDAA
            # the visible biases in the dA are parameters of those
            # dA, but not the SdA
            self.params.extend(sigmoid_layer.params)

            # Construct a denoising autoencoder that shares its weights
            # and hidden bias with this layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b)
            self.dA_layers.append(dA_layer)
        # end-snippet-2
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs
        )

        self.params.extend(self.logLayer.params)

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

    def pretraining_functions(self, train_set_x, batch_size):
        ''' Generates a list of functions, each of them implementing one
        step in training the dA corresponding to the layer with same index.

        Every returned function takes the minibatch index and, optionally,
        `corruption` (default 0.2) and `lr` (default 0.1), and returns the
        cost produced by the dA's `get_cost_updates`. To train a dA you
        just need to iterate, calling the corresponding function on all
        minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared variable that contains all datapoints used
                            for training the dA

        :type batch_size: int
        :param batch_size: size of a [mini]batch

        :return: list of compiled functions, in the same order as
                 `self.dA_layers`
        '''

        index = T.lscalar('index')  # index to a [mini]batch
        corruption_level = T.scalar('corruption')  # % of corruption to use
        learning_rate = T.scalar('lr')  # learning rate to use

        # symbolic bounds of the minibatch selected by `index`
        batch_begin = index * batch_size
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        # the loop variable is deliberately not called `dA`, which would
        # hide the imported dA class inside this method
        for dA_layer in self.dA_layers:
            # get the cost and the updates list
            cost, updates = dA_layer.get_cost_updates(corruption_level,
                                                      learning_rate)
            # compile the theano function
            fn = theano.function(
                inputs=[
                    index,
                    theano.Param(corruption_level, default=0.2),
                    theano.Param(learning_rate, default=0.1)
                ],
                outputs=cost,
                updates=updates,
                givens={
                    self.x: train_set_x[batch_begin: batch_end]
                }
            )
            pretrain_fns.append(fn)

        return pretrain_fns

    def build_finetune_functions(self, datasets, batch_size, learning_rate):
        '''Generates a function `train` that implements one step of
        finetuning, a function `validate` that computes the error on
        every minibatch of the validation set, and a function `test` that
        computes the error on every minibatch of the testing set

        :type datasets: list of pairs of theano.tensor.TensorType
        :param datasets: It is a list that contain all the datasets;
                         the has to contain three pairs, `train`,
                         `valid`, `test` in this order, where each pair
                         is formed of two Theano variables, one for the
                         datapoints, the other for the labels

        :type batch_size: int
        :param batch_size: size of a minibatch

        :type learning_rate: float
        :param learning_rate: learning rate used during finetune stage

        :return: tuple `(train_fn, valid_score, test_score)`.
                 `train_fn(i)` applies one gradient step on minibatch `i`
                 and returns the finetuning cost; `valid_score()` and
                 `test_score()` take no argument and return the list of
                 per-minibatch errors over the whole corresponding set
        '''

        (train_set_x, train_set_y) = datasets[0]
        (valid_set_x, valid_set_y) = datasets[1]
        (test_set_x, test_set_y) = datasets[2]

        # number of whole minibatches for validation and testing; floor
        # division keeps the counts integral so they can be fed to xrange
        n_valid_batches = (
            valid_set_x.get_value(borrow=True).shape[0] // batch_size
        )
        n_test_batches = (
            test_set_x.get_value(borrow=True).shape[0] // batch_size
        )

        index = T.lscalar('index')  # index to a [mini]batch

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = [
            (param, param - gparam * learning_rate)
            for param, gparam in zip(self.params, gparams)
        ]

        train_fn = theano.function(
            inputs=[index],
            outputs=self.finetune_cost,
            updates=updates,
            givens={
                self.x: train_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: train_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            },
            name='train'
        )

        test_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: test_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: test_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            },
            name='test'
        )

        valid_score_i = theano.function(
            [index],
            self.errors,
            givens={
                self.x: valid_set_x[
                    index * batch_size: (index + 1) * batch_size
                ],
                self.y: valid_set_y[
                    index * batch_size: (index + 1) * batch_size
                ]
            },
            name='valid'
        )

        # Create a function that scans the entire validation set
        def valid_score():
            return [valid_score_i(i) for i in xrange(n_valid_batches)]

        # Create a function that scans the entire test set
        def test_score():
            return [test_score_i(i) for i in xrange(n_test_batches)]

        return train_fn, valid_score, test_score
def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
             pretrain_lr=0.001, training_epochs=1000,
             dataset='mnist.pkl.gz', batch_size=1):
    """
    Demonstrates how to train and test a stacked denoising autoencoder.

    This is demonstrated on MNIST. The network is first pre-trained
    layer by layer, then fine-tuned with early stopping driven by the
    validation error. Progress goes to stdout, timings to stderr.

    :type finetune_lr: float
    :param finetune_lr: learning rate used in the finetune stage
    (factor for the stochastic gradient)

    :type pretraining_epochs: int
    :param pretraining_epochs: number of epoch to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type training_epochs: int
    :param training_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the pickled dataset

    :type batch_size: int
    :param batch_size: the size of a minibatch
    """

    datasets = load_data(dataset)

    # only the training inputs are used directly here; the complete
    # `datasets` list is handed to build_finetune_functions below
    train_set_x, _ = datasets[0]

    # number of whole minibatches in the training set
    n_train_batches = (
        train_set_x.get_value(borrow=True).shape[0] // batch_size
    )

    # numpy random generator
    # start-snippet-3
    numpy_rng = numpy.random.RandomState(89677)
    print('... building the model')
    # construct the stacked denoising autoencoder class
    sda = SdA(
        numpy_rng=numpy_rng,
        n_ins=28 * 28,
        hidden_layers_sizes=[1000, 1000, 1000],
        n_outs=10
    )
    # end-snippet-3 start-snippet-4
    #########################
    # PRETRAINING THE MODEL #
    #########################
    print('... getting the pretraining functions')
    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)

    print('... pre-training the model')
    start_time = time.clock()
    # Pre-train layer-wise, with one corruption level per layer
    corruption_levels = [.1, .2, .3]
    for i in xrange(sda.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = [pretraining_fns[i](index=batch_index,
                                    corruption=corruption_levels[i],
                                    lr=pretrain_lr)
                 for batch_index in xrange(n_train_batches)]
            # one line per epoch; the two spaces in front of the value
            # keep the established output format
            print('Pre-training layer %i, epoch %d, cost ' % (i, epoch) +
                  ' ' + str(numpy.mean(c)))

    end_time = time.clock()

    sys.stderr.write('The pretraining code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
    # end-snippet-4
    ########################
    # FINETUNING THE MODEL #
    ########################

    # get the training, validation and testing function for the model
    print('... getting the finetuning functions')
    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_lr
    )

    print('... finetunning the model')
    # early-stopping parameters
    patience = 10 * n_train_batches  # look as this many examples regardless
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on
    # the validation set; in this case we check every epoch
    validation_frequency = min(n_train_batches, patience // 2)

    best_validation_loss = numpy.inf
    # initialised up front so the final report cannot hit an unbound
    # name when no validation pass ever runs (e.g. training_epochs=0)
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            train_fn(minibatch_index)
            # global count of minibatches processed before this one
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if (iteration + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if (
                        this_validation_loss < best_validation_loss *
                        improvement_threshold
                    ):
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = test_model()
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            # stop once the patience budget is used up
            if patience <= iteration:
                done_looping = True
                break

    end_time = time.clock()
    print(
        (
            'Optimization complete with best validation score of %f %%, '
            'on iteration %i, '
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., best_iter + 1, test_score * 100.)
    )
    sys.stderr.write('The training code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
# Run the SdA demonstration with its default settings when this module is
# executed as a script.
if __name__ == '__main__':
    test_SdA()
================================================
FILE: DeepLearningTutorials/code/__init__.py
================================================
================================================
FILE: DeepLearningTutorials/code/cA.py
================================================
"""This tutorial introduces Contractive auto-encoders (cA) using Theano.
They are based on auto-encoders as the ones used in Bengio et
al. 2007. An autoencoder takes an input x and first maps it to a
hidden representation y = f_{\theta}(x) = s(Wx+b), parameterized by
\theta={W,b}. The resulting latent representation y is then mapped
back to a "reconstructed" vector z \in [0,1]^d in input space z =
g_{\theta'}(y) = s(W'y + b'). The weight matrix W' can optionally be
constrained such that W' = W^T, in which case the autoencoder is said
to have tied weights. The network is trained to minimize
the reconstruction error (the error between x and z). Adding the
squared Frobenius norm of the Jacobian of the hidden mapping h with
respect to the visible units yields the contractive auto-encoder:
- \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)]
+ \| \frac{\partial h(x)}{\partial x} \|^2
References :
- S. Rifai, P. Vincent, X. Muller, X. Glorot, Y. Bengio: Contractive
Auto-Encoders: Explicit Invariance During Feature Extraction, ICML-11
- S. Rifai, X. Muller, X. Glorot, G. Mesnil, Y. Bengio, and Pascal
Vincent. Learning invariant features through local space
contraction. Technical Report 1360, Universite de Montreal
- Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
Training of Deep Networks, Advances in Neural Information Processing
Systems 19, 2007
"""
import os
import sys
import time
import numpy
import theano
import theano.tensor as T
from logistic_sgd import load_data
from utils import tile_raster_images
try:
import PIL.Image as Image
except ImportError:
import Image
class cA(object):
    r""" Contractive Auto-Encoder class (cA)

    The contractive autoencoder tries to reconstruct the input with an
    additional constraint on the latent space. With the objective of
    obtaining a robust representation of the input space, we
    regularize the L2 norm(Froebenius) of the jacobian of the hidden
    representation with respect to the input. Please refer to Rifai et
    al.,2011 for more details.

    If x is the input then equation (1) computes the projection of the
    input into the latent space h. Equation (2) computes the jacobian
    of h with respect to x. Equation (3) computes the reconstruction
    of the input, while equation (4) computes the reconstruction
    error and the added regularization term from Eq.(2).

    .. math::

        h_i = s(W_i x + b_i)                                             (1)

        J_i = h_i (1 - h_i) * W_i                                        (2)

        x' = s(W' h + b')                                                (3)

        L = -sum_{k=1}^d [x_k \log x'_k + (1-x_k) \log( 1-x'_k)]
             + lambda * sum_{i=1}^d sum_{j=1}^n J_{ij}^2                 (4)

    """

    def __init__(self, numpy_rng, input=None, n_visible=784, n_hidden=100,
                 n_batchsize=1, W=None, bhid=None, bvis=None):
        """Initialize the cA class by specifying the number of visible units
        (the dimension d of the input), the number of hidden units (the
        dimension d' of the latent or hidden space) and the batch size.
        The constructor also receives symbolic variables for the input,
        weights and bias. The contraction level is not a constructor
        argument; it is given to `get_cost_updates`.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: number random generator used to generate weights;
                          only used when `W` is None

        :type input: theano.tensor.TensorType
        :param input: a symbolic description of the input or None for
                      standalone cA

        :type n_visible: int
        :param n_visible: number of visible units

        :type n_hidden: int
        :param n_hidden:  number of hidden units

        :type n_batchsize: int
        :param n_batchsize: number of examples per batch

        :type W: theano.tensor.TensorType
        :param W: Theano variable pointing to a set of weights that should be
                  shared between the cA and another architecture; if the cA
                  should be standalone set this to None

        :type bhid: theano.tensor.TensorType
        :param bhid: Theano variable pointing to a set of biases values (for
                     hidden units) that should be shared between the cA and
                     another architecture; if the cA should be standalone
                     set this to None

        :type bvis: theano.tensor.TensorType
        :param bvis: Theano variable pointing to a set of biases values (for
                     visible units) that should be shared between the cA
                     and another architecture; if the cA should be
                     standalone set this to None

        """
        self.n_visible = n_visible
        self.n_hidden = n_hidden
        self.n_batchsize = n_batchsize

        # "not supplied" is tested with `is None`: a supplied value must be
        # used as it is, whatever its truth value (array-likes, for
        # instance, have no unambiguous truth value)
        # note : W' was written as `W_prime` and b' as `b_prime`
        if W is None:
            # W is initialized with `initial_W` which is uniformly sampled
            # between -4*sqrt(6./(n_visible+n_hidden)) and
            # 4*sqrt(6./(n_hidden+n_visible)); the output of uniform is
            # converted using asarray to dtype theano.config.floatX so
            # that the code is runnable on GPU
            initial_W = numpy.asarray(
                numpy_rng.uniform(
                    low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                    high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
                    size=(n_visible, n_hidden)
                ),
                dtype=theano.config.floatX
            )
            W = theano.shared(value=initial_W, name='W', borrow=True)

        if bvis is None:
            bvis = theano.shared(value=numpy.zeros(n_visible,
                                                   dtype=theano.config.floatX),
                                 borrow=True)

        if bhid is None:
            bhid = theano.shared(value=numpy.zeros(n_hidden,
                                                   dtype=theano.config.floatX),
                                 name='b',
                                 borrow=True)

        self.W = W
        # b corresponds to the bias of the hidden
        self.b = bhid
        # b_prime corresponds to the bias of the visible
        self.b_prime = bvis
        # tied weights, therefore W_prime is W transpose
        self.W_prime = self.W.T

        # if no input is given, generate a variable representing the input
        if input is None:
            # we use a matrix because we expect a minibatch of several
            # examples, each example being a row
            self.x = T.dmatrix(name='input')
        else:
            self.x = input

        self.params = [self.W, self.b, self.b_prime]

    def get_hidden_values(self, input):
        """ Computes the values of the hidden layer """
        return T.nnet.sigmoid(T.dot(input, self.W) + self.b)

    def get_jacobian(self, hidden, W):
        """Computes the jacobian of the hidden layer with respect to
        the input, reshapes are necessary for broadcasting the
        element-wise product on the right axis

        `hidden` is reshaped to (n_batchsize, 1, n_hidden) and `W` to
        (1, n_visible, n_hidden) before they are multiplied.
        """
        return T.reshape(hidden * (1 - hidden),
                         (self.n_batchsize, 1, self.n_hidden)) * T.reshape(
                             W, (1, self.n_visible, self.n_hidden))

    def get_reconstructed_input(self, hidden):
        """Computes the reconstructed input given the values of the
        hidden layer

        """
        return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)

    def get_cost_updates(self, contraction_level, learning_rate):
        """ This function computes the cost and the updates for one training
        step of the cA

        :param contraction_level: weight of the jacobian penalty in the cost
        :param learning_rate: step size of the gradient descent updates

        :return: pair `(cost, updates)` where `updates` is a list of
                 `(param, new_value)` pairs, one per entry of `self.params`.
                 As a side effect the reconstruction and jacobian terms are
                 stored on the instance as `L_rec` and `L_jacob`.
        """

        y = self.get_hidden_values(self.x)
        z = self.get_reconstructed_input(y)
        J = self.get_jacobian(y, self.W)
        # note : we sum over the size of a datapoint; if we are using
        #        minibatches, L_rec will be a vector, with one entry per
        #        example in minibatch
        self.L_rec = - T.sum(self.x * T.log(z) +
                             (1 - self.x) * T.log(1 - z),
                             axis=1)

        # Compute the jacobian and average over the number of samples/minibatch
        self.L_jacob = T.sum(J ** 2) / self.n_batchsize

        # note : L_rec is now a vector, where each element is the
        #        cross-entropy cost of the reconstruction of the
        #        corresponding example of the minibatch. We need to
        #        compute the average of all these to get the cost of
        #        the minibatch
        cost = T.mean(self.L_rec) + contraction_level * T.mean(self.L_jacob)

        # compute the gradients of the cost of the `cA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)
        # plain gradient descent step for every parameter
        updates = [
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ]

        return (cost, updates)
def test_cA(learning_rate=0.01, training_epochs=20,
            dataset='mnist.pkl.gz',
            batch_size=10, output_folder='cA_plots', contraction_level=.1):
    """
    Train a contractive auto-encoder and save an image of its filters.

    This demo is tested on MNIST

    :type learning_rate: float
    :param learning_rate: learning rate used for training the contracting
                          AutoEncoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: path to the picked dataset

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type output_folder: string
    :param output_folder: directory (created if missing) in which
                          'cae_filters.png' is written

    :type contraction_level: float
    :param contraction_level: weight of the Jacobian penalty in the cost
    """
    datasets = load_data(dataset)
    train_set_x, _ = datasets[0]

    # number of whole minibatches in the training set
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    # remember where we started so the working directory is restored even
    # for nested output folders or when training raises
    original_dir = os.getcwd()
    os.chdir(output_folder)
    try:
        ####################################
        #        BUILDING THE MODEL        #
        ####################################
        rng = numpy.random.RandomState(123)

        ca = cA(numpy_rng=rng, input=x,
                n_visible=28 * 28, n_hidden=500, n_batchsize=batch_size)

        cost, updates = ca.get_cost_updates(
            contraction_level=contraction_level,
            learning_rate=learning_rate)

        train_ca = theano.function(
            [index],
            [T.mean(ca.L_rec), ca.L_jacob],
            updates=updates,
            givens={
                x: train_set_x[index * batch_size: (index + 1) * batch_size]
            }
        )

        start_time = time.clock()

        ############
        # TRAINING #
        ############

        # go through training epochs
        for epoch in xrange(training_epochs):
            # go through training set; each call yields the pair
            # [mean reconstruction cost, jacobian penalty] of one minibatch
            batch_stats = [train_ca(batch_index)
                           for batch_index in xrange(n_train_batches)]

            # one row per minibatch: column 0 holds the reconstruction
            # costs, column 1 the jacobian penalties
            epoch_stats = numpy.vstack(batch_stats)
            reconstruction_cost = numpy.mean(epoch_stats[:, 0])
            jacobian_norm = numpy.mean(numpy.sqrt(epoch_stats[:, 1]))
            print('%s %s %s %s' % (
                'Training epoch %d, reconstruction cost ' % epoch,
                reconstruction_cost, ' jacobian norm ', jacobian_norm))

        end_time = time.clock()

        training_time = (end_time - start_time)

        sys.stderr.write('The code for file ' + os.path.split(__file__)[1] +
                         ' ran for %.2fm' % ((training_time) / 60.) + '\n')

        image = Image.fromarray(tile_raster_images(
            X=ca.W.get_value(borrow=True).T,
            img_shape=(28, 28), tile_shape=(10, 10),
            tile_spacing=(1, 1)))

        image.save('cae_filters.png')
    finally:
        os.chdir(original_dir)
# Run the contractive auto-encoder demo when this file is executed as a
# script.
if __name__ == '__main__':
    test_cA()
================================================
FILE: DeepLearningTutorials/code/convolutional_mlp.py
================================================
"""This tutorial introduces the LeNet5 neural network architecture
using Theano. LeNet5 is a convolutional neural network, good for
classifying images. This tutorial shows how to build the architecture,
and comes with all the hyper-parameters you need to reproduce the
paper's MNIST results.
This implementation simplifies the model in the following ways:
- LeNetConvPool doesn't implement location-specific gain and bias parameters
- LeNetConvPool doesn't implement pooling by average, it implements pooling
by max.
- Digit classification is implemented with a logistic regression rather than
an RBF network
- LeNet5 did not use fully-connected convolutions at its second layer
References:
- Y. LeCun, L. Bottou, Y. Bengio and P. Haffner:
Gradient-Based Learning Applied to Document
Recognition, Proceedings of the IEEE, 86(11):2278-2324, November 1998.
http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf
"""
import os
import sys
import time
import numpy
import theano
import theano.tensor as T
from theano.tensor.signal import downsample
from theano.tensor.nnet import conv
from logistic_sgd import LogisticRegression, load_data
from mlp import HiddenLayer
class LeNetConvPoolLayer(object):
    """Convolution followed by max-pooling, as used in a LeNet network """

    def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2, 2)):
        """
        Allocate a LeNetConvPoolLayer with shared variable internal parameters.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols)
        """
        # the filters must consume exactly the feature maps of the input
        assert image_shape[1] == filter_shape[1]
        self.input = input

        n_filters = filter_shape[0]

        # each hidden unit sees "num input feature maps * filter height *
        # filter width" inputs
        fan_in = numpy.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from
        # "num output feature maps * filter height * filter width" /
        # pooling size
        fan_out = (n_filters * numpy.prod(filter_shape[2:]) /
                   numpy.prod(poolsize))

        # weights drawn uniformly from [-W_bound, W_bound]
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        initial_W = numpy.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX
        )
        self.W = theano.shared(initial_W, borrow=True)

        # one bias per output feature map, starting at zero
        initial_b = numpy.zeros((n_filters,), dtype=theano.config.floatX)
        self.b = theano.shared(value=initial_b, borrow=True)

        # convolve input feature maps with filters
        convolved = conv.conv2d(
            input=input,
            filters=self.W,
            filter_shape=filter_shape,
            image_shape=image_shape
        )

        # max-pool each feature map individually
        pooled = downsample.max_pool_2d(
            input=convolved,
            ds=poolsize,
            ignore_border=True
        )

        # the 1D bias is reshaped to (1, n_filters, 1, 1) so that it is
        # broadcast across mini-batches and feature map width & height
        broadcast_bias = self.b.dimshuffle('x', 0, 'x', 'x')
        self.output = T.tanh(pooled + broadcast_bias)

        # store parameters of this layer
        self.params = [self.W, self.b]
def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
                    dataset='mnist.pkl.gz',
                    nkerns=(20, 50), batch_size=500):
    """ Demonstrates lenet on MNIST dataset

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: path to the dataset used for training /testing (MNIST here)

    :type nkerns: sequence of two ints
    :param nkerns: number of kernels on each layer

    :type batch_size: int
    :param batch_size: number of examples per minibatch
    """

    rng = numpy.random.RandomState(23455)

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of whole minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # start-snippet-1
    x = T.matrix('x')  # the data is presented as rasterized images
    y = T.ivector('y')  # the labels are presented as 1D vector of
                        # [int] labels

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # Reshape matrix of rasterized images of shape (batch_size, 28 * 28)
    # to a 4D tensor, compatible with our LeNetConvPoolLayer
    # (28, 28) is the size of MNIST images.
    layer0_input = x.reshape((batch_size, 1, 28, 28))

    # Construct the first convolutional pooling layer:
    # filtering reduces the image size to (28-5+1 , 28-5+1) = (24, 24)
    # maxpooling reduces this further to (24/2, 24/2) = (12, 12)
    # 4D output tensor is thus of shape (batch_size, nkerns[0], 12, 12)
    layer0 = LeNetConvPoolLayer(
        rng,
        input=layer0_input,
        image_shape=(batch_size, 1, 28, 28),
        filter_shape=(nkerns[0], 1, 5, 5),
        poolsize=(2, 2)
    )

    # Construct the second convolutional pooling layer
    # filtering reduces the image size to (12-5+1, 12-5+1) = (8, 8)
    # maxpooling reduces this further to (8/2, 8/2) = (4, 4)
    # 4D output tensor is thus of shape (batch_size, nkerns[1], 4, 4)
    layer1 = LeNetConvPoolLayer(
        rng,
        input=layer0.output,
        image_shape=(batch_size, nkerns[0], 12, 12),
        filter_shape=(nkerns[1], nkerns[0], 5, 5),
        poolsize=(2, 2)
    )

    # the HiddenLayer being fully-connected, it operates on 2D matrices of
    # shape (batch_size, num_pixels) (i.e matrix of rasterized images).
    # This will generate a matrix of shape (batch_size, nkerns[1] * 4 * 4),
    # or (500, 50 * 4 * 4) = (500, 800) with the default values.
    layer2_input = layer1.output.flatten(2)

    # construct a fully-connected layer with tanh activation
    layer2 = HiddenLayer(
        rng,
        input=layer2_input,
        n_in=nkerns[1] * 4 * 4,
        n_out=500,
        activation=T.tanh
    )

    # classify the values of the fully-connected layer
    layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10)

    # the cost we minimize during training is the NLL of the model
    cost = layer3.negative_log_likelihood(y)

    # create a function to compute the mistakes that are made by the model
    test_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        [index],
        layer3.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # create a list of all model parameters to be fit by gradient descent
    params = layer3.params + layer2.params + layer1.params + layer0.params

    # create a list of gradients for all model parameters
    grads = T.grad(cost, params)

    # train_model is a function that updates the model parameters by
    # SGD Since this model has many parameters, it would be tedious to
    # manually create an update rule for each model parameter. We thus
    # create the updates list by automatically looping over all
    # (params[i], grads[i]) pairs.
    updates = [
        (param_i, param_i - learning_rate * grad_i)
        for param_i, grad_i in zip(params, grads)
    ]

    train_model = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-1

    ###############
    # TRAIN MODEL #
    ###############
    print('... training')
    # early-stopping parameters
    patience = 10000  # run at least this many minibatch iterations
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience // 2)
                                  # go through this many
                                  # minibatches before checking the network
                                  # on the validation set; in this case we
                                  # check every epoch

    best_validation_loss = numpy.inf
    best_iter = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    done_looping = False

    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            # global count of minibatches processed so far
            iteration = (epoch - 1) * n_train_batches + minibatch_index

            if iteration % 100 == 0:
                print('%s %s' % ('training @ iter = ', iteration))
            # called for its parameter updates; the returned minibatch cost
            # is not needed here
            train_model(minibatch_index)

            if (iteration + 1) % validation_frequency == 0:

                # compute zero-one loss on validation set
                validation_losses = [validate_model(i) for i
                                     in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)
                print('epoch %i, minibatch %i/%i, validation error %f %%' %
                      (epoch, minibatch_index + 1, n_train_batches,
                       this_validation_loss * 100.))

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:

                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience,
                                       iteration * patience_increase)

                    # save best validation score and iteration number
                    best_validation_loss = this_validation_loss
                    best_iter = iteration

                    # test it on the test set
                    test_losses = [
                        test_model(i)
                        for i in xrange(n_test_batches)
                    ]
                    test_score = numpy.mean(test_losses)
                    print((' epoch %i, minibatch %i/%i, test error of '
                           'best model %f %%') %
                          (epoch, minibatch_index + 1, n_train_batches,
                           test_score * 100.))

            if patience <= iteration:
                done_looping = True
                break

    end_time = time.clock()
    print('Optimization complete.')
    print('Best validation score of %f %% obtained at iteration %i, '
          'with test performance %f %%' %
          (best_validation_loss * 100., best_iter + 1, test_score * 100.))
    sys.stderr.write('The code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
# Run the LeNet5 demo with its default hyper-parameters when this file is
# executed as a script.
if __name__ == '__main__':
    evaluate_lenet5()
def experiment(state, channel):
    """Run `evaluate_lenet5` with settings read from `state`.

    :param state: object providing `learning_rate` and `dataset` attributes
    :param channel: accepted but not used by this function
    """
    evaluate_lenet5(learning_rate=state.learning_rate,
                    dataset=state.dataset)
================================================
FILE: DeepLearningTutorials/code/dA.py
================================================
"""
This tutorial introduces denoising auto-encoders (dA) using Theano.
Denoising autoencoders are the building blocks for SdA.
They are based on auto-encoders as the ones used in Bengio et al. 2007.
An autoencoder takes an input x and first maps it to a hidden representation
y = f_{\theta}(x) = s(Wx+b), parameterized by \theta={W,b}. The resulting
latent representation y is then mapped back to a "reconstructed" vector
z \in [0,1]^d in input space z = g_{\theta'}(y) = s(W'y + b'). The weight
matrix W' can optionally be constrained such that W' = W^T, in which case
the autoencoder is said to have tied weights. The network is trained
to minimize the reconstruction error (the error between x and z).
For the denoising autoencoder, during training, first x is corrupted into
\tilde{x}, where \tilde{x} is a partially destroyed version of x by means
of a stochastic mapping. Afterwards y is computed as before (using
\tilde{x}), y = s(W\tilde{x} + b) and z as s(W'y + b'). The reconstruction
error is now measured between z and the uncorrupted input x, which is
computed as the cross-entropy :
- \sum_{k=1}^d[ x_k \log z_k + (1-x_k) \log( 1-z_k)]
References :
- P. Vincent, H. Larochelle, Y. Bengio, P.A. Manzagol: Extracting and
Composing Robust Features with Denoising Autoencoders, ICML'08, 1096-1103,
2008
- Y. Bengio, P. Lamblin, D. Popovici, H. Larochelle: Greedy Layer-Wise
Training of Deep Networks, Advances in Neural Information Processing
Systems 19, 2007
"""
import os
import sys
import time
import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from logistic_sgd import load_data
from utils import tile_raster_images
try:
import PIL.Image as Image
except ImportError:
import Image
# start-snippet-1
class dA(object):
    r"""Denoising Auto-Encoder class (dA)

    A denoising autoencoders tries to reconstruct the input from a corrupted
    version of it by projecting it first in a latent space and reprojecting
    it afterwards back in the input space. Please refer to Vincent et al.,2008
    for more details. If x is the input then equation (1) computes a partially
    destroyed version of x by means of a stochastic mapping q_D. Equation (2)
    computes the projection of the input into the latent space. Equation (3)
    computes the reconstruction of the input, while equation (4) computes the
    reconstruction error.

    .. math::

        \tilde{x} ~ q_D(\tilde{x}|x) (1)

        y = s(W \tilde{x} + b) (2)

        z = s(W' y + b') (3)

        L(x,z) = -sum_{k=1}^d [x_k \log z_k + (1-x_k) \log( 1-z_k)] (4)

    """

    def __init__(
        self,
        numpy_rng,
        theano_rng=None,
        input=None,
        n_visible=784,
        n_hidden=500,
        W=None,
        bhid=None,
        bvis=None
    ):
        """
        Initialize the dA class by specifying the number of visible units (the
        dimension d of the input ) and the number of hidden units ( the
        dimension d' of the latent or hidden space ). The
        constructor also receives symbolic variables for the input, weights and
        bias. Such a symbolic variables are useful when, for example the input
        is the result of some computations, or when weights are shared between
        the dA and an MLP layer. When dealing with SdAs this always happens,
        the dA on layer 2 gets as input the output of the dA on layer 1,
        and the weights of the dA are used in the second stage of training
        to construct an MLP.

        Every optional argument uses ``None`` (tested with ``is None``) to
        mean "not supplied".

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: number random generator used to generate weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                     generated based on a seed drawn from `numpy_rng`

        :type input: theano.tensor.TensorType
        :param input: a symbolic description of the input or None for
                      standalone dA

        :type n_visible: int
        :param n_visible: number of visible units

        :type n_hidden: int
        :param n_hidden: number of hidden units

        :type W: theano.tensor.TensorType
        :param W: Theano variable pointing to a set of weights that should be
                  shared between the dA and another architecture; if dA should
                  be standalone set this to None

        :type bhid: theano.tensor.TensorType
        :param bhid: Theano variable pointing to a set of biases values (for
                     hidden units) that should be shared between dA and another
                     architecture; if dA should be standalone set this to None

        :type bvis: theano.tensor.TensorType
        :param bvis: Theano variable pointing to a set of biases values (for
                     visible units) that should be shared between dA and
                     another architecture; if dA should be standalone set this
                     to None
        """
        self.n_visible = n_visible
        self.n_hidden = n_hidden

        # create a Theano random generator that gives symbolic random values
        if theano_rng is None:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # note : W' was written as `W_prime` and b' as `b_prime`
        if W is None:
            # W is initialized with `initial_W` which is uniformly sampled
            # from -4*sqrt(6./(n_visible+n_hidden)) and
            # 4*sqrt(6./(n_hidden+n_visible)); the output of uniform is
            # converted using asarray to dtype theano.config.floatX so
            # that the code is runnable on GPU
            bound = 4 * numpy.sqrt(6. / (n_hidden + n_visible))
            initial_W = numpy.asarray(
                numpy_rng.uniform(
                    low=-bound,
                    high=bound,
                    size=(n_visible, n_hidden)
                ),
                dtype=theano.config.floatX
            )
            W = theano.shared(value=initial_W, name='W', borrow=True)

        if bvis is None:
            bvis = theano.shared(
                value=numpy.zeros(
                    n_visible,
                    dtype=theano.config.floatX
                ),
                borrow=True
            )

        if bhid is None:
            bhid = theano.shared(
                value=numpy.zeros(
                    n_hidden,
                    dtype=theano.config.floatX
                ),
                name='b',
                borrow=True
            )

        self.W = W
        # b corresponds to the bias of the hidden
        self.b = bhid
        # b_prime corresponds to the bias of the visible
        self.b_prime = bvis
        # tied weights, therefore W_prime is W transpose
        self.W_prime = self.W.T
        self.theano_rng = theano_rng
        # if no input is given, generate a variable representing the input
        if input is None:
            # we use a matrix because we expect a minibatch of several
            # examples, each example being a row
            self.x = T.dmatrix(name='input')
        else:
            self.x = input

        self.params = [self.W, self.b, self.b_prime]
    # end-snippet-1

    def get_corrupted_input(self, input, corruption_level):
        """This function keeps ``1-corruption_level`` entries of the inputs the
        same and zero-out randomly selected subset of size ``corruption_level``
        Note : first argument of theano.rng.binomial is the shape(size) of
               random numbers that it should produce
               second argument is the number of trials
               third argument is the probability of success of any trial

                this will produce an array of 0s and 1s where 1 has a
                probability of 1 - ``corruption_level`` and 0 with
                ``corruption_level``

                The binomial function return int64 data type by
                default. int64 multiplicated by the input
                type(floatX) always return float64. To keep all data
                in floatX when floatX is float32, we set the dtype of
                the binomial to floatX. As in our case the value of
                the binomial is always 0 or 1, this don't change the
                result. This is needed to allow the gpu to work
                correctly as it only support float32 for now.

        """
        return self.theano_rng.binomial(size=input.shape, n=1,
                                        p=1 - corruption_level,
                                        dtype=theano.config.floatX) * input

    def get_hidden_values(self, input):
        """ Computes the values of the hidden layer """
        return T.nnet.sigmoid(T.dot(input, self.W) + self.b)

    def get_reconstructed_input(self, hidden):
        """Computes the reconstructed input given the values of the
        hidden layer

        """
        return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)

    def get_cost_updates(self, corruption_level, learning_rate):
        """ This function computes the cost and the updates for one training
        step of the dA

        :param corruption_level: probability with which each input entry is
                                 zeroed before encoding
        :param learning_rate: step size of the gradient-descent updates
        :return: tuple ``(cost, updates)`` where ``updates`` is a list of
                 ``(parameter, new_value)`` pairs
        """

        tilde_x = self.get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(tilde_x)
        z = self.get_reconstructed_input(y)
        # note : we sum over the size of a datapoint; if we are using
        #        minibatches, L will be a vector, with one entry per
        #        example in minibatch
        L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
        # note : L is now a vector, where each element is the
        #        cross-entropy cost of the reconstruction of the
        #        corresponding example of the minibatch. We need to
        #        compute the average of all these to get the cost of
        #        the minibatch
        cost = T.mean(L)

        # compute the gradients of the cost of the `dA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)
        # generate the list of updates
        updates = [
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ]

        return (cost, updates)
def _train_dA_and_save_filters(train_set_x, index, x, batch_size,
                               n_train_batches, learning_rate,
                               training_epochs, corruption_level,
                               timing_prefix, image_name):
    """Train one freshly seeded dA and save a picture of its filters.

    :param train_set_x: shared variable holding the training inputs
    :param index: symbolic minibatch index
    :param x: symbolic input matrix of the model
    :param batch_size: number of examples per minibatch
    :param n_train_batches: number of minibatches per epoch
    :param learning_rate: step size of the gradient-descent updates
    :param training_epochs: number of passes over the training set
    :param corruption_level: corruption probability used for training
    :param timing_prefix: beginning of the timing message sent to stderr
    :param image_name: file name (relative to the current directory) of
                       the saved filter image
    """
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=28 * 28,
        n_hidden=500
    )

    cost, updates = da.get_cost_updates(
        corruption_level=corruption_level,
        learning_rate=learning_rate
    )

    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    start_time = time.clock()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through training set
        c = [train_da(batch_index)
             for batch_index in xrange(n_train_batches)]

        print('%s %s' % ('Training epoch %d, cost ' % epoch, numpy.mean(c)))

    end_time = time.clock()

    training_time = (end_time - start_time)

    sys.stderr.write(timing_prefix +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % (training_time / 60.) + '\n')

    image = Image.fromarray(tile_raster_images(
        X=da.W.get_value(borrow=True).T,
        img_shape=(28, 28), tile_shape=(10, 10),
        tile_spacing=(1, 1)))
    image.save(image_name)


def test_dA(learning_rate=0.1, training_epochs=15,
            dataset='mnist.pkl.gz',
            batch_size=20, output_folder='dA_plots'):

    """
    Train a dA without corruption and one with 30% corruption, saving an
    image of the learned filters for each.

    This demo is tested on MNIST

    :type learning_rate: float
    :param learning_rate: learning rate used for training the DeNoising
                          AutoEncoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: path to the picked dataset

    :type batch_size: int
    :param batch_size: number of examples per minibatch

    :type output_folder: string
    :param output_folder: directory (created if missing) in which the
                          filter images are written
    """
    datasets = load_data(dataset)
    train_set_x, _ = datasets[0]

    # number of whole minibatches in the training set
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    # remember where we started so the working directory is restored even
    # for nested output folders or when training raises
    original_dir = os.getcwd()
    os.chdir(output_folder)
    try:
        ####################################
        # BUILDING THE MODEL NO CORRUPTION #
        ####################################
        _train_dA_and_save_filters(
            train_set_x, index, x, batch_size, n_train_batches,
            learning_rate, training_epochs,
            corruption_level=0.,
            timing_prefix='The no corruption code for file ',
            image_name='filters_corruption_0.png'
        )

        #####################################
        # BUILDING THE MODEL CORRUPTION 30% #
        #####################################
        _train_dA_and_save_filters(
            train_set_x, index, x, batch_size, n_train_batches,
            learning_rate, training_epochs,
            corruption_level=0.3,
            timing_prefix='The 30% corruption code for file ',
            image_name='filters_corruption_30.png'
        )
    finally:
        os.chdir(original_dir)
# Run the denoising auto-encoder demo when this file is executed as a
# script.
if __name__ == '__main__':
    test_dA()
================================================
FILE: DeepLearningTutorials/code/hmc/__init__.py
================================================
================================================
FILE: DeepLearningTutorials/code/hmc/hmc.py
================================================
"""
TODO
"""
import numpy
from theano import function, shared
from theano import tensor as TT
import theano
def sharedX(X, name):
    """Return a Theano shared variable called `name` that holds `X`
    converted to an array of dtype ``theano.config.floatX``."""
    value = numpy.asarray(X, dtype=theano.config.floatX)
    return shared(value, name=name)
def kinetic_energy(vel):
    """Returns the kinetic energy associated with the given velocity
    and mass of 1.

    Parameters
    ----------
    vel: theano matrix
        Symbolic matrix whose rows are velocity vectors.

    Returns
    -------
    return: theano vector
        Vector whose i-th entry is the kinetic energy associated with vel[i].
    """
    # squared norm of every row (sum over axis 1)
    squared_speed = (vel ** 2).sum(axis=1)
    return 0.5 * squared_speed
def hamiltonian(pos, vel, energy_fn):
    """
    Returns the Hamiltonian (sum of potential and kinetic energy) for the given
    velocity and position.

    Parameters
    ----------
    pos: theano matrix
        Symbolic matrix whose rows are position vectors.
    vel: theano matrix
        Symbolic matrix whose rows are velocity vectors.
    energy_fn: python function
        Python function, operating on symbolic theano variables, used to
        compute the potential energy at a given position.

    Returns
    -------
    return: theano vector
        Vector whose i-th entry is the Hamiltonian at position pos[i] and
        velocity vel[i].
    """
    potential = energy_fn(pos)
    # kinetic term assumes a mass of 1
    kinetic = kinetic_energy(vel)
    return potential + kinetic
def metropolis_hastings_accept(energy_prev, energy_next, s_rng):
    """
    Performs a Metropolis-Hastings accept-reject move.

    Parameters
    ----------
    energy_prev: theano vector
        Symbolic theano tensor which contains the energy associated with the
        configuration at time-step t.
    energy_next: theano vector
        Symbolic theano tensor which contains the energy associated with the
        proposed configuration at time-step t+1.
    s_rng: theano.tensor.shared_randomstreams.RandomStreams
        Theano shared random stream object used to generate the random number
        used in proposal.

    Returns
    -------
    return: boolean
        True if move is accepted, False otherwise
    """
    energy_drop = energy_prev - energy_next
    acceptance_ratio = TT.exp(energy_drop)
    # one uniform draw per entry of `energy_prev`
    threshold = s_rng.uniform(size=energy_prev.shape)
    return (acceptance_ratio - threshold) >= 0
def simulate_dynamics(initial_pos, initial_vel, stepsize, n_steps, energy_fn):
    """
    Return final (position, velocity) obtained after an `n_steps` leapfrog
    updates, using Hamiltonian dynamics.

    Parameters
    ----------
    initial_pos: shared theano matrix
        Initial position at which to start the simulation
    initial_vel: shared theano matrix
        Initial velocity of particles
    stepsize: shared theano scalar
        Scalar value controlling amount by which to move
    n_steps: integer
        Total number of leapfrog steps; the first one is computed
        explicitly and the remaining `n_steps - 1` inside a scan
    energy_fn: python function
        Python function, operating on symbolic theano variables, used to
        compute the potential energy at a given position.

    Returns
    -------
    rval1: theano matrix
        Final positions obtained after simulation
    rval2: theano matrix
        Final velocity obtained after simulation
    """

    def leapfrog(pos, vel, step):
        """
        Inside loop of Scan. Performs one step of leapfrog update, using
        Hamiltonian dynamics.

        Parameters
        ----------
        pos: theano matrix
            in leapfrog update equations, represents pos(t), position at time t
        vel: theano matrix
            in leapfrog update equations, represents vel(t - stepsize/2),
            velocity at time (t - stepsize/2)
        step: theano scalar
            scalar value controlling amount by which to move

        Returns
        -------
        rval1: [theano matrix, theano matrix]
            Symbolic theano matrices for new position pos(t + stepsize), and
            velocity vel(t + stepsize/2)
        rval2: dictionary
            Dictionary of updates for the Scan Op
        """
        # vel(t - stepsize/2) -> vel(t + stepsize/2), using the energy
        # gradient at pos(t)
        energy_grad = TT.grad(energy_fn(pos).sum(), pos)
        next_vel = vel - step * energy_grad
        # pos(t) -> pos(t + stepsize), using the updated velocity
        next_pos = pos + step * next_vel
        return [next_pos, next_vel], {}

    # velocity at time-step t + stepsize/2
    start_energy = energy_fn(initial_pos)
    start_grad = TT.grad(start_energy.sum(), initial_pos)
    vel_half_step = initial_vel - 0.5 * stepsize * start_grad
    # position at time-step t + stepsize
    pos_full_step = initial_pos + stepsize * vel_half_step

    # perform leapfrog updates: the scan op is used to repeatedly compute
    # vel(t + (m-1/2)*stepsize) and pos(t + m*stepsize) for m in [2,n_steps].
    scan_outputs, scan_updates = theano.scan(
        leapfrog,
        outputs_info=[
            dict(initial=pos_full_step),
            dict(initial=vel_half_step),
        ],
        non_sequences=[stepsize],
        n_steps=n_steps - 1)
    all_pos, all_vel = scan_outputs
    final_pos = all_pos[-1]
    last_vel = all_vel[-1]
    # NOTE: Scan always returns an updates dictionary, in case the
    # scanned function draws samples from a RandomStream. These
    # updates must then be used when compiling the Theano function, to
    # avoid drawing the same random numbers each time the function is
    # called. In this case however, we consciously ignore
    # "scan_updates" because we know it is empty.
    assert not scan_updates

    # The last velocity returned by scan is vel(t +
    # (n_steps - 1 / 2) * stepsize) We therefore perform one more half-step
    # to return vel(t + n_steps * stepsize)
    end_energy = energy_fn(final_pos)
    end_grad = TT.grad(end_energy.sum(), final_pos)
    final_vel = last_vel - 0.5 * stepsize * end_grad

    # return new proposal state
    return final_pos, final_vel
# start-snippet-1
def hmc_move(s_rng, positions, energy_fn, stepsize, n_steps):
    """
    This function performs one-step of Hybrid Monte-Carlo sampling. We start by
    sampling a random velocity with `s_rng.normal`, perform `n_steps`
    leap-frog updates using Hamiltonian dynamics and accept-reject
    using Metropolis-Hastings.

    Parameters
    ----------
    s_rng: theano shared random stream
        Symbolic random number generator used to draw random velocity and
        perform accept-reject move.
    positions: shared theano matrix
        Symbolic matrix whose rows are position vectors.
    energy_fn: python function
        Python function, operating on symbolic theano variables, used to
        compute the potential energy at a given position.
    stepsize: shared theano scalar
        Shared variable containing the stepsize to use for `n_steps` of HMC
        simulation steps.
    n_steps: integer
        Number of HMC steps to perform before proposing a new position.

    Returns
    -------
    rval1: boolean
        True if move is accepted, False otherwise
    rval2: theano matrix
        Matrix whose rows contain the proposed "new position"
    """
    # end-snippet-1 start-snippet-2
    # draw a random velocity with the same shape as `positions`
    initial_vel = s_rng.normal(size=positions.shape)
    # end-snippet-2 start-snippet-3
    # simulate the particles under Hamiltonian dynamics
    final_pos, final_vel = simulate_dynamics(
        positions, initial_vel, stepsize, n_steps, energy_fn)
    # end-snippet-3 start-snippet-4
    # accept/reject the proposed move based on the joint distribution
    energy_before = hamiltonian(positions, initial_vel, energy_fn)
    energy_after = hamiltonian(final_pos, final_vel, energy_fn)
    accept = metropolis_hastings_accept(
        energy_prev=energy_before,
        energy_next=energy_after,
        s_rng=s_rng
    )
    # end-snippet-4
    return accept, final_pos
# start-snippet-5
def hmc_updates(positions, stepsize, avg_acceptance_rate, final_pos, accept,
                target_acceptance_rate, stepsize_inc, stepsize_dec,
                stepsize_min, stepsize_max, avg_acceptance_slowness):
    """Assemble the shared-variable updates applied after one HMC move.

    Three quantities are updated: the positions (replaced by the proposal
    wherever `accept` is true), the stepsize (scaled up or down to track a
    target acceptance rate, then clipped) and the running average of the
    acceptance rate (exponential moving average).

    Parameters
    ----------
    positions: shared variable, theano matrix
        Shared theano matrix whose rows contain the old position
    stepsize: shared variable, theano scalar
        Shared theano scalar containing current step size
    avg_acceptance_rate: shared variable, theano scalar
        Shared theano scalar containing the current average acceptance rate
    final_pos: theano matrix
        Symbolic matrix whose rows contain the proposed new position
    accept: theano variable
        Accept/reject decision for the proposed move. It is broadcast from
        its first axis across the remaining axes of `final_pos`.
    target_acceptance_rate: float
        The stepsize is modified in order to track this target acceptance rate.
    stepsize_inc: float
        Factor the stepsize is multiplied by when the average acceptance
        rate is above the target.
    stepsize_dec: float
        Factor the stepsize is multiplied by otherwise.
    stepsize_min: float
        Lower-bound on `stepsize`.
    stepsize_max: float
        Upper-bound on `stepsize`.
    avg_acceptance_slowness: float
        Average acceptance rate is computed as an exponential moving average.
        (1-avg_acceptance_slowness) is the weight given to the newest
        observation.

    Returns
    -------
    rval1: list of (shared variable, new value) pairs
        Updates for the position, stepsize and average acceptance rate, in
        that order.
    """
    ## POSITION UPDATES ##
    # append one broadcastable axis per trailing dimension of `final_pos`
    # so that `accept` can select whole rows
    trailing_axes = ('x',) * (final_pos.ndim - 1)
    accept_mask = accept.dimshuffle(0, *trailing_axes)
    # take the proposal where accepted, otherwise keep the old position
    updated_positions = TT.switch(accept_mask, final_pos, positions)
    # end-snippet-5 start-snippet-7
    ## STEPSIZE UPDATES ##
    # accepting more often than the target means we can afford larger steps;
    # accepting less often means the steps should shrink
    grown = stepsize * stepsize_inc
    shrunk = stepsize * stepsize_dec
    above_target = avg_acceptance_rate > target_acceptance_rate
    candidate_stepsize = TT.switch(above_target, grown, shrunk)
    # keep the stepsize inside [stepsize_min, stepsize_max]
    updated_stepsize = TT.clip(candidate_stepsize, stepsize_min, stepsize_max)
    # end-snippet-7 start-snippet-6
    ## ACCEPT RATE UPDATES ##
    # exponential moving average; the mean is taken in a dtype wide enough
    # for both `accept` and the running average
    mean_dtype = theano.scalar.upcast(accept.dtype, avg_acceptance_rate.dtype)
    old_part = avg_acceptance_slowness * avg_acceptance_rate
    new_part = (1.0 - avg_acceptance_slowness) * accept.mean(dtype=mean_dtype)
    updated_rate = TT.add(old_part, new_part)
    # end-snippet-6 start-snippet-8
    return [(positions, updated_positions),
            (stepsize, updated_stepsize),
            (avg_acceptance_rate, updated_rate)]
# end-snippet-8
class HMC_sampler(object):
    """
    Convenience wrapper for performing Hybrid Monte Carlo (HMC). It creates the
    symbolic graph for performing an HMC simulation (using `hmc_move` and
    `hmc_updates`). The graph is then compiled into the `simulate` function, a
    theano function which runs the simulation and updates the required shared
    variables.

    Users should interface with the sampler through the `draw` function which
    advances the markov chain and returns the current sample by calling
    `simulate` and reading the shared positions in sequence.

    The hyper-parameters are the same as those used by Marc'Aurelio's
    'train_mcRBM.py' file (available on his personal home page).
    """

    def __init__(self, **kwargs):
        # every keyword argument becomes an instance attribute
        self.__dict__.update(kwargs)

    @classmethod
    def new_from_shared_positions(
        cls,
        shared_positions,
        energy_fn,
        initial_stepsize=0.01,
        target_acceptance_rate=.9,
        n_steps=20,
        stepsize_dec=0.98,
        stepsize_min=0.001,
        stepsize_max=0.25,
        stepsize_inc=1.02,
        # used in geometric avg. 1.0 would be not moving at all
        avg_acceptance_slowness=0.9,
        seed=12345
    ):
        """
        Build a sampler around an existing shared variable of positions.

        :param shared_positions: theano ndarray shared var with
            many particle [initial] positions

        :param energy_fn:
            callable such that energy_fn(positions)
            returns theano vector of energies.
            The len of this vector is the batchsize.

            The sum of this energy vector must be differentiable (with
            theano.tensor.grad) with respect to the positions for HMC
            sampling to work.

        The remaining keyword arguments are forwarded to `hmc_move`
        (`n_steps`) and `hmc_updates` (stepsize bounds and factors, target
        acceptance rate, averaging slowness); `seed` seeds the random
        stream.

        :returns: an instance exposing `positions`, `stepsize`,
            `stepsize_min`, `stepsize_max`, `avg_acceptance_rate`,
            `target_acceptance_rate`, `s_rng`, `_updates` and `simulate`.
        """
        # allocate shared variables
        stepsize = sharedX(initial_stepsize, 'hmc_stepsize')
        # the running average starts at the target so the first stepsize
        # adjustments are not driven by an arbitrary initial value
        avg_acceptance_rate = sharedX(target_acceptance_rate,
                                      'avg_acceptance_rate')
        s_rng = TT.shared_randomstreams.RandomStreams(seed)

        # define graph for an `n_steps` HMC simulation
        accept, final_pos = hmc_move(
            s_rng,
            shared_positions,
            energy_fn,
            stepsize,
            n_steps)

        # define the updates to apply on every `simulate` call
        simulate_updates = hmc_updates(
            shared_positions,
            stepsize,
            avg_acceptance_rate,
            final_pos=final_pos,
            accept=accept,
            stepsize_min=stepsize_min,
            stepsize_max=stepsize_max,
            stepsize_inc=stepsize_inc,
            stepsize_dec=stepsize_dec,
            target_acceptance_rate=target_acceptance_rate,
            avg_acceptance_slowness=avg_acceptance_slowness)

        # compile theano function: no inputs, no outputs, only updates
        simulate = function([], [], updates=simulate_updates)

        # create HMC_sampler object with the following attributes ...
        return cls(
            positions=shared_positions,
            stepsize=stepsize,
            stepsize_min=stepsize_min,
            stepsize_max=stepsize_max,
            avg_acceptance_rate=avg_acceptance_rate,
            target_acceptance_rate=target_acceptance_rate,
            s_rng=s_rng,
            _updates=simulate_updates,
            simulate=simulate)

    def draw(self, **kwargs):
        """
        Advance the chain by one `simulate` call and return the positions.

        Parameters
        ----------
        kwargs: dictionary
            Forwarded to the `get_value()` method of the shared variable
            `self.positions`. `borrow` defaults to False when it is not
            given; pass `borrow=True` to avoid copying the shared
            variable's value.

        Returns
        -------
        rval: numpy matrix
            Current value of `self.positions` after the simulation.
        """
        self.simulate()
        # Previously the keyword arguments were accepted but ignored; keep
        # the old behaviour (borrow=False) as the default.
        kwargs.setdefault('borrow', False)
        return self.positions.get_value(**kwargs)
================================================
FILE: DeepLearningTutorials/code/hmc/test_hmc.py
================================================
import numpy
from scipy import linalg
import theano
from hmc import HMC_sampler
def sampler_on_nd_gaussian(sampler_cls, burnin, n_samples, dim=10):
    """Run an HMC sampler on a randomly generated `dim`-dimensional Gaussian.

    A mean vector and a symmetric unit-diagonal matrix (used as the
    covariance) are generated from a fixed seed, `sampler_cls` is built on
    three random starting positions, `burnin` draws are discarded and
    `n_samples` draws are collected. Target and empirical statistics are
    printed, together with the sampler's final stepsize and acceptance rate.

    :param sampler_cls: callable taking `(shared_positions, energy_fn,
        **kwargs)` and returning an object with `draw()`, `stepsize` and
        `avg_acceptance_rate`.
    :param burnin: number of initial draws to throw away.
    :param n_samples: number of draws kept for the empirical statistics.
    :param dim: dimensionality of the Gaussian.
    :returns: the sampler instance.
    """
    n_chains = 3
    floatX = theano.config.floatX
    rng = numpy.random.RandomState(123)

    # Target distribution: random mean, symmetrised random matrix with ones
    # on the diagonal (draw order from `rng` matters for reproducibility)
    mu = numpy.array(rng.rand(dim) * 10, dtype=floatX)
    raw = numpy.array(rng.rand(dim, dim), dtype=floatX)
    cov = (raw + raw.T) / 2.
    diag = numpy.arange(dim)
    cov[diag, diag] = 1.0
    cov_inv = linalg.inv(cov)

    # Energy of a multi-variate Gaussian, one value per row of `x`
    def gaussian_energy(x):
        centered = x - mu
        weighted = theano.tensor.dot(centered, cov_inv) * centered
        return 0.5 * weighted.sum(axis=1)

    # Shared variable holding the starting positions of the chains
    start = rng.randn(n_chains, dim).astype(floatX)
    position = theano.shared(start)

    sampler = sampler_cls(position, gaussian_energy,
                          initial_stepsize=1e-3, stepsize_max=0.5)

    # Burn-in: advance the chains and ignore the returned positions
    for _ in xrange(burnin):
        sampler.draw()

    # Collect draws into an array of shape [n_samples, n_chains, dim]
    draws = []
    for _ in xrange(n_samples):
        draws.append(sampler.draw())
    stacked = numpy.asarray(draws)

    # Flatten to [n_samples * n_chains, dim]
    samples = stacked.T.reshape(dim, -1).T

    print('****** TARGET VALUES ******')
    print('target mean: %s' % (mu,))
    print('target cov:\n%s' % (cov,))

    print('****** EMPIRICAL MEAN/COV USING HMC ******')
    print('empirical mean:  %s' % (samples.mean(axis=0),))
    print('empirical_cov:\n%s' % (numpy.cov(samples.T),))

    print('****** HMC INTERNALS ******')
    print('final stepsize %s' % (sampler.stepsize.get_value(),))
    print('final acceptance_rate %s'
          % (sampler.avg_acceptance_rate.get_value(),))

    return sampler
def test_hmc():
    """Check that HMC on a 5-d Gaussian ends near its target acceptance rate
    and with a stepsize inside the configured bounds."""
    sampler = sampler_on_nd_gaussian(
        HMC_sampler.new_from_shared_positions,
        burnin=1000, n_samples=1000, dim=5)

    # running acceptance rate should be within 0.1 of the target
    rate_gap = (sampler.avg_acceptance_rate.get_value() -
                sampler.target_acceptance_rate)
    assert abs(rate_gap) < .1

    # adaptive stepsize must respect both bounds
    final_stepsize = sampler.stepsize.get_value()
    assert final_stepsize >= sampler.stepsize_min
    assert final_stepsize <= sampler.stepsize_max
================================================
FILE: DeepLearningTutorials/code/imdb.py
================================================
import cPickle
import gzip
import os
import numpy
import theano
def prepare_data(seqs, labels, maxlen=None):
    """Create the padded matrices from the datasets.

    Every sequence is padded with zeros to the length of the longest
    sequence that is kept.

    If `maxlen` is set, sequences whose length is `maxlen` or more are
    dropped (together with their labels); they are not truncated. When
    nothing is left, ``(None, None, None)`` is returned.

    This swaps the axes: the returned matrices have one column per
    sequence and one row per time step.

    :returns: ``(x, x_mask, labels)`` where `x` holds the int64 word
        indices, `x_mask` is 1. on real entries and 0. on padding, and
        `labels` are the labels of the sequences that were kept.
    """
    lengths = [len(s) for s in seqs]

    if maxlen is not None:
        # keep only the (length, sequence, label) triples that are short
        # enough
        kept = [(n, s, y) for n, s, y in zip(lengths, seqs, labels)
                if n < maxlen]
        if not kept:
            return None, None, None
        lengths = [n for n, _, _ in kept]
        seqs = [s for _, s, _ in kept]
        labels = [y for _, _, y in kept]

    n_samples = len(seqs)
    longest = numpy.max(lengths)

    x = numpy.zeros((longest, n_samples), dtype='int64')
    x_mask = numpy.zeros((longest, n_samples), dtype=theano.config.floatX)
    # column `col` receives sequence `col`; rows past its length stay zero
    for col, (n, s) in enumerate(zip(lengths, seqs)):
        x[:n, col] = s
        x_mask[:n, col] = 1.

    return x, x_mask, labels
def get_dataset_file(dataset, default_dataset, origin):
    '''Resolve the path of a dataset file, downloading it if necessary.

    `dataset` is first taken as given. If it is a bare file name that does
    not exist, the ``../data`` directory next to this module is tried; that
    location is used when the file is found there, or when the file name
    equals `default_dataset`.

    If the resolved file is still missing and its name equals
    `default_dataset`, it is downloaded from `origin` to the resolved path.

    :returns: the resolved path (which may not exist when the name is not
        the default one).
    '''
    folder, filename = os.path.split(dataset)
    wants_default = filename == default_dataset

    if not folder and not os.path.isfile(dataset):
        # Bare file name not found as given: look in the data directory.
        module_dir = os.path.dirname(__file__)
        candidate = os.path.join(module_dir, "..", "data", dataset)
        if wants_default or os.path.isfile(candidate):
            dataset = candidate

    if wants_default and not os.path.isfile(dataset):
        import urllib
        print('Downloading data from %s' % origin)
        urllib.urlretrieve(origin, dataset)

    return dataset
def load_data(path="imdb.pkl", n_words=100000, valid_portion=0.1, maxlen=None,
              sort_by_len=True):
    '''Loads the dataset

    :type path: String
    :param path: The path to the dataset (here IMDB)
    :type n_words: int
    :param n_words: The number of words to keep in the vocabulary.
        All extra words are set to unknown (1).
    :type valid_portion: float
    :param valid_portion: The proportion of the full train set used for
        the validation set.
    :type maxlen: None or positive int
    :param maxlen: the max sequence length we use in the train/valid set.
        Training sequences of this length or longer are dropped.
    :type sort_by_len: bool
    :param sort_by_len: Sort by the sequence length for the train,
        valid and test set. This allows faster execution as it causes
        less padding per minibatch. Another mechanism must be used to
        shuffle the train set at each epoch.

    :returns: ``(train, valid, test)``, each a ``(sequences, labels)`` pair
        of lists.
    '''

    #############
    # LOAD DATA #
    #############

    # Load the dataset
    path = get_dataset_file(
        path, "imdb.pkl",
        "http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl")

    if path.endswith(".gz"):
        f = gzip.open(path, 'rb')
    else:
        f = open(path, 'rb')

    # NOTE(review): the file may have just been downloaded over plain HTTP;
    # unpickling executes arbitrary code, so only use trusted sources.
    try:
        # the file holds two consecutive pickles: train set, then test set
        train_set = cPickle.load(f)
        test_set = cPickle.load(f)
    finally:
        # close the handle even when unpickling fails
        f.close()

    if maxlen:
        new_train_set_x = []
        new_train_set_y = []
        for x, y in zip(train_set[0], train_set[1]):
            if len(x) < maxlen:
                new_train_set_x.append(x)
                new_train_set_y.append(y)
        train_set = (new_train_set_x, new_train_set_y)
        del new_train_set_x, new_train_set_y

    # split training set into validation set
    train_set_x, train_set_y = train_set
    n_samples = len(train_set_x)
    sidx = numpy.random.permutation(n_samples)
    n_train = int(numpy.round(n_samples * (1. - valid_portion)))
    valid_set_x = [train_set_x[s] for s in sidx[n_train:]]
    valid_set_y = [train_set_y[s] for s in sidx[n_train:]]
    train_set_x = [train_set_x[s] for s in sidx[:n_train]]
    train_set_y = [train_set_y[s] for s in sidx[:n_train]]

    def remove_unk(x):
        # map every word index outside the vocabulary to 1 (unknown)
        return [[1 if w >= n_words else w for w in sen] for sen in x]

    test_set_x, test_set_y = test_set

    train_set_x = remove_unk(train_set_x)
    valid_set_x = remove_unk(valid_set_x)
    test_set_x = remove_unk(test_set_x)

    def len_argsort(seq):
        # indices that order `seq` by increasing element length
        return sorted(range(len(seq)), key=lambda x: len(seq[x]))

    if sort_by_len:
        sorted_index = len_argsort(test_set_x)
        test_set_x = [test_set_x[i] for i in sorted_index]
        test_set_y = [test_set_y[i] for i in sorted_index]

        sorted_index = len_argsort(valid_set_x)
        valid_set_x = [valid_set_x[i] for i in sorted_index]
        valid_set_y = [valid_set_y[i] for i in sorted_index]

        sorted_index = len_argsort(train_set_x)
        train_set_x = [train_set_x[i] for i in sorted_index]
        train_set_y = [train_set_y[i] for i in sorted_index]

    train = (train_set_x, train_set_y)
    valid = (valid_set_x, valid_set_y)
    test = (test_set_x, test_set_y)

    return train, valid, test
================================================
FILE: DeepLearningTutorials/code/imdb_preprocess.py
================================================
"""
This script is what created the pickled dataset.
1) You need to download this file and put it in the same directory as this file.
https://github.com/moses-smt/mosesdecoder/raw/master/scripts/tokenizer/tokenizer.perl . Give it execution permission.
2) Get the dataset from http://ai.stanford.edu/~amaas/data/sentiment/ and extract it in the current directory.
3) Then run this script.
"""
# Hard-coded location of the extracted aclImdb dataset; `main` reads the
# `train` and `test` sub-directories below it. Change this to wherever the
# archive was unpacked on your machine.
dataset_path='/Tmp/bastienf/aclImdb/'
import numpy
import cPickle as pkl
from collections import OrderedDict
import glob
import os
from subprocess import Popen, PIPE
# tokenizer.perl is from Moses: https://github.com/moses-smt/mosesdecoder/tree/master/scripts/tokenizer
# Command run by `tokenize`. The script is referenced as './tokenizer.perl',
# i.e. relative to the working directory at the time of the call.
# NOTE(review): presumably '-l en' selects English and '-q' silences the
# tokenizer's own messages -- confirm against tokenizer.perl.
tokenizer_cmd = ['./tokenizer.perl', '-l', 'en', '-q', '-']
def tokenize(sentences):
    """Run `tokenizer_cmd` over `sentences` and return the tokenized lines.

    The sentences are joined with newlines and written to the standard
    input of the tokenizer process; its standard output is split on
    newlines and the last piece (whatever follows the final newline) is
    discarded.
    """
    # Trailing comma: with the Python 2 print statement this suppresses the
    # newline so that 'Done' ends up on the same line.
    print('Tokenizing..'),
    joined = "\n".join(sentences)
    proc = Popen(tokenizer_cmd, stdin=PIPE, stdout=PIPE)
    stdout_data = proc.communicate(joined)[0]
    pieces = stdout_data.split('\n')
    pieces.pop()
    print('Done')

    return pieces
def _first_lines(directory):
    """Return the stripped first line of every ``*.txt`` file in `directory`."""
    lines = []
    currdir = os.getcwd()
    os.chdir(directory)
    try:
        for ff in glob.glob("*.txt"):
            with open(ff, 'r') as f:
                lines.append(f.readline().strip())
    finally:
        # always go back, so a failure does not leave the process in the
        # data directory and relative paths keep working for later calls
        os.chdir(currdir)
    return lines


def build_dict(path):
    """Build the word -> index dictionary from the reviews below `path`.

    The first line of every ``*.txt`` file in ``path/pos/`` and
    ``path/neg/`` is tokenized with `tokenize`, lower-cased and split on
    whitespace. Words are then numbered by decreasing frequency, starting
    at 2 (0 and 1 are left free; 1 is used for unknown words).

    `path` may be relative: each sub-directory is visited from the original
    working directory, which is restored afterwards.

    :returns: dict mapping each word to its integer index.
    """
    sentences = _first_lines('%s/pos/' % path)
    sentences.extend(_first_lines('%s/neg/' % path))

    sentences = tokenize(sentences)

    # Trailing comma: with the Python 2 print statement the word counts
    # printed below continue on the same line.
    print('Building dictionary..'),
    wordcount = dict()
    for ss in sentences:
        for w in ss.strip().lower().split():
            wordcount[w] = wordcount.get(w, 0) + 1

    counts = list(wordcount.values())
    keys = list(wordcount.keys())

    # most frequent word first
    sorted_idx = numpy.argsort(counts)[::-1]

    worddict = dict()

    for idx, ss in enumerate(sorted_idx):
        worddict[keys[ss]] = idx+2  # leave 0 and 1 (UNK)

    print('%s  total words  %s  unique words' % (numpy.sum(counts), len(keys)))

    return worddict
def grab_data(path, dictionary):
    """Convert the reviews in `path` into lists of word indices.

    The first line of every ``*.txt`` file in `path` is tokenized with
    `tokenize`, lower-cased and split on whitespace; each word is replaced
    by its entry in `dictionary`, or by 1 when it is not in the dictionary.

    :returns: list with one list of integer indices per file.
    """
    sentences = []
    currdir = os.getcwd()
    os.chdir(path)
    try:
        for ff in glob.glob("*.txt"):
            with open(ff, 'r') as f:
                sentences.append(f.readline().strip())
    finally:
        # restore the working directory even if a file cannot be read
        os.chdir(currdir)
    sentences = tokenize(sentences)

    return [[dictionary.get(w, 1) for w in ss.strip().lower().split()]
            for ss in sentences]
def main():
    """Build the dictionary and write ``imdb.pkl`` and ``imdb.dict.pkl``.

    ``imdb.pkl`` receives two consecutive pickles, ``(train_x, train_y)``
    then ``(test_x, test_y)``; positive reviews are labelled 1 and negative
    ones 0. ``imdb.dict.pkl`` receives the word dictionary. Both files are
    written to the current directory.
    """
    # Get the dataset from http://ai.stanford.edu/~amaas/data/sentiment/
    path = dataset_path
    dictionary = build_dict(os.path.join(path, 'train'))

    # os.path.join works whether or not `dataset_path` ends with a slash
    train_x_pos = grab_data(os.path.join(path, 'train', 'pos'), dictionary)
    train_x_neg = grab_data(os.path.join(path, 'train', 'neg'), dictionary)
    train_x = train_x_pos + train_x_neg
    train_y = [1] * len(train_x_pos) + [0] * len(train_x_neg)

    test_x_pos = grab_data(os.path.join(path, 'test', 'pos'), dictionary)
    test_x_neg = grab_data(os.path.join(path, 'test', 'neg'), dictionary)
    test_x = test_x_pos + test_x_neg
    test_y = [1] * len(test_x_pos) + [0] * len(test_x_neg)

    # -1 selects the highest pickle protocol available
    with open('imdb.pkl', 'wb') as f:
        pkl.dump((train_x, train_y), f, -1)
        pkl.dump((test_x, test_y), f, -1)

    with open('imdb.dict.pkl', 'wb') as f:
        pkl.dump(dictionary, f, -1)
if __name__ == '__main__':
main()
================================================
FILE: DeepLearningTutorials/code/logistic_cg.py
================================================
"""
This tutorial introduces logistic regression using Theano and conjugate
gradient descent.
Logistic regression is a probabilistic, linear classifier. It is parametrized
by a weight matrix :math:`W` and a bias vector :math:`b`. Classification is
done by projecting data points onto a set of hyperplanes, the distance to
which is used to determine a class membership probability.
Mathematically, this can be written as:
.. math::
P(Y=i|x, W,b) &= softmax_i(W x + b) \\
&= \frac {e^{W_i x + b_i}} {\sum_j e^{W_j x + b_j}}
The output of the model or prediction is then done by taking the argmax of
the vector whose i'th element is P(Y=i|x).
.. math::
y_{pred} = argmax_i P(Y=i|x,W,b)
This tutorial presents a conjugate gradient optimization method that is
suitable for smaller datasets.
References:
- textbooks: "Pattern Recognition and Machine Learning" -
Christopher M. Bishop, section 4.3.2
"""
__docformat__ = 'restructedtext en'
import os
import sys
import time
import numpy
import theano
import theano.tensor as T
from logistic_sgd import load_data
class LogisticRegression(object):
    """Multi-class Logistic Regression Class

    The model is a weight matrix :math:`W` and a bias vector :math:`b`,
    both stored as views into a single flat parameter vector ``theta`` so
    that an optimizer can treat all parameters as one array.
    """

    def __init__(self, input, n_in, n_out):
        """ Initialize the parameters of the logistic regression

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoint lies

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the target lies

        """
        n_weights = n_in * n_out

        # theta = (W, b) flattened and initialised to zero: the first
        # n_in*n_out entries are W, the last n_out entries are b
        zeros = numpy.zeros(n_weights + n_out, dtype=theano.config.floatX)
        self.theta = theano.shared(value=zeros, name='theta', borrow=True)

        # W: leading slice of theta viewed as an (n_in, n_out) matrix
        self.W = self.theta[0:n_weights].reshape((n_in, n_out))
        # b: trailing n_out entries of theta
        self.b = self.theta[n_weights:n_weights + n_out]

        # symbolic class-membership probabilities, one row per example
        activation = T.dot(input, self.W) + self.b
        self.p_y_given_x = T.nnet.softmax(activation)

        # symbolic prediction: the most probable class of each row
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)

    def negative_log_likelihood(self, y):
        r"""Return the mean negative log-likelihood of the labels `y` under
        the model.

        .. math::

            \frac{1}{|\mathcal{D}|}\mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
            \frac{1}{|\mathcal{D}|}\sum_{i=0}^{|\mathcal{D}|}
                \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
            \ell (\theta=\{W,b\}, \mathcal{D})

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label
        """
        log_probs = T.log(self.p_y_given_x)
        # pick, for every row, the log-probability of its correct label
        rows = T.arange(y.shape[0])
        return -T.mean(log_probs[rows, y])

    def errors(self, y):
        """Return the fraction of examples in the minibatch whose predicted
        class differs from `y`.

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example
                  the correct label

        :raises TypeError: if `y` and the predictions differ in number of
                           dimensions
        :raises NotImplementedError: if the dtype of `y` is not an integer
                                     type
        """
        # y must have as many dimensions as y_pred
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        # only integer labels are supported
        if not y.dtype.startswith('int'):
            raise NotImplementedError()
        # T.neq yields 1 where the prediction is wrong and 0 elsewhere
        return T.mean(T.neq(self.y_pred, y))
def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'):
    """Demonstrate conjugate gradient optimization of a log-linear model

    This is demonstrated on MNIST. The full-training-set cost and gradient
    are handed to ``scipy.optimize.fmin_cg``; after every iteration the
    validation error is printed and, when it improves, the test error is
    recorded.

    :type n_epochs: int
    :param n_epochs: number of epochs to run the optimizer (passed to
                     ``fmin_cg`` as ``maxiter``)

    :type mnist_pkl_gz: string
    :param mnist_pkl_gz: the path of the mnist training file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    """
    #############
    # LOAD DATA #
    #############
    datasets = load_data(mnist_pkl_gz)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    batch_size = 600    # size of the minibatch

    # number of whole minibatches; `//` keeps these integers regardless of
    # the division semantics in effect
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] // batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size

    n_in = 28 * 28  # number of input units
    n_out = 10  # number of output units

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print('... building the model')

    # allocate symbolic variables for the data
    minibatch_offset = T.lscalar()  # offset to the start of a [mini]batch
    x = T.matrix()   # the data is presented as rasterized images
    y = T.ivector()  # the labels are presented as 1D vector of
                     # [int] labels

    # construct the logistic regression class with the sizes defined above,
    # so that they stay in sync with the `x0` vector given to the optimizer
    classifier = LogisticRegression(input=x, n_in=n_in, n_out=n_out)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y).mean()

    # compile a theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        [minibatch_offset],
        classifier.errors(y),
        givens={
            x: test_set_x[minibatch_offset:minibatch_offset + batch_size],
            y: test_set_y[minibatch_offset:minibatch_offset + batch_size]
        },
        name="test"
    )

    validate_model = theano.function(
        [minibatch_offset],
        classifier.errors(y),
        givens={
            x: valid_set_x[minibatch_offset: minibatch_offset + batch_size],
            y: valid_set_y[minibatch_offset: minibatch_offset + batch_size]
        },
        name="validate"
    )

    # compile a theano function that returns the cost of a minibatch
    batch_cost = theano.function(
        [minibatch_offset],
        cost,
        givens={
            x: train_set_x[minibatch_offset: minibatch_offset + batch_size],
            y: train_set_y[minibatch_offset: minibatch_offset + batch_size]
        },
        name="batch_cost"
    )

    # compile a theano function that returns the gradient of the minibatch
    # with respect to theta
    batch_grad = theano.function(
        [minibatch_offset],
        T.grad(cost, classifier.theta),
        givens={
            x: train_set_x[minibatch_offset: minibatch_offset + batch_size],
            y: train_set_y[minibatch_offset: minibatch_offset + batch_size]
        },
        name="batch_grad"
    )

    # creates a function that computes the average cost on the training set
    def train_fn(theta_value):
        classifier.theta.set_value(theta_value, borrow=True)
        train_losses = [batch_cost(i * batch_size)
                        for i in xrange(n_train_batches)]
        return numpy.mean(train_losses)

    # creates a function that computes the average gradient of cost with
    # respect to theta
    def train_fn_grad(theta_value):
        classifier.theta.set_value(theta_value, borrow=True)
        grad = batch_grad(0)
        for i in xrange(1, n_train_batches):
            grad += batch_grad(i * batch_size)
        return grad / n_train_batches

    # [best validation error so far, test error at that point]; a list so
    # that `callback` can update it in place
    validation_scores = [numpy.inf, 0]

    # creates the validation function
    def callback(theta_value):
        classifier.theta.set_value(theta_value, borrow=True)
        # compute the validation loss
        validation_losses = [validate_model(i * batch_size)
                             for i in xrange(n_valid_batches)]
        this_validation_loss = numpy.mean(validation_losses)
        print('validation error %f %%' % (this_validation_loss * 100.,))

        # check if it is better than the best validation score got until now
        if this_validation_loss < validation_scores[0]:
            # if so, replace the old one, and compute the score on the
            # testing dataset
            validation_scores[0] = this_validation_loss
            test_losses = [test_model(i * batch_size)
                           for i in xrange(n_test_batches)]
            validation_scores[1] = numpy.mean(test_losses)

    ###############
    # TRAIN MODEL #
    ###############

    # using scipy conjugate gradient optimizer
    import scipy.optimize
    print("Optimizing using scipy.optimize.fmin_cg...")
    start_time = time.clock()
    # the optimum returned by fmin_cg is not needed: `callback` has already
    # recorded the scores reported below
    scipy.optimize.fmin_cg(
        f=train_fn,
        x0=numpy.zeros((n_in + 1) * n_out, dtype=x.dtype),
        fprime=train_fn_grad,
        callback=callback,
        disp=0,
        maxiter=n_epochs
    )
    end_time = time.clock()
    print(
        (
            'Optimization complete with best validation score of %f %%, with '
            'test performance %f %%'
        )
        % (validation_scores[0] * 100., validation_scores[1] * 100.)
    )

    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
if __name__ == '__main__':
cg_optimization_mnist()
================================================
FILE: DeepLearningTutorials/code/logistic_sgd.py
================================================
"""
This tutorial introduces logistic regression using Theano and stochastic
gradient descent.
Logistic regression is a probabilistic, linear classifier. It is parametrized
by a weight matrix :math:`W` and a bias vector :math:`b`. Classification is
done by projecting data points onto a set of hyperplanes, the distance to
which is used to determine a class membership probability.
Mathematically, this can be written as:
.. math::
P(Y=i|x, W,b) &= softmax_i(W x + b) \\
&= \frac {e^{W_i x + b_i}} {\sum_j e^{W_j x + b_j}}
The output of the model or prediction is then done by taking the argmax of
the vector whose i'th element is P(Y=i|x).
.. math::
y_{pred} = argmax_i P(Y=i|x,W,b)
This tutorial presents a stochastic gradient descent optimization method
suitable for large datasets.
References:
- textbooks: "Pattern Recognition and Machine Learning" -
Christopher M. Bishop, section 4.3.2
"""
__docformat__ = 'restructedtext en'
import cPickle
import gzip
import os
import sys
import time
import numpy
import theano
import theano.tensor as T
class LogisticRegression(object):
    """Multi-class Logistic Regression Class

    The model is a weight matrix :math:`W` and a bias vector :math:`b`,
    each held in its own shared variable. An input is classified by
    projecting it with `W`, adding `b` and applying a softmax to obtain the
    class membership probabilities.
    """

    def __init__(self, input, n_in, n_out):
        """ Initialize the parameters of the logistic regression

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

        """
        # start-snippet-1
        floatX = theano.config.floatX

        # weights W: an (n_in, n_out) matrix of zeros
        self.W = theano.shared(
            value=numpy.zeros((n_in, n_out), dtype=floatX),
            name='W',
            borrow=True
        )
        # biases b: a vector of n_out zeros
        self.b = theano.shared(
            value=numpy.zeros((n_out,), dtype=floatX),
            name='b',
            borrow=True
        )

        # symbolic matrix of class-membership probabilities, where
        #   - column k of W is the separating hyperplane of class k,
        #   - row j of the input is training sample j,
        #   - element k of b is the free parameter of hyperplane k
        activation = T.dot(input, self.W) + self.b
        self.p_y_given_x = T.nnet.softmax(activation)

        # symbolic prediction: the class with the highest probability
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
        # end-snippet-1

        # parameters of the model
        self.params = [self.W, self.b]

    def negative_log_likelihood(self, y):
        r"""Return the mean of the negative log-likelihood of the prediction
        of this model under a given target distribution.

        .. math::

            \frac{1}{|\mathcal{D}|} \mathcal{L} (\theta=\{W,b\}, \mathcal{D}) =
            \frac{1}{|\mathcal{D}|} \sum_{i=0}^{|\mathcal{D}|}
                \log(P(Y=y^{(i)}|x^{(i)}, W,b)) \\
            \ell (\theta=\{W,b\}, \mathcal{D})

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        Note: we use the mean instead of the sum so that
              the learning rate is less dependent on the batch size
        """
        # start-snippet-2
        # With n = y.shape[0] examples in the minibatch:
        #   - `rows` is the symbolic vector [0, 1, ..., n-1];
        #   - `log_probs` is the matrix LP of log-probabilities, one row
        #     per example and one column per class;
        #   - LP[rows, y] is the vector [LP[0, y[0]], ..., LP[n-1, y[n-1]]],
        #     i.e. the log-probability of the correct class of each example.
        # The result is minus the mean of that vector.
        log_probs = T.log(self.p_y_given_x)
        rows = T.arange(y.shape[0])
        return -T.mean(log_probs[rows, y])
        # end-snippet-2

    def errors(self, y):
        """Return a float representing the number of errors in the minibatch
        over the total number of examples of the minibatch; zero-one
        loss over the size of the minibatch

        :type y: theano.tensor.TensorType
        :param y: corresponds to a vector that gives for each example the
                  correct label

        :raises TypeError: if `y` and the predictions differ in number of
                           dimensions
        :raises NotImplementedError: if the dtype of `y` is not an integer
                                     type
        """
        # y must have as many dimensions as y_pred
        if y.ndim != self.y_pred.ndim:
            raise TypeError(
                'y should have the same shape as self.y_pred',
                ('y', y.type, 'y_pred', self.y_pred.type)
            )
        # only integer labels are supported
        if not y.dtype.startswith('int'):
            raise NotImplementedError()
        # T.neq yields 1 where the prediction is wrong and 0 elsewhere
        return T.mean(T.neq(self.y_pred, y))
def load_data(dataset):
    ''' Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)

    :returns: list of three ``(x, y)`` pairs for the train, validation and
        test sets, in that order. Each `x` is a shared variable of dtype
        ``floatX`` and each `y` a symbolic int32 cast of a shared variable.
    '''

    #############
    # LOAD DATA #
    #############

    # Download the MNIST dataset if it is not present
    data_dir, data_file = os.path.split(dataset)
    if data_dir == "" and not os.path.isfile(dataset):
        # Check if dataset is in the data directory.
        new_path = os.path.join(
            os.path.split(__file__)[0],
            "..",
            "data",
            dataset
        )
        if os.path.isfile(new_path) or data_file == 'mnist.pkl.gz':
            dataset = new_path

    if (not os.path.isfile(dataset)) and data_file == 'mnist.pkl.gz':
        import urllib
        origin = (
            'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        )
        print('Downloading data from %s' % origin)
        urllib.urlretrieve(origin, dataset)

    print('... loading data')

    # Load the dataset
    # NOTE(review): the file may have just been downloaded over plain HTTP;
    # unpickling executes arbitrary code, so only use trusted sources.
    f = gzip.open(dataset, 'rb')
    try:
        train_set, valid_set, test_set = cPickle.load(f)
    finally:
        # close the handle even when unpickling fails
        f.close()
    # train_set, valid_set, test_set format: tuple(input, target)
    # input is a numpy.ndarray of 2 dimensions (a matrix) in which each
    # row corresponds to an example. target is a numpy.ndarray of 1
    # dimension (a vector) that has the same length as the number of rows
    # in the input. It gives the target of the example with the same index
    # in the input.

    def shared_dataset(data_xy, borrow=True):
        """ Function that loads the dataset into shared variables

        The reason we store our dataset in shared variables is to allow
        Theano to copy it into the GPU memory (when code is run on GPU).
        Since copying data into the GPU is slow, copying a minibatch every
        time it is needed (the default behaviour if the data is not in a
        shared variable) would lead to a large decrease in performance.
        """
        data_x, data_y = data_xy
        shared_x = theano.shared(numpy.asarray(data_x,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        shared_y = theano.shared(numpy.asarray(data_y,
                                               dtype=theano.config.floatX),
                                 borrow=borrow)
        # When storing data on the GPU it has to be stored as floats
        # therefore we will store the labels as ``floatX`` as well
        # (``shared_y`` does exactly that). But during our computations
        # we need them as ints (we use labels as index, and if they are
        # floats it doesn't make sense) therefore instead of returning
        # ``shared_y`` we will have to cast it to int. This little hack
        # lets us get around this issue
        return shared_x, T.cast(shared_y, 'int32')

    test_set_x, test_set_y = shared_dataset(test_set)
    valid_set_x, valid_set_y = shared_dataset(valid_set)
    train_set_x, train_set_y = shared_dataset(train_set)

    rval = [(train_set_x, train_set_y), (valid_set_x, valid_set_y),
            (test_set_x, test_set_y)]
    return rval
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='mnist.pkl.gz',
                           batch_size=600):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model.

    This is demonstrated on MNIST. The function builds a
    ``LogisticRegression`` classifier, compiles Theano functions for
    training, validation and testing, and then runs minibatch SGD with
    patience-based early stopping, printing progress as it goes.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz

    :type batch_size: int
    :param batch_size: number of examples in each minibatch

    The function returns nothing; results are only printed.
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    # NOTE: this file uses Python 2 print statements, so ``/`` on two ints is
    # integer division here; a trailing partial minibatch is never visited.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = T.matrix('x')  # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    # construct the logistic regression class
    # Each MNIST image has size 28*28
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch; ``givens`` substitutes the symbolic x / y with
    # the ``index``-th slice of the shared test data
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # same as ``test_model`` but reading from the validation set
    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-3

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    # ``patience`` is compared against the minibatch iteration counter below:
    # training continues at least until that many iterations have run
    patience = 5000
    patience_increase = 2  # wait this much longer when a new best is
                           # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    # go through this many minibatches before checking the network on the
    # validation set; with the defaults this is once per epoch
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            # one SGD step; the returned cost is not used afterwards
            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number (0-based count of minibatches seen so far)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # extend patience if the improvement is large enough
                    if this_validation_loss < best_validation_loss * \
                       improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    # test it on the test set

                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(
                        (
                            ' epoch %i, minibatch %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )

            # stop once the iteration counter has caught up with patience
            if patience <= iter:
                done_looping = True
                break

    end_time = time.clock()
    print(
        (
            'Optimization complete with best validation score of %f %%,'
            'with test performance %f %%'
        )
        % (best_validation_loss * 100., test_score * 100.)
    )
    print 'The code run for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    # timing summary goes to stderr rather than stdout
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % ((end_time - start_time)))
# Run the logistic-regression demo with its default arguments when this file
# is executed as a script.
if __name__ == '__main__':
    sgd_optimization_mnist()
================================================
FILE: DeepLearningTutorials/code/lstm.py
================================================
'''
Build a tweet sentiment analyzer
'''
from collections import OrderedDict
import cPickle as pkl
import random
import sys
import time
import numpy
import theano
from theano import config
import theano.tensor as tensor
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
import imdb
# Registry of available datasets: name -> (load_data, prepare_data) callables.
datasets = {'imdb': (imdb.load_data, imdb.prepare_data)}
def numpy_floatX(data):
    """Return `data` converted to a numpy array of dtype ``config.floatX``."""
    target_dtype = config.floatX
    return numpy.asarray(data, dtype=target_dtype)
def get_minibatches_idx(n, minibatch_size, shuffle=False):
    """
    Split the indices ``0 .. n-1`` into consecutive minibatches.

    :param n: number of examples to index.
    :param minibatch_size: number of indices per minibatch; expected to be a
        positive int. The last minibatch holds the remainder and may be
        shorter.
    :param shuffle: when true, the indices are shuffled (with the ``random``
        module) before being cut into minibatches.
    :returns: a list of ``(minibatch_number, index_array)`` pairs, where
        ``index_array`` is an int32 numpy array.

    A concrete list is returned (not a bare ``zip``) so the result can be
    iterated several times on any Python version; callers in this file reuse
    the same minibatch listing for repeated validation passes.
    """
    idx_list = numpy.arange(n, dtype="int32")

    if shuffle:
        random.shuffle(idx_list)

    # Stepping by minibatch_size yields every full batch plus the leftover
    # tail (slicing past the end simply truncates).
    minibatches = [idx_list[start:start + minibatch_size]
                   for start in range(0, n, minibatch_size)]

    return list(enumerate(minibatches))
def get_dataset(name):
    """Look up `name` in ``datasets`` and return its first two entries."""
    entry = datasets[name]
    loader = entry[0]
    preparer = entry[1]
    return loader, preparer
def zipp(params, tparams):
    """
    Copy every value of `params` into the matching entry of `tparams`.

    Used when a model is reloaded: for each key of `params`,
    ``tparams[key].set_value(...)`` is called with the stored value.
    """
    for key in params:
        tparams[key].set_value(params[key])
def unzip(zipped):
    """
    Snapshot the current values of `zipped` into a new ``OrderedDict``.

    Used when the model is pickled: each entry's ``get_value()`` result is
    stored under the same key, in the same order.
    """
    return OrderedDict((key, var.get_value()) for key, var in zipped.items())
def dropout_layer(state_before, use_noise, trng):
    """
    Build the symbolic dropout expression for `state_before`.

    When `use_noise` is non-zero the state is multiplied by a binomial mask
    (p=0.5) drawn from `trng`; otherwise the state is scaled by 0.5.
    """
    keep_mask = trng.binomial(state_before.shape,
                              p=0.5, n=1,
                              dtype=state_before.dtype)
    noisy_state = state_before * keep_mask
    scaled_state = state_before * 0.5
    return tensor.switch(use_noise, noisy_state, scaled_state)
def _p(pp, name):
return '%s_%s' % (pp, name)
def init_params(options):
    """
    Create the global (non-LSTM) parameters plus the encoder's parameters.

    Reads ``n_words``, ``dim_proj``, ``encoder`` and ``ydim`` from `options`
    and returns an ``OrderedDict`` holding the embedding (``Wemb``), whatever
    the encoder's initialiser adds, and the classifier (``U``, ``b``).
    """
    params = OrderedDict()

    # embedding: uniform [0, 1) samples scaled by 0.01
    embedding = numpy.random.rand(options['n_words'],
                                  options['dim_proj'])
    params['Wemb'] = (0.01 * embedding).astype(config.floatX)

    # encoder-specific parameters (first entry of the layer registry tuple)
    encoder_name = options['encoder']
    init_encoder = get_layer(encoder_name)[0]
    params = init_encoder(options, params, prefix=encoder_name)

    # classifier
    classifier_weights = numpy.random.randn(
        options['dim_proj'], options['ydim']).astype(config.floatX)
    params['U'] = 0.01 * classifier_weights
    params['b'] = numpy.zeros((options['ydim'],)).astype(config.floatX)

    return params
def load_params(path, params):
    """
    Overwrite the values of `params` with those stored in the archive `path`.

    :param path: file readable by ``numpy.load`` (e.g. an ``.npz`` archive).
    :param params: dict whose keys name the arrays to fetch; it is updated
        in place.
    :returns: the same `params` dict.
    :raises Warning: if a key of `params` is missing from the archive. Keys
        processed before the missing one have already been replaced.
    """
    pp = numpy.load(path)
    try:
        # Iterate over a snapshot of the keys; only values are replaced.
        for kk in list(params):
            if kk not in pp:
                raise Warning('%s is not in the archive' % kk)
            params[kk] = pp[kk]
    finally:
        # ``numpy.load`` keeps the underlying file open for archives; release
        # it once every requested array has been read.
        close = getattr(pp, 'close', None)
        if close is not None:
            close()

    return params
def init_tparams(params):
    """
    Wrap every entry of `params` in a Theano shared variable.

    Returns an ``OrderedDict`` with the same keys; each shared variable is
    named after its key.
    """
    return OrderedDict(
        (key, theano.shared(value, name=key))
        for key, value in params.items()
    )
def get_layer(name):
    """Return the ``layers`` registry entry stored under `name`."""
    return layers[name]
def ortho_weight(ndim):
    """
    Return an (ndim, ndim) matrix of dtype ``config.floatX``.

    The matrix is the first factor of the SVD of a standard-normal random
    matrix.
    """
    gaussian = numpy.random.randn(ndim, ndim)
    left_factor = numpy.linalg.svd(gaussian)[0]
    return left_factor.astype(config.floatX)
def param_init_lstm(options, params, prefix='lstm'):
    """
    Add the LSTM parameters to `params` and return it.

    Stores ``<prefix>_W`` and ``<prefix>_U`` (each four ``ortho_weight``
    blocks of size ``dim_proj`` joined along axis 1) and a zero bias
    ``<prefix>_b`` of length ``4 * dim_proj``.

    :see: init_params
    """
    dim = options['dim_proj']

    input_blocks = [ortho_weight(dim) for _ in range(4)]
    params[_p(prefix, 'W')] = numpy.concatenate(input_blocks, axis=1)

    recurrent_blocks = [ortho_weight(dim) for _ in range(4)]
    params[_p(prefix, 'U')] = numpy.concatenate(recurrent_blocks, axis=1)

    bias = numpy.zeros((4 * dim,))
    params[_p(prefix, 'b')] = bias.astype(config.floatX)

    return params
def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None):
    """
    Build the symbolic LSTM recurrence over `state_below` with theano.scan.

    :param tparams: mapping holding ``<prefix>_W``, ``<prefix>_U`` and
        ``<prefix>_b``.
    :param state_below: symbolic input; scan iterates over its first axis.
        When it is 3-dimensional the second axis is used as the number of
        samples, otherwise one sample is assumed.
    :param options: mapping providing ``dim_proj``.
    :param prefix: prefix of the parameter names to look up.
    :param mask: required (asserted non-None); scanned alongside the input
        and used to carry the previous state through where it is 0.
    :returns: the first scan output, i.e. the sequence of hidden states.
    """
    nsteps = state_below.shape[0]
    if state_below.ndim == 3:
        n_samples = state_below.shape[1]
    else:
        n_samples = 1

    assert mask is not None

    def _slice(_x, n, dim):
        # Take the n-th block of width `dim` along the last axis.
        if _x.ndim == 3:
            return _x[:, :, n * dim:(n + 1) * dim]
        return _x[:, n * dim:(n + 1) * dim]

    def _step(m_, x_, h_, c_):
        # m_: mask slice, x_: projected input slice,
        # h_ / c_: hidden and cell state from the previous step.
        preact = tensor.dot(h_, tparams[_p(prefix, 'U')])
        preact += x_
        # NOTE(review): the bias is also added to the input projection
        # below, so it enters the pre-activation twice - confirm intended.
        preact += tparams[_p(prefix, 'b')]

        # The pre-activation holds four blocks of width dim_proj:
        # three sigmoid gates (i, f, o) and one tanh candidate.
        i = tensor.nnet.sigmoid(_slice(preact, 0, options['dim_proj']))
        f = tensor.nnet.sigmoid(_slice(preact, 1, options['dim_proj']))
        o = tensor.nnet.sigmoid(_slice(preact, 2, options['dim_proj']))
        c = tensor.tanh(_slice(preact, 3, options['dim_proj']))

        c = f * c_ + i * c
        # Where the mask is 0 keep the previous cell state unchanged.
        c = m_[:, None] * c + (1. - m_)[:, None] * c_

        h = o * tensor.tanh(c)
        # Same masking for the hidden state.
        h = m_[:, None] * h + (1. - m_)[:, None] * h_

        return h, c

    # Input projection, computed once for all steps before the scan.
    state_below = (tensor.dot(state_below, tparams[_p(prefix, 'W')]) +
                   tparams[_p(prefix, 'b')])

    dim_proj = options['dim_proj']
    # Both recurrent states start as zeros of shape (n_samples, dim_proj).
    rval, updates = theano.scan(_step,
                                sequences=[mask, state_below],
                                outputs_info=[tensor.alloc(numpy_floatX(0.),
                                                           n_samples,
                                                           dim_proj),
                                              tensor.alloc(numpy_floatX(0.),
                                                           n_samples,
                                                           dim_proj)],
                                name=_p(prefix, '_layers'),
                                n_steps=nsteps)
    # rval[0] is the hidden-state sequence; the cell states and the scan
    # updates are not returned.
    return rval[0]
# Registry of encoder layers: name -> (parameter initialiser, layer builder).
# NOTE(review): an earlier comment here described an 'ff' (feed-forward)
# layer meant to sit between the lstm and the classifier; only 'lstm' is
# actually registered.
layers = {'lstm': (param_init_lstm, lstm_layer)}
def sgd(lr, tparams, grads, x, mask, y, cost):
    """ Stochastic Gradient Descent

    :note: A more complicated version of sgd than needed. It is split
        into two compiled functions to mirror adadelta and rmsprop.

    :returns: ``(f_grad_shared, f_update)``. ``f_grad_shared(x, mask, y)``
        returns the cost and stores the gradients in shared variables;
        ``f_update(lr)`` applies ``p - lr * g`` to every parameter.
    """
    # One shared buffer per parameter to hold the minibatch gradient.
    gshared = []
    for k, p in tparams.items():
        gshared.append(theano.shared(p.get_value() * 0., name='%s_grad' % k))
    gsup = list(zip(gshared, grads))

    # Computes the cost and stashes the gradients; weights are untouched.
    f_grad_shared = theano.function([x, mask, y], cost, updates=gsup,
                                    name='sgd_f_grad_shared')

    pup = []
    for p, g in zip(tparams.values(), gshared):
        pup.append((p, p - lr * g))

    # Applies the previously stored gradients to the weights.
    f_update = theano.function([lr], [], updates=pup,
                               name='sgd_f_update')

    return f_grad_shared, f_update
def adadelta(lr, tparams, grads, x, mask, y, cost):
    """
    Build the two-step adadelta optimizer functions.

    :returns: ``(f_grad_shared, f_update)``. ``f_grad_shared(x, mask, y)``
        returns the cost while storing the gradients and updating the
        running average of their squares; ``f_update(lr)`` applies the
        step and updates the running average of squared steps. `lr` is
        accepted but unused (``on_unused_input='ignore'``).
    """
    def _zero_shared(name_template):
        # One zero-filled shared variable per parameter, same shape.
        return [theano.shared(p.get_value() * numpy_floatX(0.),
                              name=name_template % k)
                for k, p in tparams.items()]

    zipped_grads = _zero_shared('%s_grad')
    running_up2 = _zero_shared('%s_rup2')
    running_grads2 = _zero_shared('%s_rgrad2')

    zgup = list(zip(zipped_grads, grads))
    rg2up = []
    for rg2, g in zip(running_grads2, grads):
        rg2up.append((rg2, 0.95 * rg2 + 0.05 * (g ** 2)))

    f_grad_shared = theano.function([x, mask, y], cost, updates=zgup + rg2up,
                                    name='adadelta_f_grad_shared')

    updir = []
    for zg, ru2, rg2 in zip(zipped_grads, running_up2, running_grads2):
        step_scale = -tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6)
        updir.append(step_scale * zg)

    ru2up = []
    for ru2, ud in zip(running_up2, updir):
        ru2up.append((ru2, 0.95 * ru2 + 0.05 * (ud ** 2)))
    param_up = []
    for p, ud in zip(tparams.values(), updir):
        param_up.append((p, p + ud))

    f_update = theano.function([lr], [], updates=ru2up + param_up,
                               on_unused_input='ignore',
                               name='adadelta_f_update')

    return f_grad_shared, f_update
def rmsprop(lr, tparams, grads, x, mask, y, cost):
    """
    Build the two-step rmsprop optimizer functions.

    :returns: ``(f_grad_shared, f_update)``. ``f_grad_shared(x, mask, y)``
        returns the cost while storing the gradients and updating running
        averages of the gradients and of their squares; ``f_update(lr)``
        updates the per-parameter step buffers and applies them. `lr` is
        accepted but unused (``on_unused_input='ignore'``).
    """
    def _zero_shared(name_template):
        # One zero-filled shared variable per parameter, same shape.
        return [theano.shared(p.get_value() * numpy_floatX(0.),
                              name=name_template % k)
                for k, p in tparams.items()]

    zipped_grads = _zero_shared('%s_grad')
    running_grads = _zero_shared('%s_rgrad')
    running_grads2 = _zero_shared('%s_rgrad2')

    zgup = list(zip(zipped_grads, grads))
    rgup = []
    for rg, g in zip(running_grads, grads):
        rgup.append((rg, 0.95 * rg + 0.05 * g))
    rg2up = []
    for rg2, g in zip(running_grads2, grads):
        rg2up.append((rg2, 0.95 * rg2 + 0.05 * (g ** 2)))

    f_grad_shared = theano.function([x, mask, y], cost,
                                    updates=zgup + rgup + rg2up,
                                    name='rmsprop_f_grad_shared')

    # Step buffers are created after the first function is compiled,
    # as in the original ordering.
    updir = _zero_shared('%s_updir')

    new_steps = []
    for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads,
                               running_grads2):
        new_steps.append(
            0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4))
    updir_new = list(zip(updir, new_steps))

    # Parameters move by the freshly computed step expression.
    param_up = []
    for p, step in zip(tparams.values(), new_steps):
        param_up.append((p, p + step))

    f_update = theano.function([lr], [], updates=updir_new + param_up,
                               on_unused_input='ignore',
                               name='rmsprop_f_update')

    return f_grad_shared, f_update
def build_model(tparams, options):
    """
    Build the symbolic classifier graph and its prediction functions.

    :param tparams: mapping of shared parameters; ``Wemb``, ``U`` and ``b``
        are read here, the encoder reads its own entries.
    :param options: mapping providing ``dim_proj``, ``encoder`` and
        ``use_dropout``.
    :returns: ``(use_noise, x, mask, y, f_pred_prob, f_pred, cost)`` where
        ``use_noise`` is the shared dropout switch, ``x``/``mask``/``y`` are
        the symbolic inputs, the two ``f_*`` entries are compiled functions
        of ``(x, mask)`` and ``cost`` is the symbolic mean negative
        log-probability of the targets.
    """
    trng = RandomStreams(1234)

    # Used for dropout.
    use_noise = theano.shared(numpy_floatX(0.))

    x = tensor.matrix('x', dtype='int64')
    mask = tensor.matrix('mask', dtype=config.floatX)
    y = tensor.vector('y', dtype='int64')

    n_timesteps = x.shape[0]
    n_samples = x.shape[1]

    # Embedding lookup, reshaped to (n_timesteps, n_samples, dim_proj).
    embedded_shape = [n_timesteps, n_samples, options['dim_proj']]
    emb = tparams['Wemb'][x.flatten()].reshape(embedded_shape)

    encoder_name = options['encoder']
    encode = get_layer(encoder_name)[1]
    proj = encode(tparams, emb, options, prefix=encoder_name, mask=mask)

    if encoder_name == 'lstm':
        # Masked mean over the first axis.
        masked_sum = (proj * mask[:, :, None]).sum(axis=0)
        proj = masked_sum / mask.sum(axis=0)[:, None]
    if options['use_dropout']:
        proj = dropout_layer(proj, use_noise, trng)

    logits = tensor.dot(proj, tparams['U']) + tparams['b']
    pred = tensor.nnet.softmax(logits)

    f_pred_prob = theano.function([x, mask], pred, name='f_pred_prob')
    f_pred = theano.function([x, mask], pred.argmax(axis=1), name='f_pred')

    # Small offset keeps the log finite.
    target_probs = pred[tensor.arange(n_samples), y]
    cost = -tensor.log(target_probs + 1e-8).mean()

    return use_noise, x, mask, y, f_pred_prob, f_pred, cost
def pred_probs(f_pred_prob, prepare_data, data, iterator, verbose=False):
    """ If you want to use a trained model, this is useful to compute
    the probabilities of new examples.

    :param f_pred_prob: callable ``(x, mask) -> probabilities`` with one row
        per example of the minibatch.
    :param prepare_data: callable ``(seqs, labels, maxlen=None)`` returning
        ``(x, mask, y)``.
    :param data: pair ``(sequences, labels)``.
    :param iterator: iterable of ``(minibatch_number, index_array)`` pairs.
    :param verbose: when true, print a progress line per minibatch.
    :returns: array of dtype ``config.floatX`` with one row per example in
        `data`. The number of columns follows the first minibatch's
        prediction (it was previously fixed at 2); with an empty `iterator`
        an all-zero array with 2 columns is returned, as before.
    """
    n_samples = len(data[0])
    probs = None

    n_done = 0

    for _, valid_index in iterator:
        x, mask, y = prepare_data([data[0][t] for t in valid_index],
                                  numpy.array(data[1])[valid_index],
                                  maxlen=None)
        batch_probs = numpy.asarray(f_pred_prob(x, mask))
        if probs is None:
            # Size the output from the model instead of assuming 2 classes.
            probs = numpy.zeros(
                (n_samples, batch_probs.shape[1])).astype(config.floatX)
        probs[valid_index, :] = batch_probs

        n_done += len(valid_index)
        if verbose:
            print('%d/%d samples classified' % (n_done, n_samples))

    if probs is None:
        # No minibatch was processed: keep the historical output shape.
        probs = numpy.zeros((n_samples, 2)).astype(config.floatX)

    return probs
def pred_error(f_pred, prepare_data, data, iterator, verbose=False):
    """
    Compute the classification error rate over `data`.

    f_pred: Theano fct computing the prediction
    prepare_data: usual prepare_data for that dataset.
    data: pair (sequences, labels).
    iterator: iterable of (minibatch_number, index_array) pairs.
    verbose: accepted but not used.

    Returns 1 minus (number of correct predictions / len(data[0])).
    """
    n_correct = 0
    for _, batch_index in iterator:
        batch_sequences = [data[0][t] for t in batch_index]
        x, mask, y = prepare_data(batch_sequences,
                                  numpy.array(data[1])[batch_index],
                                  maxlen=None)
        preds = f_pred(x, mask)
        targets = numpy.array(data[1])[batch_index]
        n_correct += (preds == targets).sum()

    accuracy = numpy_floatX(n_correct) / len(data[0])
    return 1. - accuracy
def train_lstm(
dim_proj=128, # word embeding dimension and LSTM number of hidden units.
patience=10, # Number of epoch to wait before early stop if no progress
max_epochs=5000, # The maximum number of epoch to run
dispFreq=10, # Display to stdout the training progress every N updates
decay_c=0., # Weight decay for the classifier applied to the U weights.
lrate=0.0001, # Learning rate for sgd (not used for adadelta and rmsprop)
n_words=10000, # Vocabulary size
optimizer=adadelta, # sgd, adadelta and rmsprop available, sgd very hard to use, not recommanded (probably need momentum and decaying learning rate).
encoder='lstm', # TODO: can be removed must be lstm.
saveto='lstm_model.npz', # The best model will be saved there
validFreq=370, # Compute the validation error after this number of update.
saveFreq=1110, # Save the parameters after every saveFreq updates
maxlen=100, # Sequence longer then this get ignored
batch_size=16, # The batch size during training.
valid_batch_size=64, # The batch size used for validation/test set.
dataset='imdb',
# Parameter for extra option
noise_std=0.,
use_dropout=True, # if False slightly faster, but worst test error
# This frequently need a bigger model.
reload_model="", # Path to a saved model we want to start from.
test_size=-1, # If >0, we keep only this number of test example.
):
# Model options
model_options = locals().copy()
print "model options", model_options
load_data, prepare_data = get_dataset(dataset)
print 'Loading data'
train, valid, test = load_data(n_words=n_words, valid_portion=0.05,
maxlen=maxlen)
if test_size > 0:
# The test set is sorted by size, but we want to keep random
# size example. So we must select a random selection of the
# examples.
idx = numpy.arange(len(test[0]))
random.shuffle(idx)
idx = idx[:test_size]
test = ([test[0][n] for n in idx], [test[1][n] for n in idx])
ydim = numpy.max(train[1]) + 1
model_options['ydim'] = ydim
print 'Building model'
# This create the initial parameters as numpy ndarrays.
# Dict name (string) -> numpy ndarray
params = init_params(model_options)
if reload_model:
load_params('lstm_model.npz', params)
# This create Theano Shared Variable from the parameters.
# Dict name (string) -> Theano Tensor Shared Variable
# params and tparams have different copy of the weights.
tparams = init_tparams(params)
# use_noise is for dropout
(use_noise, x, mask,
y, f_pred_prob, f_pred, cost) = build_model(tparams, model_options)
if decay_c > 0.:
decay_c = theano.shared(numpy_floatX(decay_c), name='decay_c')
weight_decay = 0.
weight_decay += (tparams['U'] ** 2).sum()
weight_decay *= decay_c
cost += weight_decay
f_cost = theano.function([x, mask, y], cost, name='f_cost')
grads = tensor.grad(cost, wrt=tparams.values())
f_grad = theano.function([x, mask, y], grads, name='f_grad')
lr = tensor.scalar(name='lr')
f_grad_shared, f_update = optimizer(lr, tparams, grads,
x, mask, y, cost)
print 'Optimization'
kf_valid = get_minibatches_idx(len(valid[0]), valid_batch_size)
kf_test = get_minibatches_idx(len(test[0]), valid_batch_size)
print "%d train examples" % len(train[0])
print "%d valid examples" % len(valid[0])
print "%d test examples" % len(test[0])
history_errs = []
best_p = None
bad_count = 0
if validFreq == -1:
validFreq = len(train[0]) / batch_size
if saveFreq == -1:
saveFreq = len(train[0]) / batch_size
uidx = 0 # the number of update done
estop = False # early stop
start_time = time.clock()
try:
for eidx in xrange(max_epochs):
n_samples = 0
# Get new shuffled index for the training set.
kf = get_minibatches_idx(len(train[0]), batch_size, shuffle=True)
for _, train_index in kf:
uidx += 1
use_noise.set_value(1.)
# Select the random examples for this minibatch
y = [train[1][t] for t in train_index]
x = [train[0][t]for t in train_index]
# Get the data in numpy.ndarray format
# This swap the axis!
# Return something of shape (minibatch maxlen, n samples)
x, mask, y = prepare_data(x, y)
n_samples += x.shape[1]
cost = f_grad_shared(x, mask, y)
f_update(lrate)
if numpy.isnan(cost) or numpy.isinf(cost):
print 'NaN detected'
return 1., 1., 1.
if numpy.mod(uidx, dispFreq) == 0:
print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost
if saveto and numpy.mod(uidx, saveFreq) == 0:
print 'Saving...',
if best_p is not None:
params = best_p
else:
params = unzip(tparams)
numpy.savez(saveto, history_errs=history_errs, **params)
pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'), -1)
print 'Done'
if numpy.mod(uidx, validFreq) == 0:
use_noise.set_value(0.)
train_err = pred_error(f_pred, prepare_data, train, kf)
valid_err = pred_error(f_pred, prepare_data, valid,
kf_valid)
test_err = pred_error(f_pred, prepare_data, test, kf_test)
history_errs.append([valid_err, test_err])
if (uidx == 0 or
valid_err <= numpy.array(history_errs)[:,
0].min()):
best_p = unzip(tparams)
bad_counter = 0
print ('Train ', train_err, 'Valid ', valid_err,
'Test ', test_err)
if (len(history_errs) > patience and
valid_err >= numpy.array(history_errs)[:-patience,
0].min()):
bad_counter += 1
if bad_counter > patience:
print 'Early Stop!'
estop = True
break
print 'Seen %d samples' % n_samples
if estop:
break
except KeyboardInterrupt:
print "Training interupted"
end_time = time.clock()
if best_p is not None:
zipp(best_p, tparams)
else:
best_p = unzip(tparams)
use_noise.set_value(0.)
kf_train_sorted = get_minibatches_idx(len(train[0]), batch_size)
train_err = pred_error(f_pred, prepare_data, train, kf_train_sorted)
valid_err = pred_error(f_pred, prepare_data, valid, kf_valid)
test_err = pred_error(f_pred, prepare_data, test, kf_test)
print 'Train ', train_err, 'Valid ', valid_err, 'Test ', test_err
if saveto:
numpy.savez(saveto, train_err=train_err,
valid_err=valid_err, test_err=test_err,
history_errs=history_errs, **best_p)
print 'The code run for %d epochs, with %f sec/epochs' % (
(eidx + 1), (end_time - start_time) / (1. * (eidx + 1)))
print >> sys.stderr, ('Training took %.1fs' %
(end_time - start_time))
return train_err, valid_err, test_err
if __name__ == '__main__':
    # See function train_lstm for all possible parameters and their
    # definitions.
    train_lstm(
        #reload_model="lstm_model.npz",
        max_epochs=100,
        test_size=500,
    )
================================================
FILE: DeepLearningTutorials/code/mlp.py
================================================
"""
This tutorial introduces the multilayer perceptron using Theano.
A multilayer perceptron is a logistic regressor where
instead of feeding the input to the logistic regression you insert an
intermediate layer, called the hidden layer, that has a nonlinear
activation function (usually tanh or sigmoid). One can use many such
hidden layers making the architecture deep. The tutorial will also tackle
the problem of MNIST digit classification.
.. math::
f(x) = G( b^{(2)} + W^{(2)}( s( b^{(1)} + W^{(1)} x))),
References:
- textbooks: "Pattern Recognition and Machine Learning" -
Christopher M. Bishop, section 5
"""
__docformat__ = 'restructedtext en'
import os
import sys
import time
import numpy
import theano
import theano.tensor as T
from logistic_sgd import LogisticRegression, load_data
# start-snippet-1
class HiddenLayer(object):
    def __init__(self, rng, input, n_in, n_out, W=None, b=None,
                 activation=T.tanh):
        """
        Fully-connected hidden layer of an MLP.

        The weight matrix W has shape (n_in, n_out) and the bias vector b
        has shape (n_out,). The layer output is
        ``activation(dot(input, W) + b)``, or the linear term itself when
        `activation` is None. The default activation is tanh.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dmatrix
        :param input: a symbolic tensor of shape (n_examples, n_in)

        :type n_in: int
        :param n_in: dimensionality of input

        :type n_out: int
        :param n_out: number of hidden units

        :type W: shared variable or None
        :param W: existing weights to reuse; sampled when None

        :type b: shared variable or None
        :param b: existing bias to reuse; zeros when None

        :type activation: theano.Op or function
        :param activation: Non linearity to be applied in the hidden
                           layer
        """
        self.input = input
        # end-snippet-1

        # When no weights are supplied, W is sampled uniformly from
        # [-sqrt(6 / (n_in + n_out)), sqrt(6 / (n_in + n_out))] and stored
        # with dtype theano.config.floatX so the code is runnable on GPU.
        # Note: the optimal initialization depends on the activation
        #       function (among other things). Results presented in
        #       [Xavier10] suggest 4 times larger initial weights for
        #       sigmoid compared to tanh; other activations use the tanh
        #       range.
        if W is None:
            bound = numpy.sqrt(6. / (n_in + n_out))
            W_values = numpy.asarray(
                rng.uniform(low=-bound, high=bound, size=(n_in, n_out)),
                dtype=theano.config.floatX
            )
            if activation == theano.tensor.nnet.sigmoid:
                W_values *= 4

            W = theano.shared(value=W_values, name='W', borrow=True)

        if b is None:
            b = theano.shared(
                value=numpy.zeros((n_out,), dtype=theano.config.floatX),
                name='b',
                borrow=True
            )

        self.W = W
        self.b = b

        lin_output = T.dot(input, self.W) + self.b
        if activation is None:
            self.output = lin_output
        else:
            self.output = activation(lin_output)

        # parameters of the model
        self.params = [self.W, self.b]
# start-snippet-2
class MLP(object):
    """Multi-Layer Perceptron Class

    A multilayer perceptron is a feedforward artificial neural network model
    that has one layer or more of hidden units and nonlinear activations.
    Here a single ``HiddenLayer`` (tanh) feeds a ``LogisticRegression``
    layer that acts as the softmax output layer.
    """

    def __init__(self, rng, input, n_in, n_hidden, n_out):
        """Initialize the parameters for the multilayer perceptron

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
        architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
        which the datapoints lie

        :type n_hidden: int
        :param n_hidden: number of hidden units

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
        which the labels lie

        """

        # One hidden layer with tanh activation; the activation could be
        # replaced by sigmoid or any other nonlinear function.
        hidden = HiddenLayer(
            rng=rng,
            input=input,
            n_in=n_in,
            n_out=n_hidden,
            activation=T.tanh
        )
        self.hiddenLayer = hidden

        # The logistic regression layer takes the hidden units as input.
        log_reg = LogisticRegression(
            input=hidden.output,
            n_in=n_hidden,
            n_out=n_out
        )
        self.logRegressionLayer = log_reg
        # end-snippet-2 start-snippet-3
        # L1 norm of both weight matrices; a regularization option is to
        # keep it small
        self.L1 = abs(hidden.W).sum() + abs(log_reg.W).sum()

        # squared L2 norm of both weight matrices; a regularization option
        # is to keep it small
        self.L2_sqr = (hidden.W ** 2).sum() + (log_reg.W ** 2).sum()

        # the MLP's negative log likelihood and error count are the ones
        # computed by the logistic regression layer
        self.negative_log_likelihood = log_reg.negative_log_likelihood
        self.errors = log_reg.errors

        # the model's parameters are those of its two layers
        self.params = hidden.params + log_reg.params
        # end-snippet-3
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
dataset='mnist.pkl.gz', batch_size=20, n_hidden=500):
"""
Demonstrate stochastic gradient descent optimization for a multilayer
perceptron
This is demonstrated on MNIST.
:type learning_rate: float
:param learning_rate: learning rate used (factor for the stochastic
gradient
:type L1_reg: float
:param L1_reg: L1-norm's weight when added to the cost (see
regularization)
:type L2_reg: float
:param L2_reg: L2-norm's weight when added to the cost (see
regularization)
:type n_epochs: int
:param n_epochs: maximal number of epochs to run the optimizer
:type dataset: string
:param dataset: the path of the MNIST dataset file from
http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
"""
datasets = load_data(dataset)
train_set_x, train_set_y = datasets[0]
valid_set_x, valid_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]
# compute number of minibatches for training, validation and testing
n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size
######################
# BUILD ACTUAL MODEL #
######################
print '... building the model'
# allocate symbolic variables for the data
index = T.lscalar() # index to a [mini]batch
x = T.matrix('x') # the data is presented as rasterized images
y = T.ivector('y') # the labels are presented as 1D vector of
# [int] labels
rng = numpy.random.RandomState(1234)
# construct the MLP class
classifier = MLP(
rng=rng,
input=x,
n_in=28 * 28,
n_hidden=n_hidden,
n_out=10
)
# start-snippet-4
# the cost we minimize during training is the negative log likelihood of
# the model plus the regularization terms (L1 and L2); cost is expressed
# here symbolically
cost = (
classifier.negative_log_likelihood(y)
+ L1_reg * classifier.L1
+ L2_reg * classifier.L2_sqr
)
# end-snippet-4
# compiling a Theano function that computes the mistakes that are made
# by the model on a minibatch
test_model = theano.function(
inputs=[index],
outputs=classifier.errors(y),
givens={
x: test_set_x[index * batch_size:(index + 1) * batch_size],
y: test_set_y[index * batch_size:(index + 1) * batch_size]
}
)
validate_model = theano.function(
inputs=[index],
outputs=classifier.errors(y),
givens={
x: valid_set_x[index * batch_size:(index + 1) * batch_size],
y: valid_set_y[index * batch_size:(index + 1) * batch_size]
}
)
# start-snippet-5
# compute the gradient of cost with respect to theta (sotred in params)
# the resulting gradients will be stored in a list gparams
gparams = [T.grad(cost, param) for param in classifier.params]
# specify how to update the parameters of the model as a list of
# (variable, update expression) pairs
# given two list the zip A = [a1, a2, a3, a4] and B = [b1, b2, b3, b4] of
# same length, zip generates a list C of same size, where each element
# is a pair formed from the two lists :
# C = [(a1, b1), (a2, b2), (a3, b3), (a4, b4)]
updates = [
(param, param - learning_rate * gparam)
for param, gparam in zip(classifier.params, gparams)
]
# compiling a Theano function `train_model` that returns the cost, but
# in the same time updates the parameter of the model based on the rules
# defined in `updates`
train_model = theano.function(
inputs=[index],
outputs=cost,
updates=updates,
givens={
x: train_set_x[index * batch_size: (index + 1) * batch_size],
y: train_set_y[index * batch_size: (index + 1) * batch_size]
}
)
# end-snippet-5
###############
# TRAIN MODEL #
###############
print '... training'
# early-stopping parameters
patience = 10000 # look as this many examples regardless
patience_increase = 2 # wait this much longer when a new best is
# found
improvement_threshold = 0.995 # a relative improvement of this much is
# considered significant
validation_frequency = min(n_train_batches, patience / 2)
# go through this many
# minibatche before checking the network
# on the validation set; in this case we
# check every epoch
best_validation_loss = numpy.inf
best_iter = 0
test_score = 0.
start_time = time.clock()
epoch = 0
done_looping = False
while (epoch < n_epochs) and (not done_looping):
epoch = epoch + 1
for minibatch_index in xrange(n_train_batches):
minibatch_avg_cost = train_model(minibatch_index)
# iteration number
iter = (epoch - 1) * n_train_batches + minibatch_index
if (iter + 1) % validation_frequency == 0:
# compute zero-one loss on validation set
validation_losses = [validate_model(i) for i
in xrange(n_valid_batches)]
this_validation_loss = numpy.mean(validation_losses)
print(
'epoch %i, minibatch %i/%i, validation error %f %%' %
(
epoch,
minibatch_index + 1,
n_train_batches,
this_validation_loss * 100.
)
)
# if we got the best validation score until now
if this_validation_loss < best_validation_loss:
# improve patience if loss improvement is good enough
if (
this_validation_loss < best_validation_loss *
improvement_threshold
):
patience = max(patience, iter * patience_increase)
best_validation_loss = this_validation_loss
best_iter = iter
# test it on the test set
test_losses = [test_model(i) for i
in xrange(n_test_batches)]
test_score = numpy.mean(test_losses)
print((' epoch %i, minibatch %i/%i, test error of '
'best model %f %%') %
(epoch, minibatch_index + 1, n_train_batches,
test_score * 100.))
if patience <= iter:
done_looping = True
break
end_time = time.clock()
print(('Optimization complete. Best validation score of %f %% '
'obtained at iteration %i, with test performance %f %%') %
(best_validation_loss * 100., best_iter + 1, test_score * 100.))
print >> sys.stderr, ('The code for file ' +
os.path.split(__file__)[1] +
' ran for %.2fm' % ((end_time - start_time) / 60.))
# Call test_mlp() only when this module is executed directly as a script,
# not when it is imported by another module.
if __name__ == '__main__':
test_mlp()
================================================
FILE: DeepLearningTutorials/code/rbm.py
================================================
"""This tutorial introduces restricted boltzmann machines (RBM) using Theano.
Boltzmann Machines (BMs) are a particular form of energy-based model which
contain hidden variables. Restricted Boltzmann Machines further restrict BMs
to those without visible-visible and hidden-hidden connections.
"""
import time
try:
import PIL.Image as Image
except ImportError:
import Image
import numpy
import theano
import theano.tensor as T
import os
from theano.tensor.shared_randomstreams import RandomStreams
from utils import tile_raster_images
from logistic_sgd import load_data
# start-snippet-1
class RBM(object):
"""Restricted Boltzmann Machine (RBM) """
def __init__(
self,
input=None,
n_visible=784,
n_hidden=500,
W=None,
hbias=None,
vbias=None,
numpy_rng=None,
theano_rng=None
):
"""
RBM constructor. Defines the parameters of the model along with
basic operations for inferring hidden from visible (and vice-versa),
as well as for performing CD updates.
:param input: None for standalone RBMs or symbolic variable if RBM is
part of a larger graph.
:param n_visible: number of visible units
:param n_hidden: number of hidden units
:param W: None for standalone RBMs or symbolic variable pointing to a
shared weight matrix in case RBM is part of a DBN network; in a DBN,
the weights are shared between RBMs and layers of a MLP
:param hbias: None for standalone RBMs or symbolic variable pointing
to a shared hidden units bias vector in case RBM is part of a
different network
:param vbias: None for standalone RBMs or a symbolic variable
pointing to a shared visible units bias
"""
self.n_visible = n_visible
self.n_hidden = n_hidden
if numpy_rng is None:
# create a number generator
numpy_rng = numpy.random.RandomState(1234)
if theano_rng is None:
theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
if W is None:
# W is initialized with `initial_W`, which is uniformly
# sampled between -4*sqrt(6./(n_visible+n_hidden)) and
# 4*sqrt(6./(n_hidden+n_visible)); the output of uniform is
# converted using asarray to dtype theano.config.floatX so
# that the code is runnable on GPU
initial_W = numpy.asarray(
numpy_rng.uniform(
low=-4 * numpy.sqrt(6. / (n_hidden + n_visible)),
high=4 * numpy.sqrt(6. / (n_hidden + n_visible)),
size=(n_visible, n_hidden)
),
dtype=theano.config.floatX
)
# theano shared variables for weights and biases
W = theano.shared(value=initial_W, name='W', borrow=True)
if hbias i
gitextract_7ps85ir2/
├── .gitignore
├── DBN.py
├── DeepLearningTutorials/
│ ├── .gitignore
│ ├── .hgignore
│ ├── .travis.yml
│ ├── README.rst
│ ├── __init__.py
│ ├── code/
│ │ ├── DBN.py
│ │ ├── SdA.py
│ │ ├── __init__.py
│ │ ├── cA.py
│ │ ├── convolutional_mlp.py
│ │ ├── dA.py
│ │ ├── hmc/
│ │ │ ├── __init__.py
│ │ │ ├── hmc.py
│ │ │ └── test_hmc.py
│ │ ├── imdb.py
│ │ ├── imdb_preprocess.py
│ │ ├── logistic_cg.py
│ │ ├── logistic_sgd.py
│ │ ├── lstm.py
│ │ ├── mlp.py
│ │ ├── rbm.py
│ │ ├── rnnrbm.py
│ │ ├── rnnslu.py
│ │ ├── test.py
│ │ └── utils.py
│ ├── data/
│ │ ├── download.sh
│ │ └── training_colorpatches_16x16_demo.mat
│ ├── doc/
│ │ ├── .templates/
│ │ │ └── layout.html
│ │ ├── DBN.txt
│ │ ├── LICENSE.txt
│ │ ├── Makefile
│ │ ├── SdA.txt
│ │ ├── conf.py
│ │ ├── contents.txt
│ │ ├── dA.txt
│ │ ├── deep.txt
│ │ ├── gettingstarted.txt
│ │ ├── hmc.txt
│ │ ├── index.txt
│ │ ├── lenet.txt
│ │ ├── logreg.txt
│ │ ├── lstm.txt
│ │ ├── mlp.txt
│ │ ├── rbm.txt
│ │ ├── references.txt
│ │ ├── rnnrbm.txt
│ │ ├── rnnslu.txt
│ │ ├── scripts/
│ │ │ └── docgen.py
│ │ └── utilities.txt
│ ├── issues_closed/
│ │ └── 2_RBM_cost_fn.txt
│ ├── issues_open/
│ │ ├── 1_SdA_performance.txt
│ │ ├── 3_RBM_scan_GPU.txt
│ │ ├── 4_RBM_scan.txt
│ │ ├── 5_results.txt
│ │ └── 6_benchmarking_pybrain.txt
│ └── misc/
│ └── do_nightly_build
├── README.md
├── joplin/
│ ├── alabama.xml
│ ├── cleopha.xml
│ ├── entertainer.xml
│ ├── maple_leaf.xml
│ ├── searchlight.xml
│ ├── strenous.xml
│ ├── syncopations.xml
│ ├── winners.xml
│ └── winners_2.xml
├── joplin-model.pickle
├── joplin_data.pickle
├── midi/
│ ├── DataTypeConverters.py
│ ├── EventDispatcher.py
│ ├── Icon_
│ ├── MidiFileParser.py
│ ├── MidiInFile.py
│ ├── MidiInStream.py
│ ├── MidiOutFile.py
│ ├── MidiOutStream.py
│ ├── MidiToText.py
│ ├── RawInstreamFile.py
│ ├── RawOutstreamFile.py
│ ├── __init__.py
│ ├── changes.txt
│ ├── constants.py
│ ├── example_mimimal_type0.py
│ ├── example_print_channel_0.py
│ ├── example_print_events.py
│ ├── example_print_file.py
│ ├── example_transpose_octave.py
│ ├── files.txt
│ ├── hallelujah.mid
│ ├── license.txt
│ ├── readme
│ ├── readme.txt
│ ├── utils.py
│ └── version.txt
├── myparser.py
└── neural-plugin/
├── DoubleTime.js
├── neural-plugin.js
├── neural-plugin.ui
└── output-window.ui
SYMBOL INDEX (360 symbols across 37 files)
FILE: DBN.py
class AutoencodingDBN (line 33) | class AutoencodingDBN(object):
method __init__ (line 39) | def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
method dump_params (line 182) | def dump_params(self, outLoc):
method pretraining_functions (line 196) | def pretraining_functions(self, train_set_x, batch_size, k):
method build_finetune_functions (line 246) | def build_finetune_functions(self, train_set_x, batch_size, learning_r...
method build_generative_finetune_fns (line 304) | def build_generative_finetune_fns(self, train_set_outputs, train_set_l...
method generate (line 356) | def generate(self, top_level):
method label (line 370) | def label(self, to_label, x_mask, learning_rate):
method train_dbn (line 391) | def train_dbn(self, data_file, finetune_lr=0.01, pretraining_epochs=100,
method sample (line 534) | def sample(self, top_level=None, rootLoc='./', save=True, threshold=0.5,
method label_from_file (line 559) | def label_from_file(self, rootLoc, fileLoc, learn_rate, n_iters, thres...
function melody_blocker (line 604) | def melody_blocker(snippet):
function load_from_dump (line 623) | def load_from_dump(inLoc):
FILE: DeepLearningTutorials/code/DBN.py
class DBN (line 19) | class DBN(object):
method __init__ (line 30) | def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
method pretraining_functions (line 139) | def pretraining_functions(self, train_set_x, batch_size, k):
method build_finetune_functions (line 189) | def build_finetune_functions(self, datasets, batch_size, learning_rate):
function test_DBN (line 279) | def test_DBN(finetune_lr=0.1, pretraining_epochs=100,
FILE: DeepLearningTutorials/code/SdA.py
class SdA (line 48) | class SdA(object):
method __init__ (line 59) | def __init__(
method pretraining_functions (line 180) | def pretraining_functions(self, train_set_x, batch_size):
method build_finetune_functions (line 231) | def build_finetune_functions(self, datasets, batch_size, learning_rate):
function test_SdA (line 326) | def test_SdA(finetune_lr=0.1, pretraining_epochs=15,
FILE: DeepLearningTutorials/code/cA.py
class cA (line 50) | class cA(object):
method __init__ (line 79) | def __init__(self, numpy_rng, input=None, n_visible=784, n_hidden=100,
method get_hidden_values (line 172) | def get_hidden_values(self, input):
method get_jacobian (line 176) | def get_jacobian(self, hidden, W):
method get_reconstructed_input (line 186) | def get_reconstructed_input(self, hidden):
method get_cost_updates (line 193) | def get_cost_updates(self, contraction_level, learning_rate):
function test_cA (line 228) | def test_cA(learning_rate=0.01, training_epochs=20,
FILE: DeepLearningTutorials/code/convolutional_mlp.py
class LeNetConvPoolLayer (line 39) | class LeNetConvPoolLayer(object):
method __init__ (line 42) | def __init__(self, rng, input, filter_shape, image_shape, poolsize=(2,...
function evaluate_lenet5 (line 114) | def evaluate_lenet5(learning_rate=0.1, n_epochs=200,
function experiment (line 342) | def experiment(state, channel):
FILE: DeepLearningTutorials/code/dA.py
class dA (line 53) | class dA(object):
method __init__ (line 77) | def __init__(
method get_corrupted_input (line 196) | def get_corrupted_input(self, input, corruption_level):
method get_hidden_values (line 222) | def get_hidden_values(self, input):
method get_reconstructed_input (line 226) | def get_reconstructed_input(self, hidden):
method get_cost_updates (line 233) | def get_cost_updates(self, corruption_level, learning_rate):
function test_dA (line 263) | def test_dA(learning_rate=0.1, training_epochs=15,
FILE: DeepLearningTutorials/code/hmc/hmc.py
function kinetic_energy (line 15) | def kinetic_energy(vel):
function hamiltonian (line 33) | def hamiltonian(pos, vel, energy_fn):
function metropolis_hastings_accept (line 58) | def metropolis_hastings_accept(energy_prev, energy_next, s_rng):
function simulate_dynamics (line 83) | def simulate_dynamics(initial_pos, initial_vel, stepsize, n_steps, energ...
function hmc_move (line 177) | def hmc_move(s_rng, positions, energy_fn, stepsize, n_steps):
function hmc_updates (line 231) | def hmc_updates(positions, stepsize, avg_acceptance_rate, final_pos, acc...
class HMC_sampler (line 308) | class HMC_sampler(object):
method __init__ (line 324) | def __init__(self, **kwargs):
method new_from_shared_positions (line 328) | def new_from_shared_positions(
method draw (line 400) | def draw(self, **kwargs):
FILE: DeepLearningTutorials/code/hmc/test_hmc.py
function sampler_on_nd_gaussian (line 8) | def sampler_on_nd_gaussian(sampler_cls, burnin, n_samples, dim=10):
function test_hmc (line 56) | def test_hmc():
FILE: DeepLearningTutorials/code/imdb.py
function prepare_data (line 9) | def prepare_data(seqs, labels, maxlen=None):
function get_dataset_file (line 51) | def get_dataset_file(dataset, default_dataset, origin):
function load_data (line 77) | def load_data(path="imdb.pkl", n_words=100000, valid_portion=0.1, maxlen...
FILE: DeepLearningTutorials/code/imdb_preprocess.py
function tokenize (line 28) | def tokenize(sentences):
function build_dict (line 40) | def build_dict(path):
function grab_data (line 80) | def grab_data(path, dictionary):
function main (line 98) | def main():
FILE: DeepLearningTutorials/code/logistic_cg.py
class LogisticRegression (line 51) | class LogisticRegression(object):
method __init__ (line 60) | def __init__(self, input, n_in, n_out):
method negative_log_likelihood (line 100) | def negative_log_likelihood(self, y):
method errors (line 117) | def errors(self, y):
function cg_optimization_mnist (line 141) | def cg_optimization_mnist(n_epochs=50, mnist_pkl_gz='mnist.pkl.gz'):
FILE: DeepLearningTutorials/code/logistic_sgd.py
class LogisticRegression (line 49) | class LogisticRegression(object):
method __init__ (line 58) | def __init__(self, input, n_in, n_out):
method negative_log_likelihood (line 112) | def negative_log_likelihood(self, y):
method errors (line 144) | def errors(self, y):
function load_data (line 169) | def load_data(dataset):
function sgd_optimization_mnist (line 248) | def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
FILE: DeepLearningTutorials/code/lstm.py
function numpy_floatX (line 21) | def numpy_floatX(data):
function get_minibatches_idx (line 25) | def get_minibatches_idx(n, minibatch_size, shuffle=False):
function get_dataset (line 49) | def get_dataset(name):
function zipp (line 53) | def zipp(params, tparams):
function unzip (line 61) | def unzip(zipped):
function dropout_layer (line 71) | def dropout_layer(state_before, use_noise, trng):
function _p (line 81) | def _p(pp, name):
function init_params (line 85) | def init_params(options):
function load_params (line 105) | def load_params(path, params):
function init_tparams (line 115) | def init_tparams(params):
function get_layer (line 122) | def get_layer(name):
function ortho_weight (line 127) | def ortho_weight(ndim):
function param_init_lstm (line 133) | def param_init_lstm(options, params, prefix='lstm'):
function lstm_layer (line 155) | def lstm_layer(tparams, state_below, options, prefix='lstm', mask=None):
function sgd (line 209) | def sgd(lr, tparams, grads, x, mask, y, cost):
function adadelta (line 237) | def adadelta(lr, tparams, grads, x, mask, y, cost):
function rmsprop (line 270) | def rmsprop(lr, tparams, grads, x, mask, y, cost):
function build_model (line 305) | def build_model(tparams, options):
function pred_probs (line 340) | def pred_probs(f_pred_prob, prepare_data, data, iterator, verbose=False):
function pred_error (line 363) | def pred_error(f_pred, prepare_data, data, iterator, verbose=False):
function train_lstm (line 382) | def train_lstm(
FILE: DeepLearningTutorials/code/mlp.py
class HiddenLayer (line 38) | class HiddenLayer(object):
method __init__ (line 39) | def __init__(self, rng, input, n_in, n_out, W=None, b=None,
class MLP (line 112) | class MLP(object):
method __init__ (line 123) | def __init__(self, rng, input, n_in, n_hidden, n_out):
function test_mlp (line 195) | def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=1000,
FILE: DeepLearningTutorials/code/rbm.py
class RBM (line 27) | class RBM(object):
method __init__ (line 29) | def __init__(
method free_energy (line 127) | def free_energy(self, v_sample):
method propup (line 134) | def propup(self, vis):
method sample_h_given_v (line 148) | def sample_h_given_v(self, v0_sample):
method propdown (line 162) | def propdown(self, hid):
method sample_v_given_h (line 176) | def sample_v_given_h(self, h0_sample):
method gibbs_hvh (line 189) | def gibbs_hvh(self, h0_sample):
method gibbs_vhv (line 197) | def gibbs_vhv(self, v0_sample):
method get_cost_updates (line 206) | def get_cost_updates(self, lr=0.1, persistent=None, k=1):
method get_pseudo_likelihood_cost (line 289) | def get_pseudo_likelihood_cost(self, updates):
method get_reconstruction_cost (line 318) | def get_reconstruction_cost(self, updates, pre_sigmoid_nv):
function test_rbm (line 359) | def test_rbm(learning_rate=0.1, training_epochs=15,
FILE: DeepLearningTutorials/code/rnnrbm.py
function build_rbm (line 30) | def build_rbm(v, W, bv, bh, k):
function shared_normal (line 81) | def shared_normal(num_rows, num_cols, scale=1):
function shared_zeros (line 88) | def shared_zeros(*shape):
function build_rnnrbm (line 93) | def build_rnnrbm(n_visible, n_hidden, n_hidden_recurrent):
class RnnRbm (line 177) | class RnnRbm:
method __init__ (line 181) | def __init__(
method train (line 231) | def train(self, files, batch_size=100, num_epochs=200):
method generate (line 267) | def generate(self, filename, show=True):
function test_rnnrbm (line 289) | def test_rnnrbm(batch_size=100, num_epochs=200):
FILE: DeepLearningTutorials/code/rnnslu.py
function shuffle (line 29) | def shuffle(lol, seed):
function contextwin (line 42) | def contextwin(l, win):
function atisfold (line 65) | def atisfold(fold):
function conlleval (line 74) | def conlleval(p, g, w, filename, script_path):
function download (line 105) | def download(origin, destination):
function get_perf (line 114) | def get_perf(filename, folder):
class RNNSLU (line 142) | class RNNSLU(object):
method __init__ (line 144) | def __init__(self, nh, nc, ne, de, cs):
method train (line 234) | def train(self, x, y, window_size, learning_rate):
method save (line 243) | def save(self, folder):
method load (line 248) | def load(self, folder):
function main (line 254) | def main(param=None):
FILE: DeepLearningTutorials/code/test.py
function test_rnnslu (line 18) | def test_rnnslu():
function test_logistic_sgd (line 22) | def test_logistic_sgd():
function test_logistic_cg (line 26) | def test_logistic_cg():
function test_mlp (line 36) | def test_mlp():
function test_convolutional_mlp (line 40) | def test_convolutional_mlp():
function test_dA (line 44) | def test_dA():
function test_SdA (line 48) | def test_SdA():
function test_dbn (line 52) | def test_dbn():
function test_rbm (line 56) | def test_rbm():
function test_rnnrbm (line 61) | def test_rnnrbm():
function test_lstm (line 65) | def test_lstm():
function speed (line 69) | def speed():
FILE: DeepLearningTutorials/code/utils.py
function scale_to_unit_interval (line 13) | def scale_to_unit_interval(ndar, eps=1e-8):
function tile_raster_images (line 21) | def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0),
FILE: DeepLearningTutorials/doc/scripts/docgen.py
function mkdir (line 26) | def mkdir(path):
FILE: midi/DataTypeConverters.py
function getNibbles (line 18) | def getNibbles(byte):
function setNibbles (line 35) | def setNibbles(hiNibble, loNibble):
function readBew (line 53) | def readBew(value):
function writeBew (line 64) | def writeBew(value, length):
function readVar (line 85) | def readVar(value):
function varLen (line 104) | def varLen(value):
function writeVar (line 119) | def writeVar(value):
function to_n_bits (line 127) | def to_n_bits(value, length=1, nbits=7):
function toBytes (line 134) | def toBytes(value):
function fromBytes (line 139) | def fromBytes(value):
FILE: midi/EventDispatcher.py
class EventDispatcher (line 14) | class EventDispatcher:
method __init__ (line 17) | def __init__(self, outstream):
method header (line 50) | def header(self, format, nTracks, division):
method start_of_track (line 55) | def start_of_track(self, current_track):
method sysex_event (line 65) | def sysex_event(self, data):
method eof (line 70) | def eof(self):
method update_time (line 75) | def update_time(self, new_time=0, relative=1):
method reset_time (line 80) | def reset_time(self):
method channel_messages (line 88) | def channel_messages(self, hi_nible, channel, data):
method continuous_controllers (line 139) | def continuous_controllers(self, channel, controller, value):
method system_commons (line 154) | def system_commons(self, common_type, common_data):
method meta_event (line 182) | def meta_event(self, meta_type, data):
FILE: midi/MidiFileParser.py
class MidiFileParser (line 12) | class MidiFileParser:
method __init__ (line 21) | def __init__(self, raw_in, outstream):
method parseMThdChunk (line 37) | def parseMThdChunk(self):
method parseMTrkChunk (line 66) | def parseMTrkChunk(self):
method parseMTrkChunks (line 165) | def parseMTrkChunks(self):
FILE: midi/MidiInFile.py
class MidiInFile (line 7) | class MidiInFile:
method __init__ (line 38) | def __init__(self, outStream, infile):
method read (line 44) | def read(self):
method setData (line 51) | def setData(self, data=''):
FILE: midi/MidiInStream.py
class MidiInStream (line 5) | class MidiInStream:
method __init__ (line 12) | def __init__(self, midiOutStream, device):
method close (line 27) | def close(self):
method read (line 34) | def read(self, time=0):
method resetTimer (line 45) | def resetTimer(self, time=0):
FILE: midi/MidiOutFile.py
class MidiOutFile (line 9) | class MidiOutFile(MidiOutStream):
method __init__ (line 17) | def __init__(self, raw_out=''):
method write (line 23) | def write(self):
method event_slice (line 27) | def event_slice(self, slc):
method note_on (line 41) | def note_on(self, channel=0, note=0x40, velocity=0x40):
method note_off (line 51) | def note_off(self, channel=0, note=0x40, velocity=0x40):
method aftertouch (line 61) | def aftertouch(self, channel=0, note=0x40, velocity=0x40):
method continuous_controller (line 71) | def continuous_controller(self, channel, controller, value):
method patch_change (line 83) | def patch_change(self, channel, patch):
method channel_pressure (line 93) | def channel_pressure(self, channel, pressure):
method pitch_bend (line 103) | def pitch_bend(self, channel, value):
method system_exclusive (line 125) | def system_exclusive(self, data):
method midi_time_code (line 137) | def midi_time_code(self, msg_type, values):
method song_position_pointer (line 146) | def song_position_pointer(self, value):
method song_select (line 156) | def song_select(self, songNumber):
method tuning_request (line 164) | def tuning_request(self):
method header (line 175) | def header(self, format=0, nTracks=1, division=96):
method eof (line 192) | def eof(self):
method meta_slice (line 205) | def meta_slice(self, meta_type, data_slice):
method meta_event (line 212) | def meta_event(self, meta_type, data):
method start_of_track (line 219) | def start_of_track(self, n_track=0):
method end_of_track (line 228) | def end_of_track(self):
method sequence_number (line 244) | def sequence_number(self, value):
method text (line 252) | def text(self, text):
method copyright (line 260) | def copyright(self, text):
method sequence_name (line 269) | def sequence_name(self, text):
method instrument_name (line 277) | def instrument_name(self, text):
method lyric (line 285) | def lyric(self, text):
method marker (line 293) | def marker(self, text):
method cuepoint (line 301) | def cuepoint(self, text):
method midi_ch_prefix (line 309) | def midi_ch_prefix(self, channel):
method midi_port (line 318) | def midi_port(self, value):
method tempo (line 326) | def tempo(self, value):
method smtp_offset (line 337) | def smtp_offset(self, hour, minute, second, frame, framePart):
method time_signature (line 357) | def time_signature(self, nn, dd, cc, bb):
method key_signature (line 373) | def key_signature(self, sf, mi):
method sequencer_specific (line 385) | def sequencer_specific(self, data):
FILE: midi/MidiOutStream.py
class MidiOutStream (line 3) | class MidiOutStream:
method __init__ (line 22) | def __init__(self):
method update_time (line 35) | def update_time(self, new_time=0, relative=1):
method reset_time (line 47) | def reset_time(self):
method rel_time (line 54) | def rel_time(self):
method abs_time (line 58) | def abs_time(self):
method reset_run_stat (line 64) | def reset_run_stat(self):
method set_run_stat (line 68) | def set_run_stat(self, new_status):
method get_run_stat (line 72) | def get_run_stat(self):
method set_current_track (line 78) | def set_current_track(self, new_track):
method get_current_track (line 82) | def get_current_track(self):
method channel_message (line 91) | def channel_message(self, message_type, channel, data):
method note_on (line 96) | def note_on(self, channel=0, note=0x40, velocity=0x40):
method note_off (line 105) | def note_off(self, channel=0, note=0x40, velocity=0x40):
method aftertouch (line 114) | def aftertouch(self, channel=0, note=0x40, velocity=0x40):
method continuous_controller (line 123) | def continuous_controller(self, channel, controller, value):
method patch_change (line 132) | def patch_change(self, channel, patch):
method channel_pressure (line 141) | def channel_pressure(self, channel, pressure):
method pitch_bend (line 150) | def pitch_bend(self, channel, value):
method system_exclusive (line 165) | def system_exclusive(self, data):
method song_position_pointer (line 176) | def song_position_pointer(self, value):
method song_select (line 184) | def song_select(self, songNumber):
method tuning_request (line 192) | def tuning_request(self):
method midi_time_code (line 200) | def midi_time_code(self, msg_type, values):
method header (line 211) | def header(self, format=0, nTracks=1, division=96):
method eof (line 221) | def eof(self):
method meta_event (line 233) | def meta_event(self, meta_type, data):
method start_of_track (line 241) | def start_of_track(self, n_track=0):
method end_of_track (line 249) | def end_of_track(self):
method sequence_number (line 257) | def sequence_number(self, value):
method text (line 265) | def text(self, text):
method copyright (line 274) | def copyright(self, text):
method sequence_name (line 283) | def sequence_name(self, text):
method instrument_name (line 292) | def instrument_name(self, text):
method lyric (line 300) | def lyric(self, text):
method marker (line 308) | def marker(self, text):
method cuepoint (line 316) | def cuepoint(self, text):
method midi_ch_prefix (line 324) | def midi_ch_prefix(self, channel):
method midi_port (line 332) | def midi_port(self, value):
method tempo (line 340) | def tempo(self, value):
method smtp_offset (line 350) | def smtp_offset(self, hour, minute, second, frame, framePart):
method time_signature (line 370) | def time_signature(self, nn, dd, cc, bb):
method key_signature (line 385) | def key_signature(self, sf, mi):
method sequencer_specific (line 397) | def sequencer_specific(self, data):
method timing_clock (line 410) | def timing_clock(self):
method song_start (line 419) | def song_start(self):
method song_stop (line 428) | def song_stop(self):
method song_continue (line 437) | def song_continue(self):
method active_sensing (line 446) | def active_sensing(self):
method system_reset (line 455) | def system_reset(self):
FILE: midi/MidiToText.py
class MidiToText (line 4) | class MidiToText(MidiOutStream):
method channel_message (line 15) | def channel_message(self, message_type, channel, data):
method note_on (line 20) | def note_on(self, channel=0, note=0x40, velocity=0x40):
method note_off (line 23) | def note_off(self, channel=0, note=0x40, velocity=0x40):
method aftertouch (line 26) | def aftertouch(self, channel=0, note=0x40, velocity=0x40):
method continuous_controller (line 30) | def continuous_controller(self, channel, controller, value):
method patch_change (line 34) | def patch_change(self, channel, patch):
method channel_pressure (line 38) | def channel_pressure(self, channel, pressure):
method pitch_bend (line 42) | def pitch_bend(self, channel, value):
method system_exclusive (line 51) | def system_exclusive(self, data):
method song_position_pointer (line 55) | def song_position_pointer(self, value):
method song_select (line 59) | def song_select(self, songNumber):
method tuning_request (line 63) | def tuning_request(self):
method midi_time_code (line 67) | def midi_time_code(self, msg_type, values):
method header (line 75) | def header(self, format=0, nTracks=1, division=96):
method eof (line 80) | def eof(self):
method start_of_track (line 84) | def start_of_track(self, n_track=0):
method end_of_track (line 88) | def end_of_track(self):
method sysex_event (line 97) | def sysex_event(self, data):
method meta_event (line 104) | def meta_event(self, meta_type, data):
method sequence_number (line 108) | def sequence_number(self, value):
method text (line 112) | def text(self, text):
method copyright (line 116) | def copyright(self, text):
method sequence_name (line 120) | def sequence_name(self, text):
method instrument_name (line 124) | def instrument_name(self, text):
method lyric (line 128) | def lyric(self, text):
method marker (line 132) | def marker(self, text):
method cuepoint (line 136) | def cuepoint(self, text):
method midi_ch_prefix (line 140) | def midi_ch_prefix(self, channel):
method midi_port (line 144) | def midi_port(self, value):
method tempo (line 148) | def tempo(self, value):
method smtp_offset (line 152) | def smtp_offset(self, hour, minute, second, frame, framePart):
method time_signature (line 156) | def time_signature(self, nn, dd, cc, bb):
method key_signature (line 160) | def key_signature(self, sf, mi):
method sequencer_specific (line 164) | def sequencer_specific(self, data):
FILE: midi/RawInstreamFile.py
class RawInstreamFile (line 11) | class RawInstreamFile:
method __init__ (line 21) | def __init__(self, infile=''):
method setData (line 46) | def setData(self, data=''):
method setCursor (line 52) | def setCursor(self, position=0):
method getCursor (line 57) | def getCursor(self):
method moveCursor (line 62) | def moveCursor(self, relative_position=0):
method nextSlice (line 68) | def nextSlice(self, length, move_cursor=1):
method readBew (line 77) | def readBew(self, n_bytes=1, move_cursor=1):
method readVarLen (line 85) | def readVarLen(self):
FILE: midi/RawOutstreamFile.py
class RawOutstreamFile (line 12) | class RawOutstreamFile:
method __init__ (line 20) | def __init__(self, outfile=''):
method writeSlice (line 28) | def writeSlice(self, str_slice):
method writeBew (line 33) | def writeBew(self, value, length=1):
method writeVarLen (line 38) | def writeVarLen(self, value):
method write (line 43) | def write(self):
method getvalue (line 55) | def getvalue(self):
FILE: midi/constants.py
function is_status (line 207) | def is_status(byte):
FILE: midi/example_print_channel_0.py
class Transposer (line 9) | class Transposer(MidiOutStream):
method note_on (line 13) | def note_on(self, channel=0, note=0x40, velocity=0x40):
FILE: midi/example_transpose_octave.py
class Transposer (line 10) | class Transposer(MidiOutFile):
method _transp (line 14) | def _transp(self, ch, note):
method note_on (line 22) | def note_on(self, channel=0, note=0x40, velocity=0x40):
method note_off (line 27) | def note_off(self, channel=0, note=0x40, velocity=0x40):
FILE: midi/utils.py
class midiread (line 15) | class midiread(MidiOutStream):
method __init__ (line 16) | def __init__(self, filename, r=(21, 109), dt=0.2):
method abs_time_in_seconds (line 31) | def abs_time_in_seconds(self):
method tempo (line 34) | def tempo(self, value):
method header (line 39) | def header(self, format=0, nTracks=1, division=96):
method note_on (line 42) | def note_on(self, channel=0, note=0x40, velocity=0x40):
method note_off (line 45) | def note_off(self, channel=0, note=0x40, velocity=0x40):
method sysex_event (line 52) | def sysex_event(*args):
method device_name (line 55) | def device_name(*args):
function midiwrite (line 59) | def midiwrite(filename, piano_roll, r=(21, 109), dt=32, patch=0):
FILE: myparser.py
function read (line 9) | def read(filename, noteAdder, speed=1.0):
class CountingNoteAdder (line 93) | class CountingNoteAdder(object):
method __init__ (line 94) | def __init__(self):
method handle (line 97) | def handle(self, time, pitch, dur):
class LegatoNoteAdder (line 100) | class LegatoNoteAdder(object):
method __init__ (line 101) | def __init__(self, maxLen, transpose=0):
method handle (line 106) | def handle(self, time, pitch, dur):
function pitchGetter (line 114) | def pitchGetter(letter, octave, offset):
function fileToData (line 126) | def fileToData(path, transpose=0, windowSize=4):
function fileToSerialData (line 146) | def fileToSerialData(path):
function main (line 155) | def main():
function make_kaldi (line 183) | def make_kaldi(filename, offset):
function make_keras (line 224) | def make_keras():
FILE: neural-plugin/DoubleTime.js
function init (line 8) | function init()
function addChord (line 13) | function addChord(cursor, duration)
function addNote (line 22) | function addNote(chord, pitch)
function addRest (line 29) | function addRest(cursor, duration)
function run (line 37) | function run()
FILE: neural-plugin/neural-plugin.js
function init (line 32) | function init() {
function copyChord (line 39) | function copyChord(oldChord) {
function copyThing (line 51) | function copyThing(source, target) {
function run (line 64) | function run() {
function accept (line 82) | function accept() {
Condensed preview — 101 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (9,344K chars).
[
{
"path": ".gitignore",
"chars": 5,
"preview": "*.pyc"
},
{
"path": "DBN.py",
"chars": 26167,
"preview": "\"\"\"\n\"\"\"\nimport cPickle\nimport os\nimport sys\nimport time\nimport os.path as path\nimport copy\n\nimport numpy\n\nimport theano\n"
},
{
"path": "DeepLearningTutorials/.gitignore",
"chars": 179,
"preview": "code/*.pyc\ncode/*_plots\ncode/tmp*\ncode/midi\ncode/rnnslu\ndata/atis.*\ndata/mnist.pkl.gz\ndata/mnist_py3k.pkl.gz\ndata/Nottin"
},
{
"path": "DeepLearningTutorials/.hgignore",
"chars": 28,
"preview": "syntax: glob\n*.pyc\n*.png\n*~\n"
},
{
"path": "DeepLearningTutorials/.travis.yml",
"chars": 3150,
"preview": "# After changing this file, check it on:\n# http://lint.travis-ci.org/\n\n#We can't get scipy installed with the python lan"
},
{
"path": "DeepLearningTutorials/README.rst",
"chars": 1539,
"preview": "Deep Learning Tutorials\n=======================\n\nDeep Learning is a new area of Machine Learning research, which has bee"
},
{
"path": "DeepLearningTutorials/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "DeepLearningTutorials/code/DBN.py",
"chars": 17050,
"preview": "\"\"\"\n\"\"\"\nimport os\nimport sys\nimport time\n\nimport numpy\n\nimport theano\nimport theano.tensor as T\nfrom theano.tensor.share"
},
{
"path": "DeepLearningTutorials/code/SdA.py",
"chars": 18933,
"preview": "\"\"\"\n This tutorial introduces stacked denoising auto-encoders (SdA) using Theano.\n\n Denoising autoencoders are the build"
},
{
"path": "DeepLearningTutorials/code/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "DeepLearningTutorials/code/cA.py",
"chars": 11850,
"preview": "\"\"\"This tutorial introduces Contractive auto-encoders (cA) using Theano.\n\n They are based on auto-encoders as the ones u"
},
{
"path": "DeepLearningTutorials/code/convolutional_mlp.py",
"chars": 12643,
"preview": "\"\"\"This tutorial introduces the LeNet5 neural network architecture\nusing Theano. LeNet5 is a convolutional neural netwo"
},
{
"path": "DeepLearningTutorials/code/dA.py",
"chars": 14605,
"preview": "\"\"\"\n This tutorial introduces denoising auto-encoders (dA) using Theano.\n\n Denoising autoencoders are the building block"
},
{
"path": "DeepLearningTutorials/code/hmc/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "DeepLearningTutorials/code/hmc/hmc.py",
"chars": 15113,
"preview": "\"\"\"\nTODO\n\"\"\"\n\nimport numpy\n\nfrom theano import function, shared\nfrom theano import tensor as TT\nimport theano\n\nsharedX ="
},
{
"path": "DeepLearningTutorials/code/hmc/test_hmc.py",
"chars": 2218,
"preview": "import numpy\nfrom scipy import linalg\nimport theano\n\nfrom hmc import HMC_sampler\n\n\ndef sampler_on_nd_gaussian(sampler_cl"
},
{
"path": "DeepLearningTutorials/code/imdb.py",
"chars": 5285,
"preview": "import cPickle\nimport gzip\nimport os\n\nimport numpy\nimport theano\n\n\ndef prepare_data(seqs, labels, maxlen=None):\n \"\"\"C"
},
{
"path": "DeepLearningTutorials/code/imdb_preprocess.py",
"chars": 3350,
"preview": "\"\"\"\nThis script is what created the dataset pickled.\n\n1) You need to download this file and put it in the same directory"
},
{
"path": "DeepLearningTutorials/code/logistic_cg.py",
"chars": 10304,
"preview": "\"\"\"\nThis tutorial introduces logistic regression using Theano and conjugate\ngradient descent.\n\nLogistic regression is a "
},
{
"path": "DeepLearningTutorials/code/logistic_sgd.py",
"chars": 16081,
"preview": "\"\"\"\nThis tutorial introduces logistic regression using Theano and stochastic\ngradient descent.\n\nLogistic regression is a"
},
{
"path": "DeepLearningTutorials/code/lstm.py",
"chars": 21138,
"preview": "'''\nBuild a tweet sentiment analyzer\n'''\nfrom collections import OrderedDict\nimport cPickle as pkl\nimport random\nimport "
},
{
"path": "DeepLearningTutorials/code/mlp.py",
"chars": 14183,
"preview": "\"\"\"\nThis tutorial introduces the multilayer perceptron using Theano.\n\n A multilayer perceptron is a logistic regressor w"
},
{
"path": "DeepLearningTutorials/code/rbm.py",
"chars": 20606,
"preview": "\"\"\"This tutorial introduces restricted boltzmann machines (RBM) using Theano.\n\nBoltzmann Machines (BMs) are a particular"
},
{
"path": "DeepLearningTutorials/code/rnnrbm.py",
"chars": 11626,
"preview": "# Author: Nicolas Boulanger-Lewandowski\n# University of Montreal (2012)\n# RNN-RBM deep learning tutorial\n# More informat"
},
{
"path": "DeepLearningTutorials/code/rnnslu.py",
"chars": 13350,
"preview": "from collections import OrderedDict\nimport copy\nimport cPickle\nimport gzip\nimport os\nimport urllib\nimport random\nimport "
},
{
"path": "DeepLearningTutorials/code/test.py",
"chars": 10880,
"preview": "import sys\n\nimport numpy\n\nimport convolutional_mlp\nimport dA\nimport DBN\nimport logistic_cg\nimport logistic_sgd\nimport ml"
},
{
"path": "DeepLearningTutorials/code/utils.py",
"chars": 5101,
"preview": "\"\"\" This file contains different utility functions that are not connected\nin anyway to the networks presented in the tut"
},
{
"path": "DeepLearningTutorials/data/download.sh",
"chars": 1099,
"preview": "#!/bin/sh\n\nwhich wget >/dev/null 2>&1\nWGET=$?\nwhich curl >/dev/null 2>&1\nCURL=$?\nif [ \"$WGET\" -eq 0 ]; then\n DL_CMD=\""
},
{
"path": "DeepLearningTutorials/doc/.templates/layout.html",
"chars": 617,
"preview": "{% extends \"!layout.html\" %}\n\n{%- block extrahead %}\n{{ super() }}\n<script type=\"text/javascript\">\n var _gaq = _gaq || "
},
{
"path": "DeepLearningTutorials/doc/DBN.txt",
"chars": 12034,
"preview": ".. _DBN:\n\nDeep Belief Networks\n====================\n\n.. note::\n This section assumes the reader has already read throug"
},
{
"path": "DeepLearningTutorials/doc/LICENSE.txt",
"chars": 1524,
"preview": ".. _license:\n\nLICENSE\n=======\n\nCopyright (c) 2008--2013, Theano Development Team\nAll rights reserved.\n\nRedistribution an"
},
{
"path": "DeepLearningTutorials/doc/Makefile",
"chars": 31,
"preview": "all:\n\tpython scripts/docgen.py\n"
},
{
"path": "DeepLearningTutorials/doc/SdA.txt",
"chars": 8611,
"preview": ".. _SdA:\n\nStacked Denoising Autoencoders (SdA)\n====================================\n\n.. note::\n This section assumes yo"
},
{
"path": "DeepLearningTutorials/doc/conf.py",
"chars": 7072,
"preview": "# -*- coding: utf-8 -*-\n#\n# theano documentation build configuration file, created by\n# sphinx-quickstart on Tue Oct 7 "
},
{
"path": "DeepLearningTutorials/doc/contents.txt",
"chars": 227,
"preview": "\n.. _contents:\n\n========\nContents\n========\n\n.. toctree::\n :maxdepth: 2\n\n LICENSE\n index\n gettingstarted\n logre"
},
{
"path": "DeepLearningTutorials/doc/dA.txt",
"chars": 21832,
"preview": ".. _daa:\n\nDenoising Autoencoders (dA)\n===========================\n\n.. note::\n This section assumes the reader has alrea"
},
{
"path": "DeepLearningTutorials/doc/deep.txt",
"chars": 4374,
"preview": ".. _deep:\n\nDeep Learning\n=============\n\nThe breakthrough to effective training strategies for deep architectures came in"
},
{
"path": "DeepLearningTutorials/doc/gettingstarted.txt",
"chars": 29586,
"preview": ".. _gettingstarted:\n\n\n===============\nGetting Started\n===============\n\nThese tutorials do not attempt to make up for a g"
},
{
"path": "DeepLearningTutorials/doc/hmc.txt",
"chars": 16068,
"preview": ".. _HMC:\n\nHybrid Monte-Carlo Sampling\n===========================\n\n\n.. note::\n This is an advanced tutorial, which show"
},
{
"path": "DeepLearningTutorials/doc/index.txt",
"chars": 4130,
"preview": "=======================\nDeep Learning Tutorials\n=======================\n\nDeep Learning is a new area of Machine Learning"
},
{
"path": "DeepLearningTutorials/doc/lenet.txt",
"chars": 23520,
"preview": ".. _lenet:\n\nConvolutional Neural Networks (LeNet)\n=====================================\n\n.. note::\n This section assu"
},
{
"path": "DeepLearningTutorials/doc/logreg.txt",
"chars": 11478,
"preview": ".. index:: Logistic Regression\n\n.. _logreg :\n\n\nClassifying MNIST digits using Logistic Regression\n======================"
},
{
"path": "DeepLearningTutorials/doc/lstm.txt",
"chars": 10507,
"preview": ".. _lstm:\n\nLSTM Networks for Sentiment Analysis\n**********************************************\n\nSummary\n+++++++\n\nThis tu"
},
{
"path": "DeepLearningTutorials/doc/mlp.txt",
"chars": 13161,
"preview": ".. index:: Multilayer Perceptron\n\n.. _mlp:\n\n\nMultilayer Perceptron\n=====================\n\n.. note::\n This section ass"
},
{
"path": "DeepLearningTutorials/doc/rbm.txt",
"chars": 23850,
"preview": ".. _RBM:\n\nRestricted Boltzmann Machines (RBM)\n===================================\n\n\n.. note::\n This section assumes the"
},
{
"path": "DeepLearningTutorials/doc/references.txt",
"chars": 3704,
"preview": ".. _references:\n\n==========\nReferences\n==========\n\n.. [Bengio07] Y. Bengio, P. Lamblin, D. Popovici and H. Larochelle, `"
},
{
"path": "DeepLearningTutorials/doc/rnnrbm.txt",
"chars": 7325,
"preview": ".. _rnnrbm:\n\nModeling and generating sequences of polyphonic music with the RNN-RBM\n===================================="
},
{
"path": "DeepLearningTutorials/doc/rnnslu.txt",
"chars": 21994,
"preview": ".. _rnnslu:\n\nRecurrent Neural Networks with Word Embeddings\n**********************************************\n\nSummary\n++++"
},
{
"path": "DeepLearningTutorials/doc/scripts/docgen.py",
"chars": 2062,
"preview": "from __future__ import print_function\nimport sys\nimport os\nimport shutil\n\nimport getopt\nfrom collections import defaultd"
},
{
"path": "DeepLearningTutorials/doc/utilities.txt",
"chars": 6144,
"preview": "=============\nMiscellaneous\n=============\n\n.. _how-to-plot:\n\nPlotting Samples and Filters\n++++++++++++++++++++++++++++\n\n"
},
{
"path": "DeepLearningTutorials/issues_closed/2_RBM_cost_fn.txt",
"chars": 233,
"preview": "Reported by : Razvan\n\nCost function (delta of free energy) has a reversed sign (i.e. free_energy(positive) - free_energy"
},
{
"path": "DeepLearningTutorials/issues_open/1_SdA_performance.txt",
"chars": 242,
"preview": "Reported by : Razvan\n\nBest performance for SdA float64 CPU : 1.23%\n float32 CPU : 1.30%\ntarget :"
},
{
"path": "DeepLearningTutorials/issues_open/3_RBM_scan_GPU.txt",
"chars": 164,
"preview": "Reported by : Razvan\n\nScan is not GPU ready.. making RBM tutorial slow on GPU (not tested yet).\nQuick fix is a optimizat"
},
{
"path": "DeepLearningTutorials/issues_open/4_RBM_scan.txt",
"chars": 419,
"preview": "Reported by : Razvan\n\nThe bug can be reproduced if you do : \n z = scan(..)\n c = f(z[-1])\n gp = T.grad(c, p, consider_con"
},
{
"path": "DeepLearningTutorials/issues_open/5_results.txt",
"chars": 226,
"preview": "Reported by : Razvan\n\nWe should produce results + time for CPU float32 / CPU float64 / GPU . We should also \nspecify the"
},
{
"path": "DeepLearningTutorials/issues_open/6_benchmarking_pybrain.txt",
"chars": 3426,
"preview": "Reported by : Razvan\n\nObservations : \n\n 1. First thing, working with their dataset model is a pain ! Either I had \n "
},
{
"path": "DeepLearningTutorials/misc/do_nightly_build",
"chars": 1582,
"preview": "#!/bin/bash\n#we set the compiledir to the /Tmp dir to make the test faster by bypassing the nfs network.\ndate\nROOT_CWD=/"
},
{
"path": "README.md",
"chars": 805,
"preview": "# neuralnetmusic\nFelix's project for composing music using neural nets.\n\nThis is not in a state fit for public release r"
},
{
"path": "joplin/alabama.xml",
"chars": 1178495,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE score-partwise PUBLIC \"-//Recordare//DTD MusicXML 2.0 Partwise//EN\" \"ht"
},
{
"path": "joplin/cleopha.xml",
"chars": 1082025,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE score-partwise PUBLIC \"-//Recordare//DTD MusicXML 2.0 Partwise//EN\" \"ht"
},
{
"path": "joplin/entertainer.xml",
"chars": 1059919,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE score-partwise PUBLIC \"-//Recordare//DTD MusicXML 2.0 Partwise//EN\" \"ht"
},
{
"path": "joplin/maple_leaf.xml",
"chars": 1011301,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE score-partwise PUBLIC \"-//Recordare//DTD MusicXML 2.0 Partwise//EN\" \"ht"
},
{
"path": "joplin/searchlight.xml",
"chars": 1214672,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE score-partwise PUBLIC \"-//Recordare//DTD MusicXML 2.0 Partwise//EN\" \"ht"
},
{
"path": "joplin/strenous.xml",
"chars": 971063,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE score-partwise PUBLIC \"-//Recordare//DTD MusicXML 2.0 Partwise//EN\" \"ht"
},
{
"path": "joplin/syncopations.xml",
"chars": 878247,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE score-partwise PUBLIC \"-//Recordare//DTD MusicXML 2.0 Partwise//EN\" \"ht"
},
{
"path": "joplin/winners.xml",
"chars": 577147,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE score-partwise PUBLIC \"-//Recordare//DTD MusicXML 2.0 Partwise//EN\" \"ht"
},
{
"path": "joplin/winners_2.xml",
"chars": 269447,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE score-partwise PUBLIC \"-//Recordare//DTD MusicXML 2.0 Partwise//EN\" \"ht"
},
{
"path": "midi/DataTypeConverters.py",
"chars": 5516,
"preview": "# -*- coding: ISO-8859-1 -*-\n\nfrom struct import pack, unpack\n\n\"\"\"\nThis module contains functions for reading and writin"
},
{
"path": "midi/EventDispatcher.py",
"chars": 9042,
"preview": "# -*- coding: ISO-8859-1 -*-\n\n# std library\nfrom struct import unpack\n\n# custom\nfrom DataTypeConverters import readBew, "
},
{
"path": "midi/Icon_",
"chars": 0,
"preview": ""
},
{
"path": "midi/MidiFileParser.py",
"chars": 6432,
"preview": "# -*- coding: ISO-8859-1 -*-\n\n# std library\nfrom struct import unpack\n\n# uhh I don't really like this, but there are so "
},
{
"path": "midi/MidiInFile.py",
"chars": 1490,
"preview": "# -*- coding: ISO-8859-1 -*-\n\nfrom RawInstreamFile import RawInstreamFile\nfrom MidiFileParser import MidiFileParser\n\n\ncl"
},
{
"path": "midi/MidiInStream.py",
"chars": 905,
"preview": "# -*- coding: ISO-8859-1 -*-\n\nfrom MidiOutStream import MidiOutStream\n\nclass MidiInStream:\n\n \"\"\"\n Takes midi event"
},
{
"path": "midi/MidiOutFile.py",
"chars": 10460,
"preview": "# -*- coding: ISO-8859-1 -*-\n\nfrom MidiOutStream import MidiOutStream\nfrom RawOutstreamFile import RawOutstreamFile\n\nfro"
},
{
"path": "midi/MidiOutStream.py",
"chars": 8889,
"preview": "# -*- coding: ISO-8859-1 -*-\n\nclass MidiOutStream:\n\n\n \"\"\"\n\n MidiOutstream is Basically an eventhandler. It is the "
},
{
"path": "midi/MidiToText.py",
"chars": 4137,
"preview": "# -*- coding: ISO-8859-1 -*-\n\nfrom MidiOutStream import MidiOutStream\nclass MidiToText(MidiOutStream):\n\n\n \"\"\"\n Thi"
},
{
"path": "midi/RawInstreamFile.py",
"chars": 3005,
"preview": "# -*- coding: ISO-8859-1 -*-\n\n# standard library imports\nfrom types import StringType\nfrom struct import unpack\n\n# custo"
},
{
"path": "midi/RawOutstreamFile.py",
"chars": 1641,
"preview": "# -*- coding: ISO-8859-1 -*-\n\n# standard library imports\nimport sys\nfrom types import StringType\nfrom struct import unpa"
},
{
"path": "midi/__init__.py",
"chars": 110,
"preview": "# -*- coding: ISO-8859-1 -*-\n\n#import MidiOutStream\n#import MidiInStream\n#import MidiInFile\n#import MidiToText"
},
{
"path": "midi/changes.txt",
"chars": 1880,
"preview": "------------------------------------------------------------------------\nr409 | maxm | 2006-01-05 16:37:29 +0100 (to, 05"
},
{
"path": "midi/constants.py",
"chars": 6365,
"preview": "# -*- coding: ISO-8859-1 -*-\n\n###################################################\n## Definitions of the different midi e"
},
{
"path": "midi/example_mimimal_type0.py",
"chars": 512,
"preview": "from MidiOutFile import MidiOutFile\n\n\"\"\"\nThis is an example of the smallest possible type 0 midi file, where \nall the mi"
},
{
"path": "midi/example_print_channel_0.py",
"chars": 491,
"preview": "from MidiOutStream import MidiOutStream\nfrom MidiInFile import MidiInFile\n\n\"\"\"\nThis prints all note on events on midi ch"
},
{
"path": "midi/example_print_events.py",
"chars": 495,
"preview": "from MidiToText import MidiToText\n\n\"\"\"\nThis is an example that uses the MidiToText eventhandler. When an \nevent is trigg"
},
{
"path": "midi/example_print_file.py",
"chars": 492,
"preview": "\"\"\"\nThis is an example that uses the MidiToText eventhandler. When an \nevent is triggered on it, it prints the event to "
},
{
"path": "midi/example_transpose_octave.py",
"chars": 1036,
"preview": "from MidiOutFile import MidiOutFile\nfrom MidiInFile import MidiInFile\n\n\"\"\"\nThis is an example of the smallest possible t"
},
{
"path": "midi/files.txt",
"chars": 14861,
"preview": "Midi file name\tBWV\tK\tB\tEMB\tR\n000106b_.mid\t1.6\t378\t\t375\t\n000206b_.mid\t2.6\t7\t262\t5\t262\n000306b_.mid\t3.6\t8\t156\t8\t156\n000306"
},
{
"path": "midi/license.txt",
"chars": 692,
"preview": "Modified Python MIDI package\nCopyright (C) 2013 Nicolas Boulanger-Lewandowski\n\nThis program is free software: you can r"
},
{
"path": "midi/readme",
"chars": 2855,
"preview": "JSBChorales.net: ReadMe\rMargaret Greentree\r\r\rThis is a copy of the README found with the midi files of the chorales.\r\rHe"
},
{
"path": "midi/readme.txt",
"chars": 1316,
"preview": "\n-------------------------------------------------------------------------------------\nNicolas Boulanger-Lewandowski (Ja"
},
{
"path": "midi/utils.py",
"chars": 2444,
"preview": "# Author: Nicolas Boulanger-Lewandowski\n# University of Montreal (2013)\n# RNN-RBM deep learning tutorial\n#\n# Implements "
},
{
"path": "midi/version.txt",
"chars": 5,
"preview": "0.1.4"
},
{
"path": "myparser.py",
"chars": 8782,
"preview": "import xml.etree.ElementTree as ET\nimport math\nimport numpy as np\nimport subprocess\nfrom PIL import Image\nimport cPickle"
},
{
"path": "neural-plugin/DoubleTime.js",
"chars": 2800,
"preview": "//=============================================================================\n// HalfTime plugin\n//\n// This plugin c"
},
{
"path": "neural-plugin/neural-plugin.js",
"chars": 5435,
"preview": "//=============================================================================\n// MuseScore\n// Linux Music Score Edit"
},
{
"path": "neural-plugin/neural-plugin.ui",
"chars": 1700,
"preview": "<ui version=\"4.0\" >\n <class>Dialog</class>\n <widget class=\"QDialog\" name=\"Dialog\" >\n <property name=\"geometry\" >\n <re"
},
{
"path": "neural-plugin/output-window.ui",
"chars": 523,
"preview": "<ui version=\"4.0\" >\n <class>Dialog</class>\n <widget class=\"QDialog\" name=\"Dialog\" >\n <property name=\"geometry\" >\n <re"
}
]
// ... and 4 more files (download for full content)
About this extraction
This page contains the full source code of the fephsun/neuralnetmusic GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 101 files (56.2 MB), approximately 2.2M tokens, and a symbol index with 360 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.