Repository: YerevaNN/Spoken-language-identification
Branch: master
Commit: e947dee00f30
Files: 42
Total size: 168.6 KB
Directory structure:
gitextract_7gedhokp/
├── .gitignore
├── LICENSE
├── README.md
├── augment_data.py
├── choose_equal_split.py
├── concatenate_csvs.py
├── create_spectrograms.py
├── ensembling/
│ ├── ensemble.theano.py
│ └── get_output_layers.py
├── get_score_from_probabilities.py
├── get_score_from_top3_prediction.py
├── get_sum_of_csvs.py
├── majority_vote_ensembling.py
├── make_submission.py
├── prototxt/
│ ├── augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024r-1024r_DLR_nolrcoef.prototxt
│ ├── augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt
│ ├── deploy.augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt
│ ├── deploy.main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt
│ ├── main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt
│ ├── solver.augm.nolrcoef.prototxt
│ └── solver.main.adadelta.prototxt
├── test_augm_network.py
├── test_main_network.py
└── theano/
├── README.md
├── main.py
├── networks/
│ ├── __init__.py
│ ├── base_network.py
│ ├── rnn.py
│ ├── rnn_2layers.py
│ ├── rnn_2layers_5khz.py
│ ├── tc_net.py
│ ├── tc_net_deeprnn_shared_pad.py
│ ├── tc_net_mod.py
│ ├── tc_net_mod_5khz_small.py
│ ├── tc_net_rnn.py
│ ├── tc_net_rnn_nodense.py
│ ├── tc_net_rnn_onernn.py
│ ├── tc_net_rnn_onernn_notimepool.py
│ ├── tc_net_rnn_shared.py
│ ├── tc_net_rnn_shared_pad.py
│ └── tc_net_rnn_shared_pad_augm.py
└── plot.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
*.pyc
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2016 YerevaNN
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# Spoken language identification with deep learning
Read more in the following blog posts:
* [About TopCoder contest and our CNN-based solution implemented in Caffe](http://yerevann.github.io/2015/10/11/spoken-language-identification-with-deep-convolutional-networks/) (October 2015)
* [About combining CNN and RNN using Theano/Lasagne](http://yerevann.github.io/2016/06/26/combining-cnn-and-rnn-for-spoken-language-identification/) (June 2016)
Theano/Lasagne models are [here](/theano). The basic steps to run them are:
* Download the dataset from [here](https://community.topcoder.com/longcontest/?module=ViewProblemStatement&rd=16555&pm=13978) or use your own dataset.
* Create spectrograms for each recording using `create_spectrograms.py` or `augment_data.py`. The latter will also augment the data by randomly perturbing the spectrograms and cropping a random interval of length 9s from the recording.
* Create listfiles for the training set and validation set, where each row of a listfile describes one example and has 2 values separated by a comma. The first one is the name of the example, the second one is the label (counting starts from 0). A typical listfile will look like [this](https://gist.github.com/Harhro94/aa11fe6b454c614cdedea882fd00f8d7).
* Change the `png_folder` and listfile paths in [`theano/main.py`](/theano/main.py).
* Run `theano/main.py`.
================================================
FILE: augment_data.py
================================================
import numpy as np
from matplotlib import pyplot as plt
import scipy.io.wavfile as wav
from numpy.lib import stride_tricks
import PIL.Image as Image
import os
""" short time fourier transform of audio signal """
def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    Parameters:
        sig: 1-D array of audio samples.
        frameSize: window length in samples (also the FFT size).
        overlapFac: fraction of overlap between consecutive frames.
        window: window function; called as window(frameSize).

    Returns:
        Complex array of shape (numFrames, frameSize // 2 + 1).
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))
    # zeros at beginning (thus center of 1st window should be for sample nr. 0)
    # int() cast: np.zeros requires an integer size on modern NumPy
    samples = np.append(np.zeros(int(np.floor(frameSize / 2.0))), sig)
    # cols for windowing; int() cast: as_strided requires integer shape entries
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize)) + 1)
    # zeros at end (thus samples can be fully covered by frames)
    samples = np.append(samples, np.zeros(frameSize))
    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)
""" scale frequency axis logarithmically """
def logscale_spec(spec, sr=44100, factor=20., alpha=1.0, f0=0.9, fmax=1):
    """Warp the frequency axis of a spectrogram (VTLP-style piecewise-linear map).

    Parameters:
        spec: complex STFT array (timebins, freqbins); only the first 256
              frequency columns are used.
        sr: sample rate, used to compute the bin center frequencies.
        factor: unused (kept for interface compatibility).
        alpha: warp factor; alpha == 1.0 is the identity mapping.
        f0, fmax: knee point and upper bound of the piecewise-linear warp.

    Returns:
        (newspec, freqs): warped spectrogram of the same shape, and the list
        of weighted center frequencies per output bin.
    """
    spec = spec[:, 0:256]
    timebins, freqbins = np.shape(spec)
    scale = np.linspace(0, 1, freqbins)  # ** factor
    # piecewise-linear frequency warp, see
    # http://ieeexplore.ieee.org/xpl/login.jsp?tp=&arnumber=650310&url=http%3A%2F%2Fieeexplore.ieee.org%2Fiel4%2F89%2F14168%2F00650310
    # list(...) around map keeps this working on Python 3, where map is lazy
    scale = np.array(list(map(
        lambda x: x * alpha if x <= f0
        else (fmax - alpha * f0) / (fmax - f0) * (x - f0) + alpha * f0,
        scale)))
    scale *= (freqbins - 1) / max(scale)
    newspec = np.complex128(np.zeros([timebins, freqbins]))
    allfreqs = np.abs(np.fft.fftfreq(freqbins * 2, 1. / sr)[:freqbins + 1])
    freqs = [0.0 for i in range(freqbins)]
    totw = [0.0 for i in range(freqbins)]
    for i in range(0, freqbins):
        if (i < 1 or i + 1 >= freqbins):
            # first and last bins are copied unchanged
            newspec[:, i] += spec[:, i]
            freqs[i] += allfreqs[i]
            totw[i] += 1.0
            continue
        else:
            # distribute bin i between floor(scale[i]) and the next bin,
            # e.g. scale[15] = 17.2 puts 0.8 into bin 17 and 0.2 into bin 18
            w_up = scale[i] - np.floor(scale[i])
            w_down = 1 - w_up
            j = int(np.floor(scale[i]))
            newspec[:, j] += w_down * spec[:, i]
            freqs[j] += w_down * allfreqs[i]
            totw[j] += w_down
            newspec[:, j + 1] += w_up * spec[:, i]
            freqs[j + 1] += w_up * allfreqs[i]
            totw[j + 1] += w_up
    # normalize accumulated frequencies by their total weight
    for i in range(len(freqs)):
        if (totw[i] > 1e-6):
            freqs[i] /= totw[i]
    return newspec, freqs
""" plot spectrogram"""
def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="gray", channel=0, name='tmp.png', alpha=1, offset=0):
    """Render one 256x768 grayscale spectrogram PNG for a wav file.

    alpha: VTLP warp factor forwarded to logscale_spec.
    offset: start column (time bin) of the 768-wide (~9 s) crop.
    plotpath and colormap are accepted but unused.
    """
    samplerate, samples = wav.read(audiopath)
    samples = samples[:, channel]  # assumes a multi-channel wav — TODO confirm for mono input
    s = stft(samples, binsize)
    sshow, freq = logscale_spec(s, factor=1, sr=samplerate, alpha=alpha)
    sshow = sshow[2:, :]  # drop the first two frames (dominated by zero padding)
    ims = 20.*np.log10(np.abs(sshow)/10e-6) # amplitude to decibel (reference level 1e-5)
    timebins, freqbins = np.shape(ims)
    ims = np.transpose(ims)
    ims = ims[0:256, offset:offset+768] # 0-11khz, ~9s interval
    #print "ims.shape", ims.shape
    image = Image.fromarray(ims)
    image = image.convert('L')  # 8-bit grayscale
    image.save(name)
# Driver: for every mp3 listed in trainingData.csv, decode to wav with mpg123
# and emit 20 randomly perturbed spectrograms (random VTLP alpha + random crop offset).
file = open('trainingData.csv', 'r')
for iter, line in enumerate(file.readlines()[1:]): # first line of trainingData.csv is a header (only for trainingData.csv)
    filepath = line.split(',')[0]
    filename = filepath[:-4]  # strip '.mp3'
    wavfile = 'tmp.wav'
    # NOTE(review): shell command built by string concatenation; file names are assumed shell-safe
    os.system('mpg123 -w ' + wavfile + ' /home/brainstorm/caffe/Data/mnt/3/language/train/mp3/' + filepath)
    for augmentIdx in range(0, 20):
        alpha = np.random.uniform(0.9, 1.1)  # vocal tract length perturbation factor
        offset = np.random.randint(90)       # random crop start, in time bins
        plotstft(wavfile, channel=0, name='/home/brainstorm/data/language/train/pngaugm/'+filename+'.'+str(augmentIdx)+'.png',
                 alpha=alpha, offset=offset)
    os.remove(wavfile)
    print "processed %d files" % (iter + 1)
================================================
FILE: choose_equal_split.py
================================================
"""split data into training and validation sets"""
import csv
with open('trainingData.csv', 'rb') as csvfile:
next(csvfile) #skip headers
data = list(csv.reader(csvfile, delimiter=','))
#Map every language to an ID
langs = set([language.strip() for _,language in data])
ID = {lang: i for i,lang in enumerate(sorted(langs))}
#Write first 306 items to training set and the rest to validation set
cnt = [0 for _ in range(len(langs))]
with open('trainEqual.csv', 'w') as train:
with open('valEqaul.csv', 'w') as val:
for line in data:
filepath, language = map(str.strip, line)
id_lang = ID[language]
if (cnt[id_lang] < 306):
train.write(filepath[:-4] + ',' + str(id_lang) + '\n')
else:
val.write(filepath[:-4] + ',' + str(id_lang) + '\n')
cnt[id_lang] += 1
================================================
FILE: concatenate_csvs.py
================================================
""" Usage: python concatenate_csvs.py csv1path csv2path ..
"""
import sys
import numpy as np
n_csv = len(sys.argv) - 1
cnt = 12320
csv = []
for index in range(1, len(sys.argv)):
csv.append(open(sys.argv[index], 'r'))
outfile = open('concatenated.csv', 'w')
for iter in range(12320):
out = []
for index in range(n_csv):
cur_out = csv[index].readline().split(',')
cur_out = [float(x) for x in cur_out]
out += cur_out
out = [("%.6f" % x) for x in out]
outfile.write(','.join(out) + '\n')
================================================
FILE: create_spectrograms.py
================================================
import numpy as np
from matplotlib import pyplot as plt
import scipy.io.wavfile as wav
from numpy.lib import stride_tricks
import PIL.Image as Image
import os
""" short time fourier transform of audio signal """
def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
    """Short-time Fourier transform of an audio signal.

    Parameters:
        sig: 1-D array of audio samples.
        frameSize: window length in samples (also the FFT size).
        overlapFac: fraction of overlap between consecutive frames.
        window: window function; called as window(frameSize).

    Returns:
        Complex array of shape (numFrames, frameSize // 2 + 1).
    """
    win = window(frameSize)
    hopSize = int(frameSize - np.floor(overlapFac * frameSize))
    # zeros at beginning (thus center of 1st window should be for sample nr. 0)
    # int() cast: np.zeros requires an integer size on modern NumPy
    samples = np.append(np.zeros(int(np.floor(frameSize / 2.0))), sig)
    # cols for windowing; int() cast: as_strided requires integer shape entries
    cols = int(np.ceil((len(samples) - frameSize) / float(hopSize)) + 1)
    # zeros at end (thus samples can be fully covered by frames)
    samples = np.append(samples, np.zeros(frameSize))
    frames = stride_tricks.as_strided(
        samples,
        shape=(cols, frameSize),
        strides=(samples.strides[0] * hopSize, samples.strides[0])).copy()
    frames *= win
    return np.fft.rfft(frames)
""" scale frequency axis logarithmically """
def logscale_spec(spec, sr=44100, factor=20., alpha=1.0, f0=0.9, fmax=1):
    """Warp the frequency axis of a spectrogram (VTLP-style piecewise-linear map).

    Parameters:
        spec: complex STFT array (timebins, freqbins); only the first 256
              frequency columns are used.
        sr: sample rate, used to compute the bin center frequencies.
        factor: unused (kept for interface compatibility).
        alpha: warp factor; alpha == 1.0 is the identity mapping.
        f0, fmax: knee point and upper bound of the piecewise-linear warp.

    Returns:
        (newspec, freqs): warped spectrogram of the same shape, and the list
        of weighted center frequencies per output bin.
    """
    spec = spec[:, 0:256]
    timebins, freqbins = np.shape(spec)
    scale = np.linspace(0, 1, freqbins)  # ** factor
    # piecewise-linear frequency warp, see
    # http://ieeexplore.ieee.org/xpl/login.jsp?tp=&arnumber=650310&url=http%3A%2F%2Fieeexplore.ieee.org%2Fiel4%2F89%2F14168%2F00650310
    # list(...) around map keeps this working on Python 3, where map is lazy
    scale = np.array(list(map(
        lambda x: x * alpha if x <= f0
        else (fmax - alpha * f0) / (fmax - f0) * (x - f0) + alpha * f0,
        scale)))
    scale *= (freqbins - 1) / max(scale)
    newspec = np.complex128(np.zeros([timebins, freqbins]))
    allfreqs = np.abs(np.fft.fftfreq(freqbins * 2, 1. / sr)[:freqbins + 1])
    freqs = [0.0 for i in range(freqbins)]
    totw = [0.0 for i in range(freqbins)]
    for i in range(0, freqbins):
        if (i < 1 or i + 1 >= freqbins):
            # first and last bins are copied unchanged
            newspec[:, i] += spec[:, i]
            freqs[i] += allfreqs[i]
            totw[i] += 1.0
            continue
        else:
            # distribute bin i between floor(scale[i]) and the next bin,
            # e.g. scale[15] = 17.2 puts 0.8 into bin 17 and 0.2 into bin 18
            w_up = scale[i] - np.floor(scale[i])
            w_down = 1 - w_up
            j = int(np.floor(scale[i]))
            newspec[:, j] += w_down * spec[:, i]
            freqs[j] += w_down * allfreqs[i]
            totw[j] += w_down
            newspec[:, j + 1] += w_up * spec[:, i]
            freqs[j + 1] += w_up * allfreqs[i]
            totw[j + 1] += w_up
    # normalize accumulated frequencies by their total weight
    for i in range(len(freqs)):
        if (totw[i] > 1e-6):
            freqs[i] /= totw[i]
    return newspec, freqs
""" plot spectrogram"""
def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="gray", channel=0, name='tmp.png', alpha=1, offset=0):
    """Render one 256-row grayscale spectrogram PNG covering the whole recording.

    Unlike the augmenting variant, this keeps the full time axis (no 768-wide
    crop); offset is accepted but unused here.
    plotpath and colormap are accepted but unused.
    """
    samplerate, samples = wav.read(audiopath)
    samples = samples[:, channel]  # assumes a multi-channel wav — TODO confirm for mono input
    s = stft(samples, binsize)
    sshow, freq = logscale_spec(s, factor=1, sr=samplerate, alpha=alpha)
    sshow = sshow[2:, :]  # drop the first two frames (dominated by zero padding)
    ims = 20.*np.log10(np.abs(sshow)/10e-6) # amplitude to decibel (reference level 1e-5)
    timebins, freqbins = np.shape(ims)
    ims = np.transpose(ims)
    # ims = ims[0:256, offset:offset+768] # 0-11khz, ~9s interval
    ims = ims[0:256, :] # 0-11khz, ~10s interval
    #print "ims.shape", ims.shape
    image = Image.fromarray(ims)
    image = image.convert('L')  # 8-bit grayscale
    image.save(name)
# Driver: decode every mp3 listed in trainingData.csv to wav with mpg123 and
# save a single, un-augmented spectrogram per recording.
file = open('trainingData.csv', 'r')
for iter, line in enumerate(file.readlines()[1:]): # first line of trainingData.csv is a header (only for trainingData.csv)
    filepath = line.split(',')[0]
    filename = filepath[:-4]  # strip '.mp3'
    wavfile = 'tmp.wav'
    # NOTE(review): shell command built by string concatenation; file names are assumed shell-safe
    os.system('mpg123 -w ' + wavfile + ' /home/brainstorm/caffe/Data/mnt/3/language/train/mp3/' + filepath)
    """
    for augmentIdx in range(0, 20):
        alpha = np.random.uniform(0.9, 1.1)
        offset = np.random.randint(90)
        plotstft(wavfile, channel=0, name='/home/brainstorm/data/language/train/pngaugm/'+filename+'.'+str(augmentIdx)+'.png',
                 alpha=alpha, offset=offset)
    """
    # we create only one spectrogram for each speech sample
    # we don't do vocal tract length perturbation (alpha=1.0)
    # also we don't crop a 9s part from the speech
    plotstft(wavfile, channel=0, name='/home/brainstorm/data/language/train/pngaugm/'+filename+'.png', alpha=1.0)
    os.remove(wavfile)
    print "processed %d files" % (iter + 1)
================================================
FILE: ensembling/ensemble.theano.py
================================================
""" Usage: python ensemble.theano.py model1 [another_model]*
for GPU mode
1. export PATH=$PATH:/usr/local/cuda-6.5/bin
2. THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32,nvcc.flags='-arch=sm_30' python ensemble.theano.py model1 [another_model]*
"""
import cPickle as pickle
import sys
import caffe
import numpy as np
caffe.set_mode_gpu()
def get_score(probs, label):
    """Return the TopCoder points earned by one prediction.

    1000 points if `label` has the highest probability, 400 for second,
    160 for third, 0 otherwise. Ties are broken in favour of the larger
    class index (tuple comparison on (probability, index)).
    """
    ranking = sorted(((p, idx) for idx, p in enumerate(probs)), reverse=True)
    for place, reward in enumerate((1000, 400, 160)):
        if ranking[place][1] == label:
            return reward
    return 0
def get_full_score(preds, labels):
    """Average TopCoder score over a dataset, scaled to 3520 examples.

    preds: per-example probability vectors; labels: true class ids.
    Returns 0.0 for empty input instead of raising ZeroDivisionError.
    """
    if not labels:
        return 0.0
    topCoderScore = 0.0
    for probs, label in zip(preds, labels):
        topCoderScore += get_score(probs, label)
    return topCoderScore / len(labels) * 3520
####################### COLLECTING INFO ABOUT LANGS ############################
# Sorted language names define the class ids (consistent with choose_equal_split).
file = open('../trainingData.csv')
data = file.readlines()[1:]
langs = set()
for line in data:
    filepath, language = line.split(',')
    language = language.strip()
    langs.add(language)
langs = sorted(langs)
file.close()
# Each model contributes one 176-wide block of class probabilities per example.
n_models = len(sys.argv) - 1
X = np.zeros((12320, n_models * 176), dtype=np.float32)  # 12320 validation examples
for iter in range(n_models):
    csvpath = 'probs/val/' + sys.argv[iter + 1]
    csv = open(csvpath, 'r')
    for row_id, line in enumerate(csv.readlines()):
        mas = line.split(',')
        mas = np.array([float(x) for x in mas], dtype=np.float32)
        X[row_id, 176*iter:176*(iter+1)] = mas
    csv.close()
Y = []
label_file = open('../valEqual.csv')
for line in label_file.readlines():
    Y.append(int(line.split(',')[1]))
label_file.close()
print "X.shape =", X.shape
print "len(Y) =", len(Y)
# Baseline: score of each individual model before ensembling.
for iter in range(n_models):
    print "score of model %d = %f" % (iter+1, get_full_score(X[:, 176*iter:176*(iter+1)], Y))
######################### TRAINING ENSEMBLING MODEL ############################
import theano
import theano.tensor as T
import lasagne
import lasagne.layers as layers
# A small MLP (176*n_models -> 4000 relu -> dropout -> 176 softmax) blends the models.
n_train_examples = 10000
X = X.astype(theano.config.floatX)
trainX = X[:n_train_examples]
trainY = Y[:n_train_examples]
valX = X[n_train_examples:]
valY = Y[n_train_examples:]
input_var = T.matrix('X')
target_var = T.ivector('y')
from lasagne.nonlinearities import softmax, sigmoid, rectify
network = lasagne.layers.InputLayer((None, X.shape[1]), input_var)
network = lasagne.layers.DenseLayer(network, 4000, nonlinearity=rectify)
network = lasagne.layers.DenseLayer(lasagne.layers.dropout(network, 0.5), 176, nonlinearity=softmax)
prediction = lasagne.layers.get_output(network)
loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
# the "0 *" term keeps the L2-regularization hook in place but disabled
loss = loss.mean() + 0 * lasagne.regularization.regularize_network_params(
    network, lasagne.regularization.l2)
params = lasagne.layers.get_all_params(network, trainable=True)
learning_rate = theano.shared(np.float32(0.2))
updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=learning_rate, momentum=0.9)
train_fn = theano.function([input_var, target_var], loss, updates=updates)
validation_fn = theano.function([input_var, target_var], loss)
# Full-batch training; learning rate decays x0.7 every 200 epochs.
for epoch in range(1000):
    train_loss = train_fn(trainX, trainY)
    val_loss = validation_fn(valX, valY)
    print "Epoch %d: train_loss = %f, val_loss = %f, lr = %f" % (epoch + 1, train_loss, val_loss, learning_rate.get_value())
    if (epoch > 0 and epoch % 200 == 0):
        learning_rate.set_value(np.float32(learning_rate.get_value() * 0.7))
# deterministic=True disables dropout for evaluation
test_prediction = lasagne.layers.get_output(network, deterministic=True)
predict_fn = theano.function([input_var], test_prediction)
all_predictions = predict_fn(valX)
score = 0.0
for probs, label in zip(all_predictions, valY):
    score += get_score(probs, label)
print "Final score on ensembling validaion = %f" % score
print "Expected score = %f" % (score / len(valY) * 3520)
print "\n\n==> creating submission..."
# Re-load per-model probabilities for the test set and write the blended output.
X = np.zeros((12320, n_models * 176), dtype=np.float32)
for iter in range(n_models):
    csvpath = 'probs/test/' + sys.argv[iter + 1]
    csv = open(csvpath, 'r')
    for row_id, line in enumerate(csv.readlines()):
        mas = line.split(',')
        mas = np.array([float(x) for x in mas], dtype=np.float32)
        X[row_id, 176*iter:176*(iter+1)] = mas
    csv.close()
prediction = predict_fn(X)
print "prediction.shape =", prediction.shape
ensembled = open('ensembled.csv', 'w')
for probs in prediction:
    out = [str(x) for x in probs]
    ensembled.write(','.join(out) + '\n')
"""
######################### SAVING MODEL TO BE ABLE TO REPRODUCE #################
print "==> Saving model..."
with open("model.pickle", 'w') as save_file:
    pickle.dump(obj = {'params' : layers.get_all_param_values(network)}, file = save_file, protocol = -1)
"""
================================================
FILE: ensembling/get_output_layers.py
================================================
""" Usage: python get_output_layers.py test|val
"""
import sys
import caffe
import numpy as np
caffe.set_mode_gpu()
# Deployment prototxt and trained weights of the augmented-data CNN.
deploy = '../prototxt/deploy.augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt'
model = 'augm_dropout0.3_on_augm84K-lr0.01_30K_iter_75000'
model_path = '../models/' + model + '.caffemodel'
"""
####################### networks with no augmentation ##########################
net = caffe.Classifier(deploy, model_path)
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))
net.blobs['data'].reshape(1, 1, 256, 858)
folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/png/'
cnt = 12320
file = open('../valEqual.csv', 'r')
prob_file = open('probs/val/' + model + '.csv', 'w')
for iter in range(cnt):
    name = file.readline().split(',')[0]
    net.blobs['data'].data[...] = transformer.preprocess('data',
        caffe.io.load_image(folder + name + '.png', color=False))
    probs = net.forward()['loss'][0]
    probs = [str(x) for x in probs]
    prob_file.write(','.join(probs) + '\n')
    if (iter % 100 == 0):
        print "processed %d images" % (iter + 1)
"""
######################### networks with augmentation ###########################
assert sys.argv[1] in ('test', 'val')
dataset = sys.argv[1]
augm_cnt = 20   # number of augmented spectrogram crops per recording
cnt = 12320     # number of recordings
if (dataset == 'val'):
    folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/pngaugm/'
    file = open('../valEqual.csv', 'r')
else:
    folder = '../test/pngaugm/'
    file = open('../testingData.csv', 'r')
# sum - mean of augm_cnt versions of speech
# log - mean of logs of augm_cnt versions of speech
# dense - last dense layer, 1024 outputs
prob_file_sum = open('probs/' + dataset + '/' + model + '.sum' + str(augm_cnt) + '.csv', 'w')
prob_file_log = open('probs/' + dataset + '/' + model + '.log' + str(augm_cnt) + '.csv', 'w')
dense_file = open('probs/' + dataset + '/'+ model + '.dense' + str(augm_cnt) + '.csv', 'w')
net = caffe.Classifier(deploy, model_path)
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))
# one batch holds all augmented versions of a single recording
net.blobs['data'].reshape(augm_cnt, 1, 256, 768)
for iter in range(cnt):
    if (dataset == 'val'):
        name = file.readline().split(',')[0]
    else:
        name = file.readline().strip()[:-4]  # strip '.mp3'
    X = np.zeros((augm_cnt, 1, 256, 768), dtype=np.float32)
    for index in range(augm_cnt):
        augm_path = folder + name + '.' + str(index) + '.png'
        X[index] = transformer.preprocess('data', caffe.io.load_image(augm_path, color=False))
    net.blobs['data'].data[...] = X
    out = net.forward()['loss']
    probs_sum = out.mean(axis=0)                 # arithmetic mean over augmented versions
    probs_log = np.log(out + 1e-7).mean(axis=0)  # mean in log domain (1e-7 avoids log(0))
    dense = net.blobs['ip2new'].data             # activations of the last hidden layer
    probs_sum = [str(x) for x in probs_sum]
    prob_file_sum.write(','.join(probs_sum) + '\n')
    probs_log = ["%f" % x for x in probs_log]
    prob_file_log.write(','.join(probs_log) + '\n')
    for index in range(augm_cnt):
        tmp = [str(x) for x in dense[index]]
        dense_file.write(','.join(tmp) + '\n')
    if (iter % 10 == 0):
        print "processed %d images" % (iter + 1)
================================================
FILE: get_score_from_probabilities.py
================================================
""" USAGE: python get_score_from_probabilities.py --prediction= --anwser=
prediction file may have less lines
"""
import sys
import numpy as np
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--prediction', type=str)
parser.add_argument('--answer', type=str, default='valDataNew.csv')
args = parser.parse_args()
print args
# info about classes
file = open('trainingData.csv')
data = file.readlines()[1:]
langs = set()
for line in data:
filepath, language = line.split(',')
language = language.strip()
langs.add(language)
langs = sorted(langs)
prediction_file = open(args.prediction, 'r')
prediction_lines = prediction_file.readlines()
answer_file = open(args.answer, 'r')
answer_lines = answer_file.readlines()
cnt = len(prediction_lines)
top_coder_score = 0.0
correct = 0
wrong_answers = open('wrong_answers.txt', 'w')
for iter in range(cnt):
st = answer_lines[iter]
(name, label) = st.split(',')
label = int(label)
out = prediction_lines[iter].split(',')
out = [float(x) for x in out]
pred = [(x, it) for it, x in enumerate(out)]
pred = sorted(pred, reverse=True)
if (pred[0][1] == label):
correct += 1
top_coder_score = top_coder_score + 1000
elif (pred[1][1] == label):
#correct += 1
top_coder_score = top_coder_score + 400
elif (pred[2][1] == label):
#correct += 1
top_coder_score = top_coder_score + 160
if (pred[0][1] != label):
print >> wrong_answers, answer_lines[iter] + prediction_lines[iter]
if ((iter + 1) % 100 == 0):
print >> sys.stderr, "processed %d / %d images" % (iter + 1, cnt)
print >> sys.stderr, "expected score:", top_coder_score / (iter + 1) * 35200
print >> sys.stderr, "Final score: ", top_coder_score, " / ", cnt, "000"
print >> sys.stderr, "expected score:", top_coder_score / cnt * 35200
print >> sys.stderr, "Accuracy: ", 100.0 * correct / cnt
================================================
FILE: get_score_from_top3_prediction.py
================================================
""" USAGE: python get_score_fromcsv.py --prediction= --anwser=
Prediction file may have less lines
Each line of prediction file must contain at least 3 integers: labels of top3
predictions, then it may have some additional information
"""
import sys
import numpy as np
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--prediction', type=str)
parser.add_argument('--answer', type=str, default='valDataNew.csv')
args = parser.parse_args()
print args
# info about classes
file = open('trainingData.csv')
data = file.readlines()[1:]
langs = set()
for line in data:
filepath, language = line.split(',')
language = language.strip()
langs.add(language)
langs = sorted(langs)
prediction_file = open(args.prediction, 'r')
prediction_lines = prediction_file.readlines()
answer_file = open(args.answer, 'r')
answer_lines = answer_file.readlines()
cnt = len(prediction_lines)
top_coder_score = 0.0
correct = 0
wrong_answers = open('wrong_answers.txt', 'w')
for iter in range(cnt):
st = answer_lines[iter]
(name, label) = st.split(',')
label = int(label)
pred = prediction_lines[iter].split(',')
pred = [int(x) for x in pred]
if (pred[0] == label):
correct += 1
top_coder_score = top_coder_score + 1000
elif (pred[1] == label):
#correct += 1
top_coder_score = top_coder_score + 400
elif (pred[2] == label):
#correct += 1
top_coder_score = top_coder_score + 160
if (pred[0] != label):
print >> wrong_answers, (answer_lines[iter] + str(pred[3 + pred[0]]) + ',' + str(pred[3 + pred[1]]) + ',' +
str(pred[3 + pred[2]]) + ', votes for correct answer: ' + str(pred[3 + label]))
if ((iter + 1) % 100 == 0):
print >> sys.stderr, "processed %d / %d images" % (iter + 1, cnt)
print >> sys.stderr, "expected score:", top_coder_score / (iter + 1) * 35200
print >> sys.stderr, "Final score: ", top_coder_score, " / ", cnt, "000"
print >> sys.stderr, "expected score:", top_coder_score / cnt * 35200
print >> sys.stderr, "Accuracy: ", 100.0 * correct / cnt
================================================
FILE: get_sum_of_csvs.py
================================================
""" Usage: python get_sum_csvs.py csv1path csv2path ..
"""
import sys
import numpy as np
n_csv = len(sys.argv) - 1
cnt = 12320
csv = []
for index in range(1, len(sys.argv)):
csv.append(open(sys.argv[index], 'r'))
outfile = open('summed.csv', 'w')
for iter in range(12320):
out = np.zeros((176,), dtype=np.float32)
for index in range(n_csv):
cur_out = csv[index].readline().split(',')
cur_out = [float(x) for x in cur_out]
out += cur_out
out = [("%.6f" % x) for x in out]
outfile.write(','.join(out) + '\n')
================================================
FILE: majority_vote_ensembling.py
================================================
""" Usage: python majority_vote_ensembling.py csv1path csv2path ..
"""
import sys
import numpy as np
n_csv = len(sys.argv) - 1
train_cnt = 12320
csv = []
for index in range(1, len(sys.argv)):
csv.append(open(sys.argv[index], 'r'))
ensembled = open('top3_prediction_ensembled.csv', 'w')
for iter in range(train_cnt):
cnt = [0 for i in range(176)]
avg_prob = np.array([0.0 for i in range(176)])
for index in range(n_csv):
cur_prob = csv[index].readline().split(',')
cur_prob = np.array([float(x) for x in cur_prob])
avg_prob += cur_prob
prediction = cur_prob.argmax()
cnt[prediction] += 1
mas = [(cnt[index], avg_prob[index], index) for index in range(176)]
mas = sorted(mas, reverse=True)
ensembled.write(str(mas[0][2]) + ',' + str(mas[1][2]) + ',' + str(mas[2][2]) + ',')
ensembled.write(','.join([str(x) for x in cnt]) + '\n')
================================================
FILE: make_submission.py
================================================
""" Usage: python make_submission.py csvpath model_name
csv - must contain 12320 rows, 176 coloumns: the predictions for test set
"""
import sys
import numpy as np
# info about classes
file = open('trainingData.csv')
data = file.readlines()[1:]
langs = set()
for line in data:
filepath, language = line.split(',')
language = language.strip()
langs.add(language)
langs = sorted(langs)
path = sys.argv[1]
name = sys.argv[2]
read_file = open(path, 'r')
f = open('testingData.csv')
cnt = 12320
print_file = open('predictions/test_' + name + '.csv', 'w')
for iter in range(cnt):
st = f.readline()
name = st.strip()[:-4]
out = read_file.readline().split(',')
out = [float(x) for x in out]
pred = sorted([(x, it) for it, x in enumerate(out)], reverse=True)
for i in range(3):
lang_id = pred[i][1]
lang = langs[lang_id]
print_file.write(name + '.mp3,' + lang + ',' + str(i + 1) + '\n')
if (iter % 100 == 0):
print >> sys.stderr, "processed %d / %d images" % (iter + 1, cnt)
================================================
FILE: prototxt/augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024r-1024r_DLR_nolrcoef.prototxt
================================================
name: "LangNet"
# DATA LAYERS
layer {
name: "mnist"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
scale: 0.00390625
}
data_param {
source: "train/train_augm_db"
batch_size: 24
backend: LEVELDB
}
}
layer {
name: "mnist"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
scale: 0.00390625
}
data_param {
source: "train/val_augm_db"
batch_size: 24
backend: LEVELDB
}
}
# CONV1-RELU1-POOL1
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 32
kernel_size: 7
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
# CONV2-RELU2-POOL2_
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 64
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
# CONV3-RELU3-POOL3
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 64
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# CONV4-RELU4-POOL4
layer {
name: "conv4"
type: "Convolution"
bottom: "pool3"
top: "conv4"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 128
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# CONV5-RELU5-POOL5
layer {
name: "conv5"
type: "Convolution"
bottom: "pool4"
top: "conv5"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 128
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "conv5"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# CONV6-RELU6-POOL6
layer {
name: "conv6"
type: "Convolution"
bottom: "pool5"
top: "conv6"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 256
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "conv6"
top: "conv6"
}
layer {
name: "pool6"
type: "Pooling"
bottom: "conv6"
top: "pool6"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# IP layers
layer {
name: "ip1new"
type: "InnerProduct"
bottom: "pool6"
top: "ip1new"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 1024
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "reluOnIp1"
type: "ReLU"
bottom: "ip1new"
top: "ip1new"
}
layer {
name: "ip2new"
type: "InnerProduct"
bottom: "ip1new"
top: "ip2new"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 1024
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "reluOnIp2"
type: "ReLU"
bottom: "ip2new"
top: "ip2new"
}
layer {
name: "ip3new"
type: "InnerProduct"
bottom: "ip2new"
top: "ip3new"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 176
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "ip3new"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "ip3new"
bottom: "label"
top: "loss"
}
================================================
FILE: prototxt/augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt
================================================
name: "LangNet"
# DATA LAYERS
layer {
name: "mnist"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
scale: 0.00390625
}
data_param {
source: "train/train_augm_db"
batch_size: 23
backend: LEVELDB
}
}
layer {
name: "mnist"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
scale: 0.00390625
}
data_param {
source: "train/val_augm_db"
batch_size: 24
backend: LEVELDB
}
}
# CONV1-RELU1-POOL1
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 32
kernel_size: 7
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
# CONV2-RELU2-POOL2
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 64
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
# CONV3-RELU3-POOL3
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 64
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# CONV4-RELU4-POOL4
layer {
name: "conv4"
type: "Convolution"
bottom: "pool3"
top: "conv4"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 128
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# CONV5-RELU5-POOL5
layer {
name: "conv5"
type: "Convolution"
bottom: "pool4"
top: "conv5"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 128
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "conv5"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# CONV6-RELU6-POOL6
layer {
name: "conv6"
type: "Convolution"
bottom: "pool5"
top: "conv6"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 256
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "conv6"
top: "conv6"
}
layer {
name: "pool6"
type: "Pooling"
bottom: "conv6"
top: "pool6"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# IP layers
layer {
name: "ip1new"
type: "InnerProduct"
bottom: "pool6"
top: "ip1new"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 1024
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "reluOnIp1"
type: "ReLU"
bottom: "ip1new"
top: "ip1new"
}
layer {
name: "dropOnIp1"
type: "Dropout"
dropout_param {
dropout_ratio: 0.3
}
bottom: "ip1new"
top: "ip1new"
}
layer {
name: "ip2new"
type: "InnerProduct"
bottom: "ip1new"
top: "ip2new"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 1024
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "reluOnIp2"
type: "ReLU"
bottom: "ip2new"
top: "ip2new"
}
layer {
name: "dropOnIp2"
type: "Dropout"
dropout_param {
dropout_ratio: 0.3
}
bottom: "ip2new"
top: "ip2new"
}
layer {
name: "ip3new"
type: "InnerProduct"
bottom: "ip2new"
top: "ip3new"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 176
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "ip3new"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "ip3new"
bottom: "label"
top: "loss"
}
================================================
FILE: prototxt/deploy.augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt
================================================
name: "LangNet"
# DATA LAYERS
input: "data"
input_dim: 1
input_dim: 1
input_dim: 256
input_dim: 768
# CONV1-RELU1-POOL1
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 32
kernel_size: 7
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
# CONV2-RELU2-POOL2
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 64
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
# CONV3-RELU3-POOL3
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 64
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# CONV4-RELU4-POOL4
layer {
name: "conv4"
type: "Convolution"
bottom: "pool3"
top: "conv4"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 128
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# CONV5-RELU5-POOL5
layer {
name: "conv5"
type: "Convolution"
bottom: "pool4"
top: "conv5"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 128
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "conv5"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# CONV6-RELU6-POOL6
layer {
name: "conv6"
type: "Convolution"
bottom: "pool5"
top: "conv6"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 256
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "conv6"
top: "conv6"
}
layer {
name: "pool6"
type: "Pooling"
bottom: "conv6"
top: "pool6"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# IP layers
layer {
name: "ip1new"
type: "InnerProduct"
bottom: "pool6"
top: "ip1new"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 1024
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "reluOnIp1"
type: "ReLU"
bottom: "ip1new"
top: "ip1new"
}
layer {
name: "dropOnIp1"
type: "Dropout"
dropout_param {
dropout_ratio: 0.3
}
bottom: "ip1new"
top: "ip1new"
}
layer {
name: "ip2new"
type: "InnerProduct"
bottom: "ip1new"
top: "ip2new"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 1024
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "reluOnIp2"
type: "ReLU"
bottom: "ip2new"
top: "ip2new"
}
layer {
name: "dropOnIp2"
type: "Dropout"
dropout_param {
dropout_ratio: 0.3
}
bottom: "ip2new"
top: "ip2new"
}
layer {
name: "ip3new"
type: "InnerProduct"
bottom: "ip2new"
top: "ip3new"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 176
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "loss"
type: "Softmax"
bottom: "ip3new"
top: "loss"
}
================================================
FILE: prototxt/deploy.main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt
================================================
name: "LangNet"
# DATA LAYERS
input: "data"
input_dim: 1
input_dim: 1
input_dim: 256
input_dim: 858
# CONV1-RELU1-POOL1
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 15
}
param {
lr_mult: 30
}
convolution_param {
num_output: 32
kernel_size: 7
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
# CONV2-RELU2-POOL2
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 12
}
param {
lr_mult: 24
}
convolution_param {
num_output: 64
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
# CONV3-RELU3-POOL3
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult: 9
}
param {
lr_mult: 18
}
convolution_param {
num_output: 64
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# CONV4-RELU4-POOL4
layer {
name: "conv4"
type: "Convolution"
bottom: "pool3"
top: "conv4"
param {
lr_mult: 4
}
param {
lr_mult: 8
}
convolution_param {
num_output: 128
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# CONV5-RELU5-POOL5
layer {
name: "conv5"
type: "Convolution"
bottom: "pool4"
top: "conv5"
param {
lr_mult: 2
}
param {
lr_mult: 4
}
convolution_param {
num_output: 128
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "conv5"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# CONV6-RELU6-POOL6
layer {
name: "conv6"
type: "Convolution"
bottom: "pool5"
top: "conv6"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 256
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "conv6"
top: "conv6"
}
layer {
name: "pool6"
type: "Pooling"
bottom: "conv6"
top: "pool6"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# IP layers
layer {
name: "ip1"
type: "InnerProduct"
bottom: "pool6"
top: "ip1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 1024
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "reluOnIp1"
type: "ReLU"
bottom: "ip1"
top: "ip1"
}
layer {
name: "dropOnIp1"
type: "Dropout"
dropout_param {
dropout_ratio: 0.5
}
bottom: "ip1"
top: "ip1"
}
layer {
name: "ip2"
type: "InnerProduct"
bottom: "ip1"
top: "ip2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 1024
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "reluOnIp2"
type: "ReLU"
bottom: "ip2"
top: "ip2"
}
layer {
name: "dropOnIp2"
type: "Dropout"
dropout_param {
dropout_ratio: 0.5
}
bottom: "ip2"
top: "ip2"
}
layer {
name: "ip3"
type: "InnerProduct"
bottom: "ip2"
top: "ip3"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 176
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "loss"
type: "Softmax"
bottom: "ip3"
top: "loss"
}
================================================
FILE: prototxt/main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt
================================================
name: "LangNet"
# DATA LAYERS
layer {
name: "mnist"
type: "Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
transform_param {
scale: 0.00390625
}
data_param {
source: "train/traindb"
batch_size: 32
backend: LEVELDB
}
}
layer {
name: "mnist"
type: "Data"
top: "data"
top: "label"
include {
phase: TEST
}
transform_param {
scale: 0.00390625
}
data_param {
source: "train/valdb"
batch_size: 1
backend: LEVELDB
}
}
# CONV1-RELU1-POOL1
layer {
name: "conv1"
type: "Convolution"
bottom: "data"
top: "conv1"
param {
lr_mult: 15
}
param {
lr_mult: 30
}
convolution_param {
num_output: 32
kernel_size: 7
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu1"
type: "ReLU"
bottom: "conv1"
top: "conv1"
}
layer {
name: "pool1"
type: "Pooling"
bottom: "conv1"
top: "pool1"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
# CONV2-RELU2-POOL2
layer {
name: "conv2"
type: "Convolution"
bottom: "pool1"
top: "conv2"
param {
lr_mult: 12
}
param {
lr_mult: 24
}
convolution_param {
num_output: 64
kernel_size: 5
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu2"
type: "ReLU"
bottom: "conv2"
top: "conv2"
}
layer {
name: "pool2"
type: "Pooling"
bottom: "conv2"
top: "pool2"
pooling_param {
pool: MAX
kernel_size: 3
stride: 2
}
}
# CONV3-RELU3-POOL3
layer {
name: "conv3"
type: "Convolution"
bottom: "pool2"
top: "conv3"
param {
lr_mult: 9
}
param {
lr_mult: 18
}
convolution_param {
num_output: 64
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu3"
type: "ReLU"
bottom: "conv3"
top: "conv3"
}
layer {
name: "pool3"
type: "Pooling"
bottom: "conv3"
top: "pool3"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# CONV4-RELU4-POOL4
layer {
name: "conv4"
type: "Convolution"
bottom: "pool3"
top: "conv4"
param {
lr_mult: 4
}
param {
lr_mult: 8
}
convolution_param {
num_output: 128
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu4"
type: "ReLU"
bottom: "conv4"
top: "conv4"
}
layer {
name: "pool4"
type: "Pooling"
bottom: "conv4"
top: "pool4"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# CONV5-RELU5-POOL5
layer {
name: "conv5"
type: "Convolution"
bottom: "pool4"
top: "conv5"
param {
lr_mult: 2
}
param {
lr_mult: 4
}
convolution_param {
num_output: 128
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu5"
type: "ReLU"
bottom: "conv5"
top: "conv5"
}
layer {
name: "pool5"
type: "Pooling"
bottom: "conv5"
top: "pool5"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# CONV6-RELU6-POOL6
layer {
name: "conv6"
type: "Convolution"
bottom: "pool5"
top: "conv6"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
convolution_param {
num_output: 256
kernel_size: 3
stride: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "relu6"
type: "ReLU"
bottom: "conv6"
top: "conv6"
}
layer {
name: "pool6"
type: "Pooling"
bottom: "conv6"
top: "pool6"
pooling_param {
pool: MAX
kernel_size: 3
stride:2
}
}
# IP layers
layer {
name: "ip1"
type: "InnerProduct"
bottom: "pool6"
top: "ip1"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 1024
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "reluOnIp1"
type: "ReLU"
bottom: "ip1"
top: "ip1"
}
layer {
name: "dropOnIp1"
type: "Dropout"
dropout_param {
dropout_ratio: 0.5
}
bottom: "ip1"
top: "ip1"
}
layer {
name: "ip2"
type: "InnerProduct"
bottom: "ip1"
top: "ip2"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 1024
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "reluOnIp2"
type: "ReLU"
bottom: "ip2"
top: "ip2"
}
layer {
name: "dropOnIp2"
type: "Dropout"
dropout_param {
dropout_ratio: 0.5
}
bottom: "ip2"
top: "ip2"
}
layer {
name: "ip3"
type: "InnerProduct"
bottom: "ip2"
top: "ip3"
param {
lr_mult: 1
}
param {
lr_mult: 2
}
inner_product_param {
num_output: 176
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
}
}
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "ip3"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "ip3"
bottom: "label"
top: "loss"
}
================================================
FILE: prototxt/solver.augm.nolrcoef.prototxt
================================================
net: "prototxt/augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt"
test_iter: 512
test_interval: 1500
# The base learning rate, momentum and the weight decay of the network.
base_lr: 0.01
weight_decay: 0.0000
# The learning rate policy
# lr_policy: "fixed"
# solver_type: ADADELTA
lr_policy: "inv"
gamma: 0.0003
power: 0.9
#lr_policy: "step"
#gamma: 0.9
#stepsize: 6000
display: 1
max_iter: 800000
snapshot: 3000
snapshot_prefix: "models/augm_dropout0.3_on_augm84K-lr0.01_30K_90K"
#log: "logs/augm_dropout0.3_on_augm84K-lr0.01_30K_90K.txt"
solver_mode: GPU
================================================
FILE: prototxt/solver.main.adadelta.prototxt
================================================
net: "prototxt/main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt"
test_iter: 100
test_interval: 100
# The base learning rate, momentum and the weight decay of the network.
weight_decay: 0.0000
# The learning rate policy
base_lr: 0.01
lr_policy: "fixed"
solver_type: ADADELTA
display: 1
max_iter: 800000
snapshot: 3000
snapshot_prefix: "models/main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR_adadelta0.01"
#log: "logs/main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR_adadelta0.01.txt"
solver_mode: GPU
================================================
FILE: test_augm_network.py
================================================
import sys
import caffe
import numpy as np

caffe.set_mode_gpu()

# --- Class info -------------------------------------------------------------
# Build the sorted list of language names from the training metadata.
# The sorted order defines the mapping between network output indices
# and language labels, so it must match the order used when the
# training databases were created.
with open('trainingData.csv') as meta_file:
    rows = meta_file.readlines()[1:]  # skip the CSV header line
langs = set()
for row in rows:
    _, language = row.split(',')
    langs.add(language.strip())
langs = sorted(langs)

# --- Network parameters ------------------------------------------------------
deploy_name = 'augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3'
network_name = 'augm_dropout0.3_on_augm84K-lr0.01_30K'
iterations = '90000'
aveSamples = 20  # average the softmax output over this many augmented samples per mp3

net = caffe.Classifier(model_file='prototxt/deploy.' + deploy_name + '.prototxt',
                       pretrained_file='models/' + network_name + '_iter_' + iterations + '.caffemodel')
# Spectrograms for the augmented network are 256 x 768 single-channel images.
net.blobs['data'].reshape(1, 1, 256, 768)

# --- Choose the prediction set ----------------------------------------------
predict_set = sys.argv[1]
if predict_set == "test":
    folder = 'test/png/'
    f = open('testingData.csv')
    cnt = 12320
    print_file = open('predictions/test_' + network_name + '_iter_' + iterations + '_' + str(aveSamples) + '.csv', 'w')
elif predict_set == "val":
    folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/pngaugm/'
    f = open('valEqual.csv')
    cnt = 12320
    print_file = open('predictions/validation_' + network_name + '_iter_' + iterations + '_' + str(aveSamples) + '.csv', 'w')
else:  # train
    folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/pngaugm/'
    f = open('trainEqual.csv')
    cnt = 10000
    print_file = open('predictions/train_' + network_name + '_iter_' + iterations + '_' + str(aveSamples) + '.csv', 'w')

topcoder_score = 0.0  # float: keeps the "expected score" division below exact in Python 2
processed = 0
for sample_idx in range(cnt):  # renamed from 'iter', which shadowed the builtin
    line = f.readline()
    if predict_set == "val" or predict_set == "train":
        (name, label) = line.split(',')
        label = int(label)
    else:
        name = line.strip()[:-4]  # drop the '.mp3' extension from the test listing

    processed += 1

    # Average the network's softmax outputs over several augmented versions
    # of the same clip ("<name>.<k>.png") to reduce prediction variance.
    out = np.zeros((176, ))
    for random_index in range(aveSamples):
        image = caffe.io.load_image(folder + name + '.' + str(random_index) + '.png', color=False)
        image = np.transpose(image, (2, 0, 1))  # HWC -> CHW as Caffe expects
        net.blobs['data'].data[...] = image
        out += net.forward()['loss'][0]

    # Rank the 176 classes by accumulated probability, highest first.
    pred = sorted([(x, it) for it, x in enumerate(out)], reverse=True)

    # TopCoder scoring: 1000 / 400 / 160 points when the true label is the
    # top-1 / top-2 / top-3 prediction respectively.
    if predict_set == "val" or predict_set == "train":
        if pred[0][1] == label:
            topcoder_score = topcoder_score + 1000
        elif pred[1][1] == label:
            topcoder_score = topcoder_score + 400
        elif pred[2][1] == label:
            topcoder_score = topcoder_score + 160

    # Emit the top-3 predictions in submission format: "<file>.mp3,<lang>,<rank>".
    for i in range(3):
        lang_id = pred[i][1]
        lang = langs[lang_id]
        print_file.write(name + '.mp3,' + lang + ',' + str(i + 1) + '\n')

    if sample_idx % 100 == 0:
        print >> sys.stderr, network_name + '_iter_' + iterations + '_' + str(aveSamples)
        print >> sys.stderr, "processed %d / %d images (%d samples/mp3)" % (sample_idx, cnt, aveSamples)
        print >> sys.stderr, "score: ", topcoder_score
        print >> sys.stderr, "expected score:", topcoder_score / processed * 35200

print >> sys.stderr, "Final score: ", topcoder_score, " / ", cnt, "000"
print >> sys.stderr, "expected score:", topcoder_score / processed * 35200

f.close()
print_file.close()
================================================
FILE: test_main_network.py
================================================
import sys
import caffe
import numpy as np

caffe.set_mode_gpu()

# --- Class info -------------------------------------------------------------
# Build the sorted list of language names from the training metadata.
# The sorted order defines the mapping between network output indices
# and language labels.
with open('trainingData.csv') as meta_file:
    rows = meta_file.readlines()[1:]  # skip the CSV header line
langs = set()
for row in rows:
    _, language = row.split(',')
    langs.add(language.strip())
langs = sorted(langs)

# --- Network parameters ------------------------------------------------------
deploy_name = 'main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR'
network_name = deploy_name + '_150K-momentum'
iterations = '51000'

net = caffe.Classifier(model_file='prototxt/deploy.' + deploy_name + '.prototxt',
                       pretrained_file='models/' + network_name + '_iter_' + iterations + '.caffemodel')
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_transpose('data', (2, 0, 1))  # HWC -> CHW as Caffe expects
# Spectrograms for the main network are 256 x 858 single-channel images.
net.blobs['data'].reshape(1, 1, 256, 858)

# --- Choose the prediction set ----------------------------------------------
predict_set = sys.argv[1]
if predict_set == "test":
    folder = 'test/png/'
    f = open('testingData.csv')
    cnt = 12320
    print_file = open('predictions/test_' + network_name + '_iter_' + iterations + '.csv', 'w')
elif predict_set == "val":
    folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/pngaugm/'  # NOTE: points at the augmented pngs
    f = open('valDataNew.csv')
    cnt = 16176
    print_file = open('predictions/validation_' + network_name + '_iter_' + iterations + '.csv', 'w')
else:  # train
    folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/png/'
    f = open('trainingDataNew.csv')
    cnt = 10000
    print_file = open('predictions/train_' + network_name + '_iter_' + iterations + '.csv', 'w')

# Float initializer: with the previous `0` the Python 2 expression
# `topcoder_score / processed * 35200` silently floor-divided, making the
# "expected score" estimate wrong (the augm script already used 0.0).
topcoder_score = 0.0
processed = 0
for sample_idx in range(cnt):  # renamed from 'iter', which shadowed the builtin
    line = f.readline()
    if predict_set == "val" or predict_set == "train":
        (name, label) = line.split(',')
        label = int(label)
    else:
        name = line.strip()[:-4]  # drop the '.mp3' extension from the test listing

    processed += 1
    net.blobs['data'].data[...] = transformer.preprocess('data',
        caffe.io.load_image(folder + name + '.png', color=False))
    out = net.forward()['loss'][0]

    # Rank the 176 classes by probability, highest first.
    pred = sorted([(x, it) for it, x in enumerate(out)], reverse=True)

    # TopCoder scoring: 1000 / 400 / 160 points when the true label is the
    # top-1 / top-2 / top-3 prediction respectively.
    if predict_set == "val" or predict_set == "train":
        if pred[0][1] == label:
            topcoder_score = topcoder_score + 1000
        elif pred[1][1] == label:
            topcoder_score = topcoder_score + 400
        elif pred[2][1] == label:
            topcoder_score = topcoder_score + 160

    # Emit the top-3 predictions in submission format: "<file>.mp3,<lang>,<rank>".
    for i in range(3):
        lang_id = pred[i][1]
        lang = langs[lang_id]
        print_file.write(name + '.mp3,' + lang + ',' + str(i + 1) + '\n')

    if sample_idx % 100 == 0:
        print >> sys.stderr, "processed %d / %d images" % (sample_idx, cnt)
        print >> sys.stderr, "score: ", topcoder_score
        print >> sys.stderr, "expected score:", topcoder_score / processed * 35200

print >> sys.stderr, "Final score: ", topcoder_score, " / ", cnt, "000"
print >> sys.stderr, "expected score:", topcoder_score / processed * 35200

f.close()
print_file.close()
================================================
FILE: theano/README.md
================================================
# Spoken language identification
`networks` folder contains multiple CNN and/or RNN models implemented in Theano/Lasagne.
Read more in the corresponding [blog post](http://yerevann.github.io/2016/06/26/combining-cnn-and-rnn-for-spoken-language-identification/).
================================================
FILE: theano/main.py
================================================
import sys
import numpy as np
import sklearn.metrics as metrics
import argparse
import time
import json
import importlib
print "==> parsing input arguments"
parser = argparse.ArgumentParser()
# TODO: add argument to choose training set
parser.add_argument('--network', type=str, default="network_batch", help='embeding size (50, 100, 200, 300 only)')
parser.add_argument('--epochs', type=int, default=500, help='number of epochs to train')
parser.add_argument('--load_state', type=str, default="", help='state file path')
parser.add_argument('--mode', type=str, default="train", help='mode: train/test/test_on_train')
parser.add_argument('--batch_size', type=int, default=32, help='no commment')
parser.add_argument('--l2', type=float, default=0, help='L2 regularization')
parser.add_argument('--log_every', type=int, default=100, help='print information every x iteration')
parser.add_argument('--save_every', type=int, default=50000, help='save state every x iteration')
parser.add_argument('--prefix', type=str, default="", help='optional prefix of network name')
parser.add_argument('--dropout', type=float, default=0.0, help='dropout rate (between 0 and 1)')
parser.add_argument('--no-batch_norm', dest="batch_norm", action='store_false', help='batch normalization')
parser.add_argument('--rnn_num_units', type=int, default=500, help='number of hidden units if the network is RNN')
parser.add_argument('--equal_split', type=bool, default=False, help='use trainEqual.csv and valEqual.csv')
parser.add_argument('--forward_cnt', type=int, default=1, help='if forward pass is nondeterministic, then how many forward passes are averaged')
parser.set_defaults(batch_norm=True)
args = parser.parse_args()
print args
if (args.equal_split):
train_listfile = open("/mnt/hdd615/Hrayr/Spoken-language-identification/trainEqual.csv", "r")
test_listfile = open("/mnt/hdd615/Hrayr/Spoken-language-identification/valEqual.csv", "r")
else:
train_listfile = open("/mnt/hdd615/Hrayr/Spoken-language-identification/trainingDataNew.csv", "r")
test_listfile = open("/mnt/hdd615/Hrayr/Spoken-language-identification/valDataNew.csv", "r")
train_list_raw = train_listfile.readlines()
test_list_raw = test_listfile.readlines()
print "==> %d training examples" % len(train_list_raw)
print "==> %d validation examples" % len(test_list_raw)
train_listfile.close()
test_listfile.close()
args_dict = dict(args._get_kwargs())
args_dict['train_list_raw'] = train_list_raw
args_dict['test_list_raw'] = test_list_raw
args_dict['png_folder'] = "/mnt/hdd615/Hrayr/Spoken-language-identification/train/png/"
print "==> using network %s" % args.network
network_module = importlib.import_module("networks." + args.network)
network = network_module.Network(**args_dict)
network_name = args.prefix + '%s.bs%d%s%s' % (
network.say_name(),
args.batch_size,
".bn" if args.batch_norm else "",
(".d" + str(args.dropout)) if args.dropout>0 else "")
print "==> network_name:", network_name
start_epoch = 0
if args.load_state != "":
start_epoch = network.load_state(args.load_state) + 1
def do_epoch(mode, epoch):
    # Run one full pass over the data set selected by `mode` and return the
    # average loss per batch.
    #
    # mode is 'train' or 'test' or 'predict'
    # ('predict_on_train' is also accepted: it runs the test function over the
    # training list and collects predictions).
    # `epoch` is used only for log messages.
    #
    # Relies on the module-level `network`, `args`, `np` and `time` objects.
    y_true = []
    y_pred = []
    avg_loss = 0.0
    prev_time = time.time()
    batches_per_epoch = network.get_batches_per_epoch(mode)
    all_prediction = []
    for i in range(0, batches_per_epoch):
        step_data = network.step(i, mode)
        prediction = step_data["prediction"]
        answers = step_data["answers"]
        current_loss = step_data["current_loss"]
        log = step_data["log"]
        # NOTE: avg_loss accumulates only the FIRST forward pass's loss;
        # the extra forward_cnt-1 passes below do not feed into it.
        avg_loss += current_loss
        if (mode == "predict" or mode == "predict_on_train"):
            all_prediction.append(prediction)
        # Average several stochastic forward passes (e.g. with dropout on).
        # `prediction += ...` and `prediction /= ...` mutate the array
        # in place, so the entry already appended to all_prediction above
        # ends up holding the averaged values as well — presumably
        # intentional; confirm before restructuring.
        for pass_id in range(args.forward_cnt-1):
            step_data = network.step(i, mode)
            prediction += step_data["prediction"]
            current_loss += step_data["current_loss"]
        prediction /= args.forward_cnt
        current_loss /= args.forward_cnt
        for x in answers:
            y_true.append(x)
        for x in prediction.argmax(axis=1):
            y_pred.append(x)
        if ((i + 1) % args.log_every == 0):
            cur_time = time.time()
            print ("  %sing: %d.%d / %d \t loss: %3f \t avg_loss: %.5f \t %s \t time: %.2fs" %
                (mode, epoch, (i + 1) * args.batch_size, batches_per_epoch * args.batch_size,
                current_loss, avg_loss / (i + 1), log, cur_time - prev_time))
            prev_time = cur_time

    #print "confusion matrix:"
    #print metrics.confusion_matrix(y_true, y_pred)

    # Accuracy over all examples seen this epoch.
    accuracy = sum([1 if t == p else 0 for t, p in zip(y_true, y_pred)])
    print "accuracy: %.2f percent" % (accuracy * 100.0 / batches_per_epoch / args.batch_size)

    # NOTE(review): only mode == "predict" writes the CSV; "predict_on_train"
    # collects predictions but never saves them — confirm this is intended.
    if (mode == "predict"):
        all_prediction = np.vstack(all_prediction)
        pred_filename = "predictions/" + ("equal_split." if args.equal_split else "") + \
            args.load_state[args.load_state.rfind('/')+1:] + ".csv"
        with open(pred_filename, 'w') as pred_csv:
            for x in all_prediction:
                print >> pred_csv, ",".join([("%.6f" % prob) for prob in x])

    return avg_loss / batches_per_epoch
if args.mode == 'train':
print "==> training"
for epoch in range(start_epoch, args.epochs):
do_epoch('train', epoch)
test_loss = do_epoch('test', epoch)
state_name = 'states/%s.epoch%d.test%.5f.state' % (network_name, epoch, test_loss)
print "==> saving ... %s" % state_name
network.save_params(state_name, epoch)
elif args.mode == 'test':
do_epoch('predict', 0)
elif args.mode == 'test_on_train':
do_epoch('predict_on_train', 0)
else:
raise Exception("unknown mode")
================================================
FILE: theano/networks/__init__.py
================================================
================================================
FILE: theano/networks/base_network.py
================================================
import cPickle as pickle
class BaseNetwork:
    """Common checkpointing and batching logic shared by all network classes.

    Subclasses are expected to define:
      * self.params          -- list of theano shared variables,
      * self.train_list_raw / self.test_list_raw -- example listings,
      * self.batch_size,
      * self.train_fn / self.test_fn -- compiled theano functions,
      * self.read_batch(list_raw, batch_index) -- batch loader.
    """

    def say_name(self):
        """Return a short human-readable network identifier."""
        return "unknown"

    def save_params(self, file_name, epoch, **kwargs):
        """Pickle the parameter values and the epoch number to `file_name`.

        Opened in binary mode: protocol -1 (highest available) is a binary
        pickle protocol, so text mode would corrupt the file on some
        platforms.
        """
        with open(file_name, 'wb') as save_file:
            pickle.dump(
                obj = {
                    'params' : [x.get_value() for x in self.params],
                    'epoch' : epoch,
                },
                file = save_file,
                protocol = -1
            )

    def load_state(self, file_name):
        """Restore parameter values from `file_name`; return the saved epoch."""
        print("==> loading state %s" % file_name)
        epoch = 0
        with open(file_name, 'rb') as load_file:  # binary: pickle protocol -1
            state = pickle.load(load_file)  # renamed from 'dict' (shadowed the builtin)
            loaded_params = state['params']
            for (x, y) in zip(self.params, loaded_params):
                x.set_value(y)
            epoch = state['epoch']
        return epoch

    def get_batches_per_epoch(self, mode):
        """Return the number of full batches available for `mode`.

        Raises Exception for an unrecognized mode.  Uses floor division, so
        a trailing partial batch is dropped.
        """
        if (mode == 'train' or mode == 'predict_on_train'):
            return len(self.train_list_raw) // self.batch_size
        elif (mode == 'test' or mode == 'predict'):
            return len(self.test_list_raw) // self.batch_size
        else:
            raise Exception("unknown mode")

    def step(self, batch_index, mode):
        """Run one train or inference step on batch `batch_index`.

        Returns a dict with keys: prediction, answers, current_loss, log.
        Raises Exception for an unrecognized mode.
        """
        if (mode == "train"):
            data, answers = self.read_batch(self.train_list_raw, batch_index)
            theano_fn = self.train_fn
        elif (mode == "test" or mode == "predict"):
            data, answers = self.read_batch(self.test_list_raw, batch_index)
            theano_fn = self.test_fn
        elif (mode == "predict_on_train"):
            data, answers = self.read_batch(self.train_list_raw, batch_index)
            theano_fn = self.test_fn
        else:
            raise Exception("unrecognized mode")

        ret = theano_fn(data, answers)
        return {"prediction": ret[0],
                "answers": answers,
                "current_loss": ret[1],
                "log": "",
                }
================================================
FILE: theano/networks/rnn.py
================================================
import random
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
import PIL.Image as Image
from base_network import BaseNetwork
floatX = theano.config.floatX
class Network(BaseNetwork):
def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, l2, mode, rnn_num_units, **kwargs):
print "==> not used params in DMN class:", kwargs.keys()
self.train_list_raw = train_list_raw
self.test_list_raw = test_list_raw
self.png_folder = png_folder
self.batch_size = batch_size
self.l2 = l2
self.mode = mode
self.num_units = rnn_num_units
self.input_var = T.tensor3('input_var')
self.answer_var = T.ivector('answer_var')
print "==> building network"
example = np.random.uniform(size=(self.batch_size, 858, 256), low=0.0, high=1.0).astype(np.float32) #########
answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########
# InputLayer
network = layers.InputLayer(shape=(None, 858, 256), input_var=self.input_var)
print layers.get_output(network).eval({self.input_var:example}).shape
# GRULayer
network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
print layers.get_output(network).eval({self.input_var:example}).shape
# Last layer: classification
network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
print layers.get_output(network).eval({self.input_var:example}).shape
self.params = layers.get_all_params(network, trainable=True)
self.prediction = layers.get_output(network)
self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
if (self.l2 > 0):
self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network,
lasagne.regularization.l2)
else:
self.loss_l2 = 0
self.loss = self.loss_ce + self.loss_l2
#updates = lasagne.updates.adadelta(self.loss, self.params)
updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.0005)
if self.mode == 'train':
print "==> compiling train_fn"
self.train_fn = theano.function(inputs=[self.input_var, self.answer_var],
outputs=[self.prediction, self.loss],
updates=updates)
print "==> compiling test_fn"
self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
outputs=[self.prediction, self.loss])
def say_name(self):
return "rnn.GRU.num_units%d" % self.num_units
def read_batch(self, data_raw, batch_index):
start_index = batch_index * self.batch_size
end_index = start_index + self.batch_size
data = np.zeros((self.batch_size, 858, 256), dtype=np.float32)
answers = []
for i in range(start_index, end_index):
answers.append(int(data_raw[i].split(',')[1]))
name = data_raw[i].split(',')[0]
path = self.png_folder + name + ".png"
im = Image.open(path)
data[i - start_index, :, :] = np.transpose(np.array(im).astype(np.float32) / 256.0)
answers = np.array(answers, dtype=np.int32)
return data, answers
================================================
FILE: theano/networks/rnn_2layers.py
================================================
import random
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
import PIL.Image as Image
from base_network import BaseNetwork
floatX = theano.config.floatX
class Network(BaseNetwork):
    """Two stacked GRU layers (optionally separated by batch-norm) over
    858x256 spectrogram sequences, ending in a 176-way softmax classifier."""

    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, l2, mode, rnn_num_units, batch_norm, **kwargs):
        """Build the lasagne graph and compile theano train/test functions.

        train_list_raw / test_list_raw: lists of "name,label" CSV rows.
        png_folder: directory containing one spectrogram PNG per sample.
        l2: L2 regularization coefficient (<= 0 disables the penalty).
        mode: 'train' additionally compiles the updating train_fn.
        rnn_num_units: hidden units in each GRU layer.
        batch_norm: if truthy, insert a BatchNormLayer between the GRUs.
        """
        print "==> not used params in DMN class:", kwargs.keys()
        self.train_list_raw = train_list_raw
        self.test_list_raw = test_list_raw
        self.png_folder = png_folder
        self.batch_size = batch_size
        self.l2 = l2
        self.mode = mode
        self.num_units = rnn_num_units
        self.batch_norm = batch_norm

        self.input_var = T.tensor3('input_var')
        self.answer_var = T.ivector('answer_var')

        # scale inputs to be in [-1, 1]
        input_var_norm = 2 * self.input_var - 1

        print "==> building network"
        # dummy batch used only to print the intermediate output shapes below
        example = np.random.uniform(size=(self.batch_size, 858, 256), low=0.0, high=1.0).astype(np.float32) #########
        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########

        # InputLayer
        network = layers.InputLayer(shape=(None, 858, 256), input_var=input_var_norm)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # GRULayer (returns the full output sequence)
        network = layers.GRULayer(incoming=network, num_units=self.num_units)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # BatchNormalization Layer
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)
            print layers.get_output(network).eval({self.input_var:example}).shape

        # GRULayer (only the final hidden state feeds the classifier)
        network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # Last layer: classification
        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
        print layers.get_output(network).eval({self.input_var:example}).shape

        self.params = layers.get_all_params(network, trainable=True)
        self.prediction = layers.get_output(network)

        # cross-entropy plus optional L2 penalty over all network parameters
        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
        if (self.l2 > 0):
            self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network,
                                                                                      lasagne.regularization.l2)
        else:
            self.loss_l2 = 0
        self.loss = self.loss_ce + self.loss_l2

        #updates = lasagne.updates.adadelta(self.loss, self.params)
        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)

        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)

        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                       outputs=[self.prediction, self.loss])

    def say_name(self):
        """Model identifier used when naming state files."""
        return "rnn_2layers.GRU.num_units%d" % self.num_units

    def read_batch(self, data_raw, batch_index):
        """Load one batch: (batch, 858, 256) float32 data and int32 labels.

        Each row of data_raw is "name,label"; the PNG is read from
        png_folder, transposed to (time, frequency), and 8-bit pixel
        values are scaled into [0, 1).
        """
        start_index = batch_index * self.batch_size
        end_index = start_index + self.batch_size
        data = np.zeros((self.batch_size, 858, 256), dtype=np.float32)
        answers = []

        for i in range(start_index, end_index):
            answers.append(int(data_raw[i].split(',')[1]))
            name = data_raw[i].split(',')[0]
            path = self.png_folder + name + ".png"
            im = Image.open(path)
            data[i - start_index, :, :] = np.transpose(np.array(im).astype(np.float32) / 256.0)

        answers = np.array(answers, dtype=np.int32)
        return data, answers
================================================
FILE: theano/networks/rnn_2layers_5khz.py
================================================
import random
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
import PIL.Image as Image
from base_network import BaseNetwork
floatX = theano.config.floatX
class Network(BaseNetwork):
    """Two stacked GRU layers over spectrograms cropped to the lowest 128
    frequency bins (~0-5 kHz), ending in a 176-way softmax classifier."""

    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, l2, mode, rnn_num_units, batch_norm, **kwargs):
        """Build the lasagne graph and compile theano train/test functions.

        train_list_raw / test_list_raw: lists of "name,label" CSV rows.
        png_folder: directory containing one spectrogram PNG per sample.
        l2: L2 regularization coefficient (<= 0 disables the penalty).
        mode: 'train' additionally compiles the updating train_fn.
        rnn_num_units: hidden units in each GRU layer.
        batch_norm: if truthy, add BatchNormLayers after each GRU.
        """
        print "==> not used params in network class:", kwargs.keys()
        self.train_list_raw = train_list_raw
        self.test_list_raw = test_list_raw
        self.png_folder = png_folder
        self.batch_size = batch_size
        self.l2 = l2
        self.mode = mode
        self.num_units = rnn_num_units
        self.batch_norm = batch_norm

        self.input_var = T.tensor3('input_var')
        self.answer_var = T.ivector('answer_var')

        # scale inputs to be in [-1, 1]
        input_var_norm = 2 * self.input_var - 1

        print "==> building network"
        # dummy batch used only to print the intermediate output shapes below
        example = np.random.uniform(size=(self.batch_size, 858, 128), low=0.0, high=1.0).astype(np.float32) #########
        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########

        # InputLayer (858 time steps x 128 frequency bins)
        network = layers.InputLayer(shape=(None, 858, 128), input_var=input_var_norm)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # GRULayer (returns the full output sequence)
        network = layers.GRULayer(incoming=network, num_units=self.num_units)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # BatchNormalization Layer
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)
            print layers.get_output(network).eval({self.input_var:example}).shape

        # GRULayer (only the final hidden state feeds the classifier)
        network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # BatchNormalization Layer
        # There are some states, where this layer was disabled
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)
            print layers.get_output(network).eval({self.input_var:example}).shape

        # Last layer: classification
        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
        print layers.get_output(network).eval({self.input_var:example}).shape

        self.params = layers.get_all_params(network, trainable=True)
        self.prediction = layers.get_output(network)

        # cross-entropy plus optional L2 penalty over all network parameters
        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
        if (self.l2 > 0):
            self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network,
                                                                                      lasagne.regularization.l2)
        else:
            self.loss_l2 = 0
        self.loss = self.loss_ce + self.loss_l2

        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)

        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)

        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                       outputs=[self.prediction, self.loss])

    def say_name(self):
        """Model identifier used when naming state files."""
        return "rnn_2layers_5khz.GRU.num_units%d" % self.num_units

    def read_batch(self, data_raw, batch_index):
        """Load one batch: (batch, 858, 128) float32 data and int32 labels.

        After transposing to (time, frequency), only the first 128
        frequency bins are kept (the low-frequency half of the image).
        """
        start_index = batch_index * self.batch_size
        end_index = start_index + self.batch_size
        data = np.zeros((self.batch_size, 858, 128), dtype=np.float32)
        answers = []

        for i in range(start_index, end_index):
            answers.append(int(data_raw[i].split(',')[1]))
            name = data_raw[i].split(',')[0]
            path = self.png_folder + name + ".png"
            im = Image.open(path)
            data[i - start_index, :, :] = np.transpose(np.array(im).astype(np.float32) / 256.0)[:, :128]

        answers = np.array(answers, dtype=np.int32)
        return data, answers
================================================
FILE: theano/networks/tc_net.py
================================================
import random
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
import PIL.Image as Image
from base_network import BaseNetwork
floatX = theano.config.floatX
class Network(BaseNetwork):
    """Convolutional classifier ("tc_net"): six CONV-RELU-POOL blocks,
    one dense layer and a 176-way softmax over 256x858 spectrograms."""

    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, **kwargs):
        """Build the conv net and compile theano train/test functions.

        train_list_raw / test_list_raw: lists of "name,label" CSV rows.
        png_folder: directory containing one spectrogram PNG per sample.
        dropout: dropout probability after the dense layer (<= 0 disables).
        l2: L2 regularization coefficient (<= 0 disables the penalty).
        mode: 'train' additionally compiles the updating train_fn.
        batch_norm: if truthy, add a BatchNormLayer after each pool/dense.
        """
        print "==> not used params in DMN class:", kwargs.keys()
        self.train_list_raw = train_list_raw
        self.test_list_raw = test_list_raw
        self.png_folder = png_folder
        self.batch_size = batch_size
        self.dropout = dropout
        self.l2 = l2
        self.mode = mode
        self.batch_norm = batch_norm

        self.input_var = T.tensor4('input_var')
        self.answer_var = T.ivector('answer_var')

        print "==> building network"
        # dummy batch used only to print the intermediate output shapes below
        example = np.random.uniform(size=(self.batch_size, 1, 256, 858), low=0.0, high=1.0).astype(np.float32) #########
        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########

        network = layers.InputLayer(shape=(None, 1, 256, 858), input_var=self.input_var)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # CONV-RELU-POOL 1
        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 2
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 3
        network = layers.Conv2DLayer(incoming=network, num_filters=64, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 4
        network = layers.Conv2DLayer(incoming=network, num_filters=128, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 5
        network = layers.Conv2DLayer(incoming=network, num_filters=128, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 6 (asymmetric pool stride to compress the
        # remaining frequency dimension faster than the time dimension)
        network = layers.Conv2DLayer(incoming=network, num_filters=256, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(3, 2), ignore_border=False)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # DENSE 1
        network = layers.DenseLayer(incoming=network, num_units=1024, nonlinearity=rectify)
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)
        if (self.dropout > 0):
            network = layers.dropout(network, self.dropout)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # NOTE: the second dense block below is intentionally disabled;
        # it is kept as a string-literal no-op.
        """
        # DENSE 2
        network = layers.DenseLayer(incoming=network, num_units=1024, nonlinearity=rectify)
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)
        if (self.dropout > 0):
            network = layers.dropout(network, self.dropout)
        print layers.get_output(network).eval({self.input_var:example}).shape
        """

        # Last layer: classification
        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
        print layers.get_output(network).eval({self.input_var:example}).shape

        self.params = layers.get_all_params(network, trainable=True)
        self.prediction = layers.get_output(network)

        # cross-entropy plus optional L2 penalty over all network parameters
        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
        if (self.l2 > 0):
            self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network,
                                                                                      lasagne.regularization.l2)
        else:
            self.loss_l2 = 0
        self.loss = self.loss_ce + self.loss_l2

        #updates = lasagne.updates.adadelta(self.loss, self.params)
        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)

        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)

        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                       outputs=[self.prediction, self.loss])

    def say_name(self):
        """Model identifier used when naming state files."""
        return "tc_net"

    def read_batch(self, data_raw, batch_index):
        """Load one batch: (batch, 1, 256, 858) float32 data, int32 labels.

        Each row of data_raw is "name,label"; 8-bit pixel values are
        scaled into [0, 1). No transpose: rows are frequency bins,
        columns are time steps.
        """
        start_index = batch_index * self.batch_size
        end_index = start_index + self.batch_size
        data = np.zeros((self.batch_size, 1, 256, 858), dtype=np.float32)
        answers = []

        for i in range(start_index, end_index):
            answers.append(int(data_raw[i].split(',')[1]))
            name = data_raw[i].split(',')[0]
            path = self.png_folder + name + ".png"
            im = Image.open(path)
            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32) / 256.0

        answers = np.array(answers, dtype=np.int32)
        return data, answers
================================================
FILE: theano/networks/tc_net_deeprnn_shared_pad.py
================================================
import random
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
import PIL.Image as Image
from base_network import BaseNetwork
floatX = theano.config.floatX
class Network(BaseNetwork):
    """Conv front-end followed by per-channel two-layer GRUs with shared
    weights ("tc_net_deeprnn"): four CONV-RELU-POOL blocks produce 32
    feature maps; each map is treated as a (time, feature) sequence and
    run through the same two-layer GRU stack; the final hidden states of
    all channels are concatenated and classified with a 176-way softmax."""

    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):
        """Build the graph and compile theano train/test functions.

        train_list_raw / test_list_raw: lists of "name,label" CSV rows.
        png_folder: directory containing one spectrogram PNG per sample.
        dropout: dropout probability before the classifier (<= 0 disables).
        l2: L2 regularization coefficient (<= 0 disables the penalty).
        mode: 'train' additionally compiles the updating train_fn.
        batch_norm: if truthy, add BatchNormLayers after pools and GRUs.
        rnn_num_units: hidden units in each GRU layer.
        """
        print "==> not used params in DMN class:", kwargs.keys()
        self.train_list_raw = train_list_raw
        self.test_list_raw = test_list_raw
        self.png_folder = png_folder
        self.batch_size = batch_size
        self.dropout = dropout
        self.l2 = l2
        self.mode = mode
        self.batch_norm = batch_norm
        self.num_units = rnn_num_units

        self.input_var = T.tensor4('input_var')
        self.answer_var = T.ivector('answer_var')

        print "==> building network"
        # dummy batch used only to print the intermediate output shapes below
        example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########
        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########

        network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # CONV-RELU-POOL 1
        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 2
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 3
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 4
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # convolutional parameters; RNN parameters are appended below
        self.params = layers.get_all_params(network, trainable=True)
        output = layers.get_output(network)

        num_channels = 32
        filter_W = 54
        filter_H = 8
        # NOTE: these constants are shapes of last pool layer, it can be symbolic
        # explicit values are better for optimizations

        # each channel becomes a (batch, width=time, height=feature) sequence
        channels = []
        for channel_index in range(num_channels):
            channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))

        rnn_network_outputs = []
        # weights of the first channel's GRUs; reused by all other channels
        W_in_to_updategate = None
        W_hid_to_updategate = None
        b_updategate = None
        W_in_to_resetgate = None
        W_hid_to_resetgate = None
        b_resetgate = None
        W_in_to_hidden_update = None
        W_hid_to_hidden_update = None
        b_hidden_update = None

        # ...same for the second GRU layer
        W_in_to_updategate1 = None
        W_hid_to_updategate1 = None
        b_updategate1 = None
        W_in_to_resetgate1 = None
        W_hid_to_resetgate1 = None
        b_resetgate1 = None
        W_in_to_hidden_update1 = None
        W_hid_to_hidden_update1 = None
        b_hidden_update1 = None

        for channel_index in range(num_channels):
            rnn_input_var = channels[channel_index]

            # InputLayer
            network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var)

            if (channel_index == 0):
                # channel 0 creates the GRU weights and registers them once

                # GRULayer
                network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=False)
                W_in_to_updategate = network.W_in_to_updategate
                W_hid_to_updategate = network.W_hid_to_updategate
                b_updategate = network.b_updategate
                W_in_to_resetgate = network.W_in_to_resetgate
                W_hid_to_resetgate = network.W_hid_to_resetgate
                b_resetgate = network.b_resetgate
                W_in_to_hidden_update = network.W_in_to_hidden_update
                W_hid_to_hidden_update = network.W_hid_to_hidden_update
                b_hidden_update = network.b_hidden_update

                # BatchNormalization Layer
                if (self.batch_norm):
                    network = layers.BatchNormLayer(incoming=network)

                # GRULayer
                network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
                W_in_to_updategate1 = network.W_in_to_updategate
                W_hid_to_updategate1 = network.W_hid_to_updategate
                b_updategate1 = network.b_updategate
                W_in_to_resetgate1 = network.W_in_to_resetgate
                W_hid_to_resetgate1 = network.W_hid_to_resetgate
                b_resetgate1 = network.b_resetgate
                W_in_to_hidden_update1 = network.W_in_to_hidden_update
                W_hid_to_hidden_update1 = network.W_hid_to_hidden_update
                b_hidden_update1 = network.b_hidden_update

                # add params (only once -- every other channel shares these)
                self.params += layers.get_all_params(network, trainable=True)

            else:
                # GRULayer, but shared
                network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=False,
                            resetgate=layers.Gate(W_in=W_in_to_resetgate, W_hid=W_hid_to_resetgate, b=b_resetgate),
                            updategate=layers.Gate(W_in=W_in_to_updategate, W_hid=W_hid_to_updategate, b=b_updategate),
                            hidden_update=layers.Gate(W_in=W_in_to_hidden_update, W_hid=W_hid_to_hidden_update, b=b_hidden_update))

                # BatchNormalization Layer
                if (self.batch_norm):
                    network = layers.BatchNormLayer(incoming=network)

                # GRULayer, but shared
                network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True,
                            resetgate=layers.Gate(W_in=W_in_to_resetgate1, W_hid=W_hid_to_resetgate1, b=b_resetgate1),
                            updategate=layers.Gate(W_in=W_in_to_updategate1, W_hid=W_hid_to_updategate1, b=b_updategate1),
                            hidden_update=layers.Gate(W_in=W_in_to_hidden_update1, W_hid=W_hid_to_hidden_update1, b=b_hidden_update1))

            rnn_network_outputs.append(layers.get_output(network))

        # concatenate the final hidden states of all channels
        all_output_var = T.concatenate(rnn_network_outputs, axis=1)
        print all_output_var.eval({self.input_var:example}).shape

        # InputLayer
        network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var)

        # Dropout Layer
        if (self.dropout > 0):
            network = layers.dropout(network, self.dropout)

        # BatchNormalization Layer
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # Last layer: classification
        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
        print layers.get_output(network).eval({self.input_var:example}).shape

        self.params += layers.get_all_params(network, trainable=True)
        self.prediction = layers.get_output(network)

        #print "==> param shapes", [x.eval().shape for x in self.params]

        # apply_penalty over self.params (instead of
        # regularize_network_params) so shared RNN weights are counted once
        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
        if (self.l2 > 0):
            self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params,
                                                                          lasagne.regularization.l2)
        else:
            self.loss_l2 = 0
        self.loss = self.loss_ce + self.loss_l2

        #updates = lasagne.updates.adadelta(self.loss, self.params)
        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)

        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)

        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                       outputs=[self.prediction, self.loss])

    def say_name(self):
        """Model identifier used when naming state files."""
        return "tc_net_deeprnn.4conv.pad.GRU.shared.num_units%d.5khz" % self.num_units

    def read_batch(self, data_raw, batch_index):
        """Load one batch: (batch, 1, 128, 858) float32 data, int32 labels.

        Only the first 128 image rows (low-frequency bins) are kept;
        8-bit pixel values are scaled into [0, 1).
        """
        start_index = batch_index * self.batch_size
        end_index = start_index + self.batch_size
        data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
        answers = []

        for i in range(start_index, end_index):
            answers.append(int(data_raw[i].split(',')[1]))
            name = data_raw[i].split(',')[0]
            path = self.png_folder + name + ".png"
            im = Image.open(path)
            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0

        answers = np.array(answers, dtype=np.int32)
        return data, answers
================================================
FILE: theano/networks/tc_net_mod.py
================================================
import random
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
import PIL.Image as Image
from base_network import BaseNetwork
floatX = theano.config.floatX
class Network(BaseNetwork):
    """Convolutional classifier ("tc_net_mod"): like tc_net but the max
    pools use pad=2 instead of ignore_border=False, and only one dense
    layer precedes the 176-way softmax."""

    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, **kwargs):
        """Build the conv net and compile theano train/test functions.

        train_list_raw / test_list_raw: lists of "name,label" CSV rows.
        png_folder: directory containing one spectrogram PNG per sample.
        dropout: dropout probability after the dense layer (<= 0 disables).
        l2: L2 regularization coefficient (<= 0 disables the penalty).
        mode: 'train' additionally compiles the updating train_fn.
        batch_norm: if truthy, add a BatchNormLayer after each pool/dense.
        """
        print "==> not used params in DMN class:", kwargs.keys()
        self.train_list_raw = train_list_raw
        self.test_list_raw = test_list_raw
        self.png_folder = png_folder
        self.batch_size = batch_size
        self.dropout = dropout
        self.l2 = l2
        self.mode = mode
        self.batch_norm = batch_norm

        self.input_var = T.tensor4('input_var')
        self.answer_var = T.ivector('answer_var')

        print "==> building network"
        # dummy batch used only to print the intermediate output shapes below
        example = np.random.uniform(size=(self.batch_size, 1, 256, 858), low=0.0, high=1.0).astype(np.float32) #########
        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########

        network = layers.InputLayer(shape=(None, 1, 256, 858), input_var=self.input_var)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # NOTE: replace pad=2 with ignore_border=False
        # CONV-RELU-POOL 1
        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 2
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 3
        network = layers.Conv2DLayer(incoming=network, num_filters=64, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 4
        network = layers.Conv2DLayer(incoming=network, num_filters=128, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 5
        network = layers.Conv2DLayer(incoming=network, num_filters=128, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 6 (asymmetric pool stride to compress the
        # remaining frequency dimension faster than the time dimension)
        network = layers.Conv2DLayer(incoming=network, num_filters=256, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(3, 2), pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # DENSE 1
        network = layers.DenseLayer(incoming=network, num_units=1024, nonlinearity=rectify)
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)
        if (self.dropout > 0):
            network = layers.dropout(network, self.dropout)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # Last layer: classification
        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
        print layers.get_output(network).eval({self.input_var:example}).shape

        self.params = layers.get_all_params(network, trainable=True)
        self.prediction = layers.get_output(network)
        print "==> param shapes", [x.eval().shape for x in self.params]

        # cross-entropy plus optional L2 penalty over all network parameters
        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
        if (self.l2 > 0):
            self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network,
                                                                                      lasagne.regularization.l2)
        else:
            self.loss_l2 = 0
        self.loss = self.loss_ce + self.loss_l2

        #updates = lasagne.updates.adadelta(self.loss, self.params)
        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)

        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)

        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                       outputs=[self.prediction, self.loss])

    def say_name(self):
        """Model identifier used when naming state files."""
        return "tc_net_mod"

    def read_batch(self, data_raw, batch_index):
        """Load one batch: (batch, 1, 256, 858) float32 data, int32 labels.

        Each row of data_raw is "name,label"; 8-bit pixel values are
        scaled into [0, 1). No transpose: rows are frequency bins,
        columns are time steps.
        """
        start_index = batch_index * self.batch_size
        end_index = start_index + self.batch_size
        data = np.zeros((self.batch_size, 1, 256, 858), dtype=np.float32)
        answers = []

        for i in range(start_index, end_index):
            answers.append(int(data_raw[i].split(',')[1]))
            name = data_raw[i].split(',')[0]
            path = self.png_folder + name + ".png"
            im = Image.open(path)
            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32) / 256.0

        answers = np.array(answers, dtype=np.int32)
        return data, answers
================================================
FILE: theano/networks/tc_net_mod_5khz_small.py
================================================
import random
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
import PIL.Image as Image
from base_network import BaseNetwork
floatX = theano.config.floatX
class Network(BaseNetwork):
    """Small 5-stage CNN for 176-way language ID on 5 kHz spectrograms.

    conv-relu-pool x5 (each optionally batch-normalized) -> dense(256)
    [-> batchnorm] [-> dropout] -> dense(176, softmax).  Consumes only the
    lowest 128 frequency rows of each 858-frame spectrogram PNG.
    """

    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, **kwargs):
        """Build the Lasagne graph and compile Theano train/test functions.

        train_list_raw / test_list_raw -- lists of "name,label" CSV lines
        png_folder -- directory holding the spectrogram PNGs
        batch_size -- examples per minibatch
        dropout    -- dropout rate for the dense layer (0 disables it)
        l2         -- L2 penalty coefficient (0 disables the penalty)
        mode       -- 'train' additionally compiles train_fn
        batch_norm -- if truthy, insert BatchNormLayer after each stage
        kwargs     -- ignored; printed below for visibility
        """
        print "==> not used params in DMN class:", kwargs.keys()
        self.train_list_raw = train_list_raw
        self.test_list_raw = test_list_raw
        self.png_folder = png_folder
        self.batch_size = batch_size
        self.dropout = dropout
        self.l2 = l2
        self.mode = mode
        self.batch_norm = batch_norm

        # Symbolic inputs: a batch of 1-channel spectrograms and int32 labels.
        self.input_var = T.tensor4('input_var')
        self.answer_var = T.ivector('answer_var')

        print "==> building network"
        # Random dummy batch, used only to print each layer's concrete output
        # shape while the graph is assembled (the .eval() calls below).
        example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########
        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########

        network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # CONV-RELU-POOL 1
        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 2
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 3
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 4
        network = layers.Conv2DLayer(incoming=network, num_filters=64, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 5
        network = layers.Conv2DLayer(incoming=network, num_filters=64, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # DENSE 1
        network = layers.DenseLayer(incoming=network, num_units=256, nonlinearity=rectify)
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)
        if (self.dropout > 0):
            network = layers.dropout(network, self.dropout)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # Last layer: classification
        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
        print layers.get_output(network).eval({self.input_var:example}).shape

        self.params = layers.get_all_params(network, trainable=True)
        # Stochastic output (dropout active) and deterministic output
        # (dropout disabled) of the same graph.
        self.prediction = layers.get_output(network)
        self.test_prediction = layers.get_output(network, deterministic=True)
        print "==> param shapes", [x.eval().shape for x in self.params]

        def get_loss(prediction):
            # Mean cross-entropy plus an optional L2 penalty over all
            # network parameters (closes over `network` and `self`).
            loss_ce = lasagne.objectives.categorical_crossentropy(prediction, self.answer_var).mean()
            if (self.l2 > 0):
                loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network,
                                                                                     lasagne.regularization.l2)
            else:
                loss_l2 = 0
            return loss_ce + loss_l2

        self.loss = get_loss(self.prediction)
        self.test_loss = get_loss(self.test_prediction)

        #updates = lasagne.updates.adadelta(self.loss, self.params)
        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)

        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)

        print "==> compiling test_fn"
        # deterministic version
        #self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
        #                               outputs=[self.test_prediction, self.test_loss])
        # non deterministic version, as train_fn
        # (deliberate choice: test_fn evaluates the stochastic outputs)
        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                       outputs=[self.prediction, self.loss])

    def say_name(self):
        """Return the identifier used for this network in checkpoint/log names."""
        return "tc_net_mod_5khz_small"

    def read_batch(self, data_raw, batch_index):
        """Load minibatch `batch_index` from `data_raw` ("name,label" lines).

        Returns (data, answers): float32 (batch, 1, 128, 858) pixels scaled
        to [0, 1) -- only the first 128 spectrogram rows are kept -- and an
        int32 label vector.
        """
        start_index = batch_index * self.batch_size
        end_index = start_index + self.batch_size
        data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
        answers = []
        for i in range(start_index, end_index):
            answers.append(int(data_raw[i].split(',')[1]))
            name = data_raw[i].split(',')[0]
            path = self.png_folder + name + ".png"
            im = Image.open(path)
            # Keep only the lowest 128 frequency rows; scale 8-bit pixels to [0, 1).
            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0
        answers = np.array(answers, dtype=np.int32)
        return data, answers
================================================
FILE: theano/networks/tc_net_rnn.py
================================================
import random
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
import PIL.Image as Image
from base_network import BaseNetwork
floatX = theano.config.floatX  # Theano's configured float dtype; appears unused in this module (kept for parity with sibling network files)
class Network(BaseNetwork):
    """3-conv CNN front-end + 32 per-channel GRUs for 176-way language ID.

    Each of the 32 feature maps from the conv stack is fed, time-major, into
    its own (independently parameterized) GRU; the 32 final hidden states are
    concatenated and passed through dense(512) -> dense(176, softmax).
    """

    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):
        """Build the Lasagne graph and compile Theano train/test functions.

        rnn_num_units -- hidden size of every per-channel GRU; the remaining
        arguments are as in the sibling network classes.
        """
        print "==> not used params in DMN class:", kwargs.keys()
        self.train_list_raw = train_list_raw
        self.test_list_raw = test_list_raw
        self.png_folder = png_folder
        self.batch_size = batch_size
        self.dropout = dropout
        self.l2 = l2
        self.mode = mode
        self.batch_norm = batch_norm
        self.num_units = rnn_num_units

        # Symbolic inputs: a batch of 1-channel spectrograms and int32 labels.
        self.input_var = T.tensor4('input_var')
        self.answer_var = T.ivector('answer_var')

        print "==> building network"
        # Random dummy batch, used only to print intermediate shapes while
        # the graph is assembled.
        example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########
        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########

        network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # CONV-RELU-POOL 1
        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 2
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 3
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # Conv-stack parameters; RNN/classifier parameters are appended below
        # because the graph is re-rooted through raw tensors (an InputLayer
        # over a tensor breaks layer-graph traversal).
        self.params = layers.get_all_params(network, trainable=True)
        output = layers.get_output(network)
        num_channels = 32
        filter_W = 104
        filter_H = 13
        # NOTE: these constants are shapes of last pool layer, it can be symbolic
        # explicit values are better for optimizations

        # One (batch, time, features) sequence per conv channel: the
        # transpose makes the pooled width axis the GRU's time axis.
        channels = []
        for channel_index in range(num_channels):
            channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))

        rnn_network_outputs = []
        for channel_index in range(num_channels):
            rnn_input_var = channels[channel_index]
            # InputLayer
            network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var)
            # GRULayer -- only the final hidden state is returned.
            network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
            # BatchNormalization Layer
            if (self.batch_norm):
                network = layers.BatchNormLayer(incoming=network)
            # add params
            self.params += layers.get_all_params(network, trainable=True)
            rnn_network_outputs.append(layers.get_output(network))

        all_output_var = T.concatenate(rnn_network_outputs, axis=1)
        print all_output_var.eval({self.input_var:example}).shape

        # InputLayer
        network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var)

        # DENSE 1
        network = layers.DenseLayer(incoming=network, num_units=512, nonlinearity=rectify)
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)
        if (self.dropout > 0):
            network = layers.dropout(network, self.dropout)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # Last layer: classification
        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
        print layers.get_output(network).eval({self.input_var:example}).shape

        self.params += layers.get_all_params(network, trainable=True)
        self.prediction = layers.get_output(network)

        #print "==> param shapes", [x.eval().shape for x in self.params]

        # apply_penalty over the hand-collected parameter list (the stitched
        # graph cannot be traversed by regularize_network_params).
        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
        if (self.l2 > 0):
            self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params,
                                                                          lasagne.regularization.l2)
        else:
            self.loss_l2 = 0
        self.loss = self.loss_ce + self.loss_l2

        #updates = lasagne.updates.adadelta(self.loss, self.params)
        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)

        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)

        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                       outputs=[self.prediction, self.loss])

    def say_name(self):
        """Return the identifier used for this network in checkpoint/log names."""
        return "tc_net_rnn.GRU.3conv.num_units%d.5khz" % self.num_units

    def read_batch(self, data_raw, batch_index):
        """Load minibatch `batch_index` from `data_raw` ("name,label" lines).

        Returns (data, answers): float32 (batch, 1, 128, 858) pixels scaled
        to [0, 1) -- only the first 128 spectrogram rows are kept -- and an
        int32 label vector.
        """
        start_index = batch_index * self.batch_size
        end_index = start_index + self.batch_size
        data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
        answers = []
        for i in range(start_index, end_index):
            answers.append(int(data_raw[i].split(',')[1]))
            name = data_raw[i].split(',')[0]
            path = self.png_folder + name + ".png"
            im = Image.open(path)
            # Keep only the lowest 128 frequency rows; scale 8-bit pixels to [0, 1).
            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0
        answers = np.array(answers, dtype=np.int32)
        return data, answers
================================================
FILE: theano/networks/tc_net_rnn_nodense.py
================================================
import random
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
import PIL.Image as Image
from base_network import BaseNetwork
floatX = theano.config.floatX  # Theano's configured float dtype; appears unused in this module (kept for parity with sibling network files)
class Network(BaseNetwork):
    """3-conv CNN + 32 per-channel GRUs, classifying directly from the
    concatenated GRU states (no intermediate dense layer).

    Identical to tc_net_rnn except that the dense(512) stage between the GRU
    concatenation and the softmax is disabled.
    """

    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):
        """Build the Lasagne graph and compile Theano train/test functions.

        rnn_num_units -- hidden size of every per-channel GRU; the remaining
        arguments are as in the sibling network classes.
        """
        print "==> not used params in DMN class:", kwargs.keys()
        self.train_list_raw = train_list_raw
        self.test_list_raw = test_list_raw
        self.png_folder = png_folder
        self.batch_size = batch_size
        self.dropout = dropout
        self.l2 = l2
        self.mode = mode
        self.batch_norm = batch_norm
        self.num_units = rnn_num_units

        # Symbolic inputs: a batch of 1-channel spectrograms and int32 labels.
        self.input_var = T.tensor4('input_var')
        self.answer_var = T.ivector('answer_var')

        print "==> building network"
        # Random dummy batch, used only to print intermediate shapes while
        # the graph is assembled.
        example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########
        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########

        network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # CONV-RELU-POOL 1
        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 2
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 3
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # Conv-stack parameters; RNN/classifier parameters are appended below
        # because the graph is re-rooted through raw tensors (an InputLayer
        # over a tensor breaks layer-graph traversal).
        self.params = layers.get_all_params(network, trainable=True)
        output = layers.get_output(network)
        num_channels = 32
        filter_W = 104
        filter_H = 13
        # NOTE: these constants are shapes of last pool layer, it can be symbolic
        # explicit values are better for optimizations

        # One (batch, time, features) sequence per conv channel: the
        # transpose makes the pooled width axis the GRU's time axis.
        channels = []
        for channel_index in range(num_channels):
            channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))

        rnn_network_outputs = []
        for channel_index in range(num_channels):
            rnn_input_var = channels[channel_index]
            # InputLayer
            network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var)
            # GRULayer -- only the final hidden state is returned.
            network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
            # BatchNormalization Layer
            if (self.batch_norm):
                network = layers.BatchNormLayer(incoming=network)
            # add params
            self.params += layers.get_all_params(network, trainable=True)
            rnn_network_outputs.append(layers.get_output(network))

        all_output_var = T.concatenate(rnn_network_outputs, axis=1)
        print all_output_var.eval({self.input_var:example}).shape

        # InputLayer
        network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var)

        # The dense stage is intentionally disabled: kept below as an inert
        # string literal ("nodense" variant).
        """
        # DENSE 1
        network = layers.DenseLayer(incoming=network, num_units=512, nonlinearity=rectify)
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)
        if (self.dropout > 0):
            network = layers.dropout(network, self.dropout)
        print layers.get_output(network).eval({self.input_var:example}).shape
        """

        # Last layer: classification
        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
        print layers.get_output(network).eval({self.input_var:example}).shape

        self.params += layers.get_all_params(network, trainable=True)
        self.prediction = layers.get_output(network)

        #print "==> param shapes", [x.eval().shape for x in self.params]

        # apply_penalty over the hand-collected parameter list (the stitched
        # graph cannot be traversed by regularize_network_params).
        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
        if (self.l2 > 0):
            self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params,
                                                                          lasagne.regularization.l2)
        else:
            self.loss_l2 = 0
        self.loss = self.loss_ce + self.loss_l2

        #updates = lasagne.updates.adadelta(self.loss, self.params)
        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)

        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)

        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                       outputs=[self.prediction, self.loss])

    def say_name(self):
        """Return the identifier used for this network in checkpoint/log names."""
        return "tc_net_rnn.3conv.GRU.num_units%d.nodense.5khz" % self.num_units

    def read_batch(self, data_raw, batch_index):
        """Load minibatch `batch_index` from `data_raw` ("name,label" lines).

        Returns (data, answers): float32 (batch, 1, 128, 858) pixels scaled
        to [0, 1) -- only the first 128 spectrogram rows are kept -- and an
        int32 label vector.
        """
        start_index = batch_index * self.batch_size
        end_index = start_index + self.batch_size
        data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
        answers = []
        for i in range(start_index, end_index):
            answers.append(int(data_raw[i].split(',')[1]))
            name = data_raw[i].split(',')[0]
            path = self.png_folder + name + ".png"
            im = Image.open(path)
            # Keep only the lowest 128 frequency rows; scale 8-bit pixels to [0, 1).
            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0
        answers = np.array(answers, dtype=np.int32)
        return data, answers
================================================
FILE: theano/networks/tc_net_rnn_onernn.py
================================================
import random
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
import PIL.Image as Image
from base_network import BaseNetwork
floatX = theano.config.floatX  # Theano's configured float dtype; appears unused in this module (kept for parity with sibling network files)
class Network(BaseNetwork):
    """4-conv CNN front-end feeding a single GRU over the time axis.

    The final conv volume is transposed/flattened into one
    (batch, time, channels*height) sequence, consumed by one GRU whose final
    state goes straight to the 176-way softmax classifier.
    """

    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):
        """Build the Lasagne graph and compile Theano train/test functions.

        rnn_num_units -- hidden size of the single GRU; the remaining
        arguments are as in the sibling network classes.
        """
        print "==> not used params in DMN class:", kwargs.keys()
        self.train_list_raw = train_list_raw
        self.test_list_raw = test_list_raw
        self.png_folder = png_folder
        self.batch_size = batch_size
        self.dropout = dropout
        self.l2 = l2
        self.mode = mode
        self.batch_norm = batch_norm
        self.num_units = rnn_num_units

        # Symbolic inputs: a batch of 1-channel spectrograms and int32 labels.
        self.input_var = T.tensor4('input_var')
        self.answer_var = T.ivector('answer_var')

        print "==> building network"
        # Random dummy batch, used only to print intermediate shapes while
        # the graph is assembled.
        example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########
        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########

        network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # CONV-RELU-POOL 1
        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 2
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 3
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 4
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # Conv-stack parameters; GRU/classifier parameters are appended below
        # because the graph is re-rooted through a raw tensor.
        self.params = layers.get_all_params(network, trainable=True)
        output = layers.get_output(network)
        # Move width (time) to axis 1 and merge channels x height into one
        # feature axis: (batch, W, C, H) -> (batch, W, C*H).
        output = output.transpose((0, 3, 1, 2))
        output = output.flatten(ndim=3)

        # NOTE: these constants are shapes of last pool layer, it can be symbolic
        # explicit values are better for optimizations
        num_channels = 32
        filter_W = 54
        filter_H = 8

        # InputLayer
        network = layers.InputLayer(shape=(None, filter_W, num_channels * filter_H), input_var=output)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # GRULayer -- only the final hidden state is returned.
        network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)
        if (self.dropout > 0):
            network = layers.dropout(network, self.dropout)

        # Last layer: classification
        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
        print layers.get_output(network).eval({self.input_var:example}).shape

        self.params += layers.get_all_params(network, trainable=True)
        self.prediction = layers.get_output(network)

        #print "==> param shapes", [x.eval().shape for x in self.params]

        # apply_penalty over the hand-collected parameter list (the stitched
        # graph cannot be traversed by regularize_network_params).
        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
        if (self.l2 > 0):
            self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params,
                                                                          lasagne.regularization.l2)
        else:
            self.loss_l2 = 0
        self.loss = self.loss_ce + self.loss_l2

        #updates = lasagne.updates.adadelta(self.loss, self.params)
        #updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003) # good one
        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.0003)

        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)

        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                       outputs=[self.prediction, self.loss])

    def say_name(self):
        """Return the identifier used for this network in checkpoint/log names."""
        return "tc_net_rnn.4conv.pad.GRU.onernn.num_units%d.5khz" % self.num_units

    def read_batch(self, data_raw, batch_index):
        """Load minibatch `batch_index` from `data_raw` ("name,label" lines).

        Returns (data, answers): float32 (batch, 1, 128, 858) pixels scaled
        to [0, 1) -- only the first 128 spectrogram rows are kept -- and an
        int32 label vector.
        """
        start_index = batch_index * self.batch_size
        end_index = start_index + self.batch_size
        data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
        answers = []
        for i in range(start_index, end_index):
            answers.append(int(data_raw[i].split(',')[1]))
            name = data_raw[i].split(',')[0]
            path = self.png_folder + name + ".png"
            im = Image.open(path)
            # Keep only the lowest 128 frequency rows; scale 8-bit pixels to [0, 1).
            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0
        answers = np.array(answers, dtype=np.int32)
        return data, answers
================================================
FILE: theano/networks/tc_net_rnn_onernn_notimepool.py
================================================
import random
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
import PIL.Image as Image
from base_network import BaseNetwork
floatX = theano.config.floatX  # Theano's configured float dtype; appears unused in this module (kept for parity with sibling network files)
class Network(BaseNetwork):
    """4-conv CNN + single GRU variant that never pools along time.

    All max-pool layers use stride (2, 1), so the time (width) axis stays
    near full resolution (852 steps) and only the frequency axis shrinks;
    the flattened sequence is consumed by one GRU -> dense(176, softmax).
    """

    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):
        """Build the Lasagne graph and compile Theano train/test functions.

        rnn_num_units -- hidden size of the single GRU; the remaining
        arguments are as in the sibling network classes.
        """
        print "==> not used params in DMN class:", kwargs.keys()
        self.train_list_raw = train_list_raw
        self.test_list_raw = test_list_raw
        self.png_folder = png_folder
        self.batch_size = batch_size
        self.dropout = dropout
        self.l2 = l2
        self.mode = mode
        self.batch_norm = batch_norm
        self.num_units = rnn_num_units

        # Symbolic inputs: a batch of 1-channel spectrograms and int32 labels.
        self.input_var = T.tensor4('input_var')
        self.answer_var = T.ivector('answer_var')

        print "==> building network"
        # Random dummy batch, used only to print intermediate shapes while
        # the graph is assembled.
        example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########
        answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########

        network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # CONV-RELU-POOL 1 (pool stride (2,1): downsample frequency only)
        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(2,1), pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 2
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(2,1), pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 3
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(2,1), pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 4
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var:example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(2,1), pad=2)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # Conv-stack parameters; GRU/classifier parameters are appended below
        # because the graph is re-rooted through a raw tensor.
        self.params = layers.get_all_params(network, trainable=True)
        output = layers.get_output(network)
        # Move width (time) to axis 1 and merge channels x height into one
        # feature axis: (batch, W, C, H) -> (batch, W, C*H).
        output = output.transpose((0, 3, 1, 2))
        output = output.flatten(ndim=3)

        # NOTE: these constants are shapes of last pool layer, it can be symbolic
        # explicit values are better for optimizations
        num_channels = 32
        filter_W = 852
        filter_H = 8

        # InputLayer
        network = layers.InputLayer(shape=(None, filter_W, num_channels * filter_H), input_var=output)
        print layers.get_output(network).eval({self.input_var:example}).shape

        # GRULayer -- only the final hidden state is returned.
        network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
        print layers.get_output(network).eval({self.input_var:example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)
        if (self.dropout > 0):
            network = layers.dropout(network, self.dropout)

        # Last layer: classification
        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
        print layers.get_output(network).eval({self.input_var:example}).shape

        self.params += layers.get_all_params(network, trainable=True)
        self.prediction = layers.get_output(network)

        #print "==> param shapes", [x.eval().shape for x in self.params]

        # apply_penalty over the hand-collected parameter list (the stitched
        # graph cannot be traversed by regularize_network_params).
        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
        if (self.l2 > 0):
            self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params,
                                                                          lasagne.regularization.l2)
        else:
            self.loss_l2 = 0
        self.loss = self.loss_ce + self.loss_l2

        #updates = lasagne.updates.adadelta(self.loss, self.params)
        #updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003) # good one
        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.001)

        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)

        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                       outputs=[self.prediction, self.loss])

    def say_name(self):
        """Return the identifier used for this network in checkpoint/log names."""
        return "tc_net_rnn.4conv.pad.GRU.onernn.notimepool.num_units%d.5khz" % self.num_units

    def read_batch(self, data_raw, batch_index):
        """Load minibatch `batch_index` from `data_raw` ("name,label" lines).

        Returns (data, answers): float32 (batch, 1, 128, 858) pixels scaled
        to [0, 1) -- only the first 128 spectrogram rows are kept -- and an
        int32 label vector.
        """
        start_index = batch_index * self.batch_size
        end_index = start_index + self.batch_size
        data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
        answers = []
        for i in range(start_index, end_index):
            answers.append(int(data_raw[i].split(',')[1]))
            name = data_raw[i].split(',')[0]
            path = self.png_folder + name + ".png"
            im = Image.open(path)
            # Keep only the lowest 128 frequency rows; scale 8-bit pixels to [0, 1).
            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0
        answers = np.array(answers, dtype=np.int32)
        return data, answers
================================================
FILE: theano/networks/tc_net_rnn_shared.py
================================================
import random
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
import PIL.Image as Image
from base_network import BaseNetwork
floatX = theano.config.floatX  # Theano's configured float dtype; appears unused in this module (kept for parity with sibling network files)
class Network(BaseNetwork):
def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):
print "==> not used params in DMN class:", kwargs.keys()
self.train_list_raw = train_list_raw
self.test_list_raw = test_list_raw
self.png_folder = png_folder
self.batch_size = batch_size
self.dropout = dropout
self.l2 = l2
self.mode = mode
self.batch_norm = batch_norm
self.num_units = rnn_num_units
self.input_var = T.tensor4('input_var')
self.answer_var = T.ivector('answer_var')
print "==> building network"
example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########
answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########
network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
print layers.get_output(network).eval({self.input_var:example}).shape
# CONV-RELU-POOL 1
network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7),
stride=1, nonlinearity=rectify)
print layers.get_output(network).eval({self.input_var:example}).shape
network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
print layers.get_output(network).eval({self.input_var:example}).shape
if (self.batch_norm):
network = layers.BatchNormLayer(incoming=network)
# CONV-RELU-POOL 2
network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5),
stride=1, nonlinearity=rectify)
print layers.get_output(network).eval({self.input_var:example}).shape
network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
print layers.get_output(network).eval({self.input_var:example}).shape
if (self.batch_norm):
network = layers.BatchNormLayer(incoming=network)
# CONV-RELU-POOL 3
network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
stride=1, nonlinearity=rectify)
print layers.get_output(network).eval({self.input_var:example}).shape
network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False)
print layers.get_output(network).eval({self.input_var:example}).shape
if (self.batch_norm):
network = layers.BatchNormLayer(incoming=network)
self.params = layers.get_all_params(network, trainable=True)
output = layers.get_output(network)
num_channels = 32
filter_W = 104
filter_H = 13
# NOTE: these constants are shapes of last pool layer, it can be symbolic
# explicit values are better for optimizations
channels = []
for channel_index in range(num_channels):
channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))
rnn_network_outputs = []
W_in_to_updategate = None
W_hid_to_updategate = None
b_updategate = None
W_in_to_resetgate = None
W_hid_to_resetgate = None
b_resetgate = None
W_in_to_hidden_update = None
W_hid_to_hidden_update = None
b_hidden_update = None
for channel_index in range(num_channels):
rnn_input_var = channels[channel_index]
# InputLayer
network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var)
if (channel_index == 0):
# GRULayer
network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
W_in_to_updategate = network.W_in_to_updategate
W_hid_to_updategate = network.W_hid_to_updategate
b_updategate = network.b_updategate
W_in_to_resetgate = network.W_in_to_resetgate
W_hid_to_resetgate = network.W_hid_to_resetgate
b_resetgate = network.b_resetgate
W_in_to_hidden_update = network.W_in_to_hidden_update
W_hid_to_hidden_update = network.W_hid_to_hidden_update
b_hidden_update = network.b_hidden_update
# add params
self.params += layers.get_all_params(network, trainable=True)
else:
# GRULayer, but shared
network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True,
resetgate=layers.Gate(W_in=W_in_to_resetgate, W_hid=W_hid_to_resetgate, b=b_resetgate),
updategate=layers.Gate(W_in=W_in_to_updategate, W_hid=W_hid_to_updategate, b=b_updategate),
hidden_update=layers.Gate(W_in=W_in_to_hidden_update, W_hid=W_hid_to_hidden_update, b=b_hidden_update))
rnn_network_outputs.append(layers.get_output(network))
all_output_var = T.concatenate(rnn_network_outputs, axis=1)
print all_output_var.eval({self.input_var:example}).shape
# InputLayer
network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var)
# BatchNormalization Layer
if (self.batch_norm):
network = layers.BatchNormLayer(incoming=network)
# Last layer: classification
network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
print layers.get_output(network).eval({self.input_var:example}).shape
self.params += layers.get_all_params(network, trainable=True)
self.prediction = layers.get_output(network)
#print "==> param shapes", [x.eval().shape for x in self.params]
self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
if (self.l2 > 0):
self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params,
lasagne.regularization.l2)
else:
self.loss_l2 = 0
self.loss = self.loss_ce + self.loss_l2
#updates = lasagne.updates.adadelta(self.loss, self.params)
updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)
if self.mode == 'train':
print "==> compiling train_fn"
self.train_fn = theano.function(inputs=[self.input_var, self.answer_var],
outputs=[self.prediction, self.loss],
updates=updates)
print "==> compiling test_fn"
self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
outputs=[self.prediction, self.loss])
def say_name(self):
    """Identifier used in checkpoint and log file names for this network."""
    template = "tc_net_rnn.3conv.GRU.shared.num_units%d.5khz"
    return template % self.num_units
def read_batch(self, data_raw, batch_index):
    """Load one minibatch of spectrogram PNGs and integer labels.

    Each row of `data_raw` is a comma-separated line whose first field is
    the PNG base name and whose second field is the class label.  Returns
    (data, answers) where data is float32 of shape (batch_size, 1, 128, 858)
    scaled to [0, 1) and answers is an int32 vector.
    """
    first = batch_index * self.batch_size
    batch = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
    labels = []
    for offset in range(self.batch_size):
        fields = data_raw[first + offset].split(',')
        labels.append(int(fields[1]))
        image = Image.open(self.png_folder + fields[0] + ".png")
        # Keep only the first 128 frequency rows; normalize 8-bit pixels.
        batch[offset, 0, :, :] = np.array(image).astype(np.float32)[:128, :] / 256.0
    return batch, np.array(labels, dtype=np.int32)
================================================
FILE: theano/networks/tc_net_rnn_shared_pad.py
================================================
import random
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
import PIL.Image as Image
from base_network import BaseNetwork
floatX = theano.config.floatX
class Network(BaseNetwork):
def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):
print "==> not used params in DMN class:", kwargs.keys()
self.train_list_raw = train_list_raw
self.test_list_raw = test_list_raw
self.png_folder = png_folder
self.batch_size = batch_size
self.dropout = dropout
self.l2 = l2
self.mode = mode
self.batch_norm = batch_norm
self.num_units = rnn_num_units
self.input_var = T.tensor4('input_var')
self.answer_var = T.ivector('answer_var')
print "==> building network"
example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) #########
answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########
network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
print layers.get_output(network).eval({self.input_var:example}).shape
# CONV-RELU-POOL 1
network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7),
stride=1, nonlinearity=rectify)
print layers.get_output(network).eval({self.input_var:example}).shape
network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
print layers.get_output(network).eval({self.input_var:example}).shape
if (self.batch_norm):
network = layers.BatchNormLayer(incoming=network)
# CONV-RELU-POOL 2
network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5),
stride=1, nonlinearity=rectify)
print layers.get_output(network).eval({self.input_var:example}).shape
network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
print layers.get_output(network).eval({self.input_var:example}).shape
if (self.batch_norm):
network = layers.BatchNormLayer(incoming=network)
# CONV-RELU-POOL 3
network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
stride=1, nonlinearity=rectify)
print layers.get_output(network).eval({self.input_var:example}).shape
network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
print layers.get_output(network).eval({self.input_var:example}).shape
if (self.batch_norm):
network = layers.BatchNormLayer(incoming=network)
# CONV-RELU-POOL 4
network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
stride=1, nonlinearity=rectify)
print layers.get_output(network).eval({self.input_var:example}).shape
network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
print layers.get_output(network).eval({self.input_var:example}).shape
if (self.batch_norm):
network = layers.BatchNormLayer(incoming=network)
self.params = layers.get_all_params(network, trainable=True)
output = layers.get_output(network)
num_channels = 32
filter_W = 54
filter_H = 8
# NOTE: these constants are shapes of last pool layer, it can be symbolic
# explicit values are better for optimizations
channels = []
for channel_index in range(num_channels):
channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))
rnn_network_outputs = []
W_in_to_updategate = None
W_hid_to_updategate = None
b_updategate = None
W_in_to_resetgate = None
W_hid_to_resetgate = None
b_resetgate = None
W_in_to_hidden_update = None
W_hid_to_hidden_update = None
b_hidden_update = None
for channel_index in range(num_channels):
rnn_input_var = channels[channel_index]
# InputLayer
network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var)
if (channel_index == 0):
# GRULayer
network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
W_in_to_updategate = network.W_in_to_updategate
W_hid_to_updategate = network.W_hid_to_updategate
b_updategate = network.b_updategate
W_in_to_resetgate = network.W_in_to_resetgate
W_hid_to_resetgate = network.W_hid_to_resetgate
b_resetgate = network.b_resetgate
W_in_to_hidden_update = network.W_in_to_hidden_update
W_hid_to_hidden_update = network.W_hid_to_hidden_update
b_hidden_update = network.b_hidden_update
# add params
self.params += layers.get_all_params(network, trainable=True)
else:
# GRULayer, but shared
network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True,
resetgate=layers.Gate(W_in=W_in_to_resetgate, W_hid=W_hid_to_resetgate, b=b_resetgate),
updategate=layers.Gate(W_in=W_in_to_updategate, W_hid=W_hid_to_updategate, b=b_updategate),
hidden_update=layers.Gate(W_in=W_in_to_hidden_update, W_hid=W_hid_to_hidden_update, b=b_hidden_update))
rnn_network_outputs.append(layers.get_output(network))
all_output_var = T.concatenate(rnn_network_outputs, axis=1)
print all_output_var.eval({self.input_var:example}).shape
# InputLayer
network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var)
# Dropout Layer
if (self.dropout > 0):
network = layers.dropout(network, self.dropout)
# BatchNormalization Layer
if (self.batch_norm):
network = layers.BatchNormLayer(incoming=network)
# Last layer: classification
network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
print layers.get_output(network).eval({self.input_var:example}).shape
self.params += layers.get_all_params(network, trainable=True)
self.prediction = layers.get_output(network)
#print "==> param shapes", [x.eval().shape for x in self.params]
self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
if (self.l2 > 0):
self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params,
lasagne.regularization.l2)
else:
self.loss_l2 = 0
self.loss = self.loss_ce + self.loss_l2
#updates = lasagne.updates.adadelta(self.loss, self.params)
updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)
if self.mode == 'train':
print "==> compiling train_fn"
self.train_fn = theano.function(inputs=[self.input_var, self.answer_var],
outputs=[self.prediction, self.loss],
updates=updates)
print "==> compiling test_fn"
self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
outputs=[self.prediction, self.loss])
def say_name(self):
return "tc_net_rnn.4conv.pad.GRU.shared.num_units%d.5khz" % self.num_units
def read_batch(self, data_raw, batch_index):
start_index = batch_index * self.batch_size
end_index = start_index + self.batch_size
data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
answers = []
for i in range(start_index, end_index):
answers.append(int(data_raw[i].split(',')[1]))
name = data_raw[i].split(',')[0]
path = self.png_folder + name + ".png"
im = Image.open(path)
data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0
answers = np.array(answers, dtype=np.int32)
return data, answers
================================================
FILE: theano/networks/tc_net_rnn_shared_pad_augm.py
================================================
import random
import numpy as np
import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh
import PIL.Image as Image
from base_network import BaseNetwork
floatX = theano.config.floatX
class Network(BaseNetwork):
def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):
print "==> not used params in DMN class:", kwargs.keys()
self.train_list_raw = train_list_raw
self.test_list_raw = test_list_raw
self.png_folder = png_folder
self.batch_size = batch_size
self.dropout = dropout
self.l2 = l2
self.mode = mode
self.batch_norm = batch_norm
self.num_units = rnn_num_units
self.input_var = T.tensor4('input_var')
self.answer_var = T.ivector('answer_var')
print "==> building network"
example = np.random.uniform(size=(self.batch_size, 1, 128, 768), low=0.0, high=1.0).astype(np.float32) #########
answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) #########
network = layers.InputLayer(shape=(None, 1, 128, 768), input_var=self.input_var)
print layers.get_output(network).eval({self.input_var:example}).shape
# CONV-RELU-POOL 1
network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7),
stride=1, nonlinearity=rectify)
print layers.get_output(network).eval({self.input_var:example}).shape
network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
print layers.get_output(network).eval({self.input_var:example}).shape
if (self.batch_norm):
network = layers.BatchNormLayer(incoming=network)
# CONV-RELU-POOL 2
network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5),
stride=1, nonlinearity=rectify)
print layers.get_output(network).eval({self.input_var:example}).shape
network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
print layers.get_output(network).eval({self.input_var:example}).shape
if (self.batch_norm):
network = layers.BatchNormLayer(incoming=network)
# CONV-RELU-POOL 3
network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
stride=1, nonlinearity=rectify)
print layers.get_output(network).eval({self.input_var:example}).shape
network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
print layers.get_output(network).eval({self.input_var:example}).shape
if (self.batch_norm):
network = layers.BatchNormLayer(incoming=network)
# CONV-RELU-POOL 4
network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
stride=1, nonlinearity=rectify)
print layers.get_output(network).eval({self.input_var:example}).shape
network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
print layers.get_output(network).eval({self.input_var:example}).shape
if (self.batch_norm):
network = layers.BatchNormLayer(incoming=network)
self.params = layers.get_all_params(network, trainable=True)
output = layers.get_output(network)
num_channels = 32
filter_W = 48
filter_H = 8
# NOTE: these constants are shapes of last pool layer, it can be symbolic
# explicit values are better for optimizations
channels = []
for channel_index in range(num_channels):
channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))
rnn_network_outputs = []
W_in_to_updategate = None
W_hid_to_updategate = None
b_updategate = None
W_in_to_resetgate = None
W_hid_to_resetgate = None
b_resetgate = None
W_in_to_hidden_update = None
W_hid_to_hidden_update = None
b_hidden_update = None
for channel_index in range(num_channels):
rnn_input_var = channels[channel_index]
# InputLayer
network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var)
if (channel_index == 0):
# GRULayer
network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True)
W_in_to_updategate = network.W_in_to_updategate
W_hid_to_updategate = network.W_hid_to_updategate
b_updategate = network.b_updategate
W_in_to_resetgate = network.W_in_to_resetgate
W_hid_to_resetgate = network.W_hid_to_resetgate
b_resetgate = network.b_resetgate
W_in_to_hidden_update = network.W_in_to_hidden_update
W_hid_to_hidden_update = network.W_hid_to_hidden_update
b_hidden_update = network.b_hidden_update
# add params
self.params += layers.get_all_params(network, trainable=True)
else:
# GRULayer, but shared
network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True,
resetgate=layers.Gate(W_in=W_in_to_resetgate, W_hid=W_hid_to_resetgate, b=b_resetgate),
updategate=layers.Gate(W_in=W_in_to_updategate, W_hid=W_hid_to_updategate, b=b_updategate),
hidden_update=layers.Gate(W_in=W_in_to_hidden_update, W_hid=W_hid_to_hidden_update, b=b_hidden_update))
rnn_network_outputs.append(layers.get_output(network))
all_output_var = T.concatenate(rnn_network_outputs, axis=1)
print all_output_var.eval({self.input_var:example}).shape
# InputLayer
network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var)
# Dropout Layer
if (self.dropout > 0):
network = layers.dropout(network, self.dropout)
# BatchNormalization Layer
if (self.batch_norm):
network = layers.BatchNormLayer(incoming=network)
# Last layer: classification
network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
print layers.get_output(network).eval({self.input_var:example}).shape
self.params += layers.get_all_params(network, trainable=True)
self.prediction = layers.get_output(network)
#print "==> param shapes", [x.eval().shape for x in self.params]
self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean()
if (self.l2 > 0):
self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params,
lasagne.regularization.l2)
else:
self.loss_l2 = 0
self.loss = self.loss_ce + self.loss_l2
#updates = lasagne.updates.adadelta(self.loss, self.params)
updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)
if self.mode == 'train':
print "==> compiling train_fn"
self.train_fn = theano.function(inputs=[self.input_var, self.answer_var],
outputs=[self.prediction, self.loss],
updates=updates)
print "==> compiling test_fn"
self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
outputs=[self.prediction, self.loss])
def say_name(self):
return "tc_net_rnn.4conv.pad.GRU.shared.num_units%d.5khz.augm" % self.num_units
def read_batch(self, data_raw, batch_index):
start_index = batch_index * self.batch_size
end_index = start_index + self.batch_size
data = np.zeros((self.batch_size, 1, 128, 768), dtype=np.float32)
answers = []
for i in range(start_index, end_index):
answers.append(int(data_raw[i].split(',')[1]))
name = data_raw[i].split(',')[0]
path = self.png_folder + name + ".png"
im = Image.open(path)
offset = random.randint(0, 90)
data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, offset:offset+768] / 256.0
answers = np.array(answers, dtype=np.int32)
return data, answers
================================================
FILE: theano/plot.py
================================================
import numpy as np
import matplotlib
matplotlib.use('Agg')  # headless backend: write PNGs without a display
import matplotlib.pyplot as plt
import sys
import argparse
import os

# Parse command-line options controlling what gets plotted and where.
parser = argparse.ArgumentParser()
parser.add_argument('--plot', type=str, default='plot.png', help='plotfile name with .png')
parser.add_argument('--log', type=str, default='log.txt', help='log file name')
# BUGFIX(idiom): the defaults were the *strings* '200', which only worked
# because argparse re-parses string defaults through `type`; use real ints.
parser.add_argument('--winVal', type=int, default=200, help='window for Val')
parser.add_argument('--winTrain', type=int, default=200, help='window for Train')
parser.add_argument('--no-legend', dest='legend', action='store_false')
parser.add_argument('--no-accuracy', dest='accuracy', action='store_false')
parser.add_argument('--no-loss', dest='loss', action='store_false')
parser.add_argument('--start_epoch', type=float, default=-1.0, help='start plotting from that epoch')
parser.set_defaults(loss=True)
parser.set_defaults(legend=True)
parser.set_defaults(accuracy=True)
args = parser.parse_args()

plotname = args.plot
windowVal = args.winVal
windowTrain = args.winTrain
def movingAverage(loss, window):
    """Smooth `loss` with a fixed-size sliding mean of `window` samples.

    For index i the window starts at max(0, i - window + 1) and always
    covers exactly `window` consecutive samples, so the first window-1
    entries all equal the mean of the first `window` samples.

    Requires 0 < window <= len(loss); callers clamp the window accordingly
    (note: movingAverage calls are currently commented out in plotTrainVal).
    """
    smoothed = []
    for i in range(len(loss)):
        start = max(0, i - window + 1)
        # Starting sum() at 0.0 reproduces the original float accumulation
        # order exactly and guarantees float division.
        smoothed.append(sum(loss[start:start + window], 0.0) / window)
    return smoothed
def plotTrainVal(filename, index, plotLabel):
    """Parse one training log file and add its loss/accuracy curves to the
    current matplotlib figure.

    `index` selects a color in the hand-rolled palette below; `plotLabel`
    prefixes the legend entries.  Reads the module-level `args`,
    `windowVal` and `windowTrain` globals.
    """
    # Series of (fractional-epoch, value) points parsed from the log.
    valx = []
    valy = []
    trainx = []
    trainy = []
    train_accuracyx = []
    train_accuracyy = []
    val_accuracyx = []
    val_accuracyy = []
    with open(filename, 'r') as logfile:
        for st in logfile.readlines():
            head = st.split('\t')[0].strip()
            # Lines look like "training: <epoch>.<iteration>/<iters_per_epoch>"
            # (assumed from the parsing below — TODO confirm against main.py).
            if (head[:7] == 'testing' or head[:8] == 'training'):
                iteration_expr = head[head.find(':')+1:]
                divpos = iteration_expr.find('/')
                first = iteration_expr[:divpos]
                iterations_per_epoch = float(iteration_expr[divpos+1:])
                dotpos = first.find('.')
                epoch = float(first[:dotpos])
                iteration = float(first[dotpos+1:])
                # Fractional epoch for the x axis.
                x = epoch + iteration / iterations_per_epoch
                # Loss value follows an "avg_loss:<value>\t" field on the line.
                st_loss = st[st.find("avg_loss"):]
                cur_loss = float(st_loss[st_loss.find(':')+1:st_loss.find('\t')])
                if (head[:7] == 'testing'):
                    valx.append(x)
                    valy.append(cur_loss)
                else:
                    trainx.append(x)
                    trainy.append(cur_loss)
            # "accuracy: <value> percent" lines; attributed to train or val by
            # alternation (train accuracy is assumed to be logged first).
            if st.strip()[:8] == "accuracy":
                cur_accuracy = float(st[st.find(':')+1:st.find("percent")]) / 100.0
                if (len(train_accuracyx) > len(val_accuracyx)):
                    val_accuracyx.append(valx[-1])
                    val_accuracyy.append(cur_accuracy)
                else:
                    train_accuracyx.append(trainx[-1])
                    train_accuracyy.append(cur_accuracy)
    # Drop points before the requested start epoch.
    while(len(valx) > 0 and valx[0] < args.start_epoch):
        valx = valx[1:]
        valy = valy[1:]
    while(len(trainx) > 0 and trainx[0] < args.start_epoch):
        trainx = trainx[1:]
        trainy = trainy[1:]
    #window config
    # Clamp smoothing windows to at most 80% of the available points.
    wndVal = min(windowVal, int(0.8 * len(valy)))
    wndTrain = min(windowTrain, int(0.8 * len(trainy)))
    print "Train length: ", len(trainy), " \t\t window: ", wndTrain
    print "Val length: ", len(valy), " \t\t window: ", wndVal
    #movAvg and correcting length
    #valy = movingAverage(valy, wndVal)
    #trainy = movingAverage(trainy, wndTrain)
    #valx = valx[:len(valy)]
    #trainx = trainx[:len(trainy)]
    #plotting
    # Per-index color steps; colors are built as '#RRGGBB' hex strings.
    # NOTE(review): hex(...) yields unpadded digits, so some index values
    # produce malformed color strings — verify for index > 1.
    greenDiff = 50
    redBlueDiff = 50
    if (args.loss):
        plt.plot(trainx, trainy, '#00' + hex(index * greenDiff)[2:]
                 + hex(256 - index * redBlueDiff)[2:],
                 label=plotLabel + " train")
        plt.hold(True)
        plt.plot(valx, valy, '#' + hex(256 - index * redBlueDiff)[2:]
                 + hex(index * greenDiff)[2:] + '00',
                 label=plotLabel + " validation")
        plt.hold(True)
    if (args.accuracy):
        plt.plot(train_accuracyx, train_accuracyy, '#000000',
                 label=plotLabel + " train_accuracy")
        plt.hold(True)
        plt.plot(val_accuracyx, val_accuracyy, '#00FF00',
                 label=plotLabel + " val_accuracy")
        plt.hold(True)
    print "plot index =", index
    for (x, y) in zip(val_accuracyx, val_accuracyy):
        print "\tepoch = %.0f, accuracy = %f" % (x - 1, y)
    # NOTE(review): raises ValueError if the log contains no accuracy lines.
    print '\tMax: %f // Epoch: %d' % (max(val_accuracyy), val_accuracyx[val_accuracyy.index(max(val_accuracyy))])
# Plot the curves from the single log file given on the command line,
# labeling them with the log file's own name.
plotTrainVal(args.log, 1, args.log)
if (args.legend):
    plt.legend(loc='upper right', fontsize='x-small')
# Write the figure to the requested output PNG (Agg backend, headless).
plt.gcf().savefig(plotname)
gitextract_7gedhokp/
├── .gitignore
├── LICENSE
├── README.md
├── augment_data.py
├── choose_equal_split.py
├── concatenate_csvs.py
├── create_spectrograms.py
├── ensembling/
│ ├── ensemble.theano.py
│ └── get_output_layers.py
├── get_score_from_probabilities.py
├── get_score_from_top3_prediction.py
├── get_sum_of_csvs.py
├── majority_vote_ensembling.py
├── make_submission.py
├── prototxt/
│ ├── augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024r-1024r_DLR_nolrcoef.prototxt
│ ├── augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt
│ ├── deploy.augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt
│ ├── deploy.main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt
│ ├── main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt
│ ├── solver.augm.nolrcoef.prototxt
│ └── solver.main.adadelta.prototxt
├── test_augm_network.py
├── test_main_network.py
└── theano/
├── README.md
├── main.py
├── networks/
│ ├── __init__.py
│ ├── base_network.py
│ ├── rnn.py
│ ├── rnn_2layers.py
│ ├── rnn_2layers_5khz.py
│ ├── tc_net.py
│ ├── tc_net_deeprnn_shared_pad.py
│ ├── tc_net_mod.py
│ ├── tc_net_mod_5khz_small.py
│ ├── tc_net_rnn.py
│ ├── tc_net_rnn_nodense.py
│ ├── tc_net_rnn_onernn.py
│ ├── tc_net_rnn_onernn_notimepool.py
│ ├── tc_net_rnn_shared.py
│ ├── tc_net_rnn_shared_pad.py
│ └── tc_net_rnn_shared_pad_augm.py
└── plot.py
SYMBOL INDEX (73 symbols across 20 files)
FILE: augment_data.py
function stft (line 9) | def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
function logscale_spec (line 26) | def logscale_spec(spec, sr=44100, factor=20., alpha=1.0, f0=0.9, fmax=1):
function plotstft (line 66) | def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="gray", c...
FILE: create_spectrograms.py
function stft (line 9) | def stft(sig, frameSize, overlapFac=0.5, window=np.hanning):
function logscale_spec (line 26) | def logscale_spec(spec, sr=44100, factor=20., alpha=1.0, f0=0.9, fmax=1):
function plotstft (line 66) | def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="gray", c...
FILE: ensembling/ensemble.theano.py
function get_score (line 15) | def get_score(probs, label):
function get_full_score (line 25) | def get_full_score(preds, labels):
FILE: theano/main.py
function do_epoch (line 73) | def do_epoch(mode, epoch):
FILE: theano/networks/base_network.py
class BaseNetwork (line 4) | class BaseNetwork:
method say_name (line 6) | def say_name(self):
method save_params (line 10) | def save_params(self, file_name, epoch, **kwargs):
method load_state (line 22) | def load_state(self, file_name):
method get_batches_per_epoch (line 34) | def get_batches_per_epoch(self, mode):
method step (line 43) | def step(self, batch_index, mode):
FILE: theano/networks/rnn.py
class Network (line 17) | class Network(BaseNetwork):
method __init__ (line 19) | def __init__(self, train_list_raw, test_list_raw, png_folder, batch_si...
method say_name (line 74) | def say_name(self):
method read_batch (line 78) | def read_batch(self, data_raw, batch_index):
FILE: theano/networks/rnn_2layers.py
class Network (line 17) | class Network(BaseNetwork):
method __init__ (line 19) | def __init__(self, train_list_raw, test_list_raw, png_folder, batch_si...
method say_name (line 87) | def say_name(self):
method read_batch (line 91) | def read_batch(self, data_raw, batch_index):
FILE: theano/networks/rnn_2layers_5khz.py
class Network (line 17) | class Network(BaseNetwork):
method __init__ (line 19) | def __init__(self, train_list_raw, test_list_raw, png_folder, batch_si...
method say_name (line 92) | def say_name(self):
method read_batch (line 96) | def read_batch(self, data_raw, batch_index):
FILE: theano/networks/tc_net.py
class Network (line 17) | class Network(BaseNetwork):
method __init__ (line 19) | def __init__(self, train_list_raw, test_list_raw, png_folder, batch_si...
method say_name (line 144) | def say_name(self):
method read_batch (line 148) | def read_batch(self, data_raw, batch_index):
FILE: theano/networks/tc_net_deeprnn_shared_pad.py
class Network (line 16) | class Network(BaseNetwork):
method __init__ (line 18) | def __init__(self, train_list_raw, test_list_raw, png_folder, batch_si...
method say_name (line 217) | def say_name(self):
method read_batch (line 221) | def read_batch(self, data_raw, batch_index):
FILE: theano/networks/tc_net_mod.py
class Network (line 17) | class Network(BaseNetwork):
method __init__ (line 19) | def __init__(self, train_list_raw, test_list_raw, png_folder, batch_si...
method say_name (line 139) | def say_name(self):
method read_batch (line 143) | def read_batch(self, data_raw, batch_index):
FILE: theano/networks/tc_net_mod_5khz_small.py
class Network (line 17) | class Network(BaseNetwork):
method __init__ (line 19) | def __init__(self, train_list_raw, test_list_raw, png_folder, batch_si...
method say_name (line 139) | def say_name(self):
method read_batch (line 143) | def read_batch(self, data_raw, batch_index):
FILE: theano/networks/tc_net_rnn.py
class Network (line 17) | class Network(BaseNetwork):
method __init__ (line 19) | def __init__(self, train_list_raw, test_list_raw, png_folder, batch_si...
method say_name (line 150) | def say_name(self):
method read_batch (line 154) | def read_batch(self, data_raw, batch_index):
FILE: theano/networks/tc_net_rnn_nodense.py
class Network (line 17) | class Network(BaseNetwork):
method __init__ (line 19) | def __init__(self, train_list_raw, test_list_raw, png_folder, batch_si...
method say_name (line 150) | def say_name(self):
method read_batch (line 154) | def read_batch(self, data_raw, batch_index):
FILE: theano/networks/tc_net_rnn_onernn.py
class Network (line 17) | class Network(BaseNetwork):
method __init__ (line 19) | def __init__(self, train_list_raw, test_list_raw, png_folder, batch_si...
method say_name (line 137) | def say_name(self):
method read_batch (line 141) | def read_batch(self, data_raw, batch_index):
FILE: theano/networks/tc_net_rnn_onernn_notimepool.py
class Network (line 17) | class Network(BaseNetwork):
method __init__ (line 19) | def __init__(self, train_list_raw, test_list_raw, png_folder, batch_si...
method say_name (line 138) | def say_name(self):
method read_batch (line 142) | def read_batch(self, data_raw, batch_index):
FILE: theano/networks/tc_net_rnn_shared.py
class Network (line 17) | class Network(BaseNetwork):
method __init__ (line 19) | def __init__(self, train_list_raw, test_list_raw, png_folder, batch_si...
method say_name (line 169) | def say_name(self):
method read_batch (line 173) | def read_batch(self, data_raw, batch_index):
FILE: theano/networks/tc_net_rnn_shared_pad.py
class Network (line 17) | class Network(BaseNetwork):
method __init__ (line 19) | def __init__(self, train_list_raw, test_list_raw, png_folder, batch_si...
method say_name (line 183) | def say_name(self):
method read_batch (line 187) | def read_batch(self, data_raw, batch_index):
FILE: theano/networks/tc_net_rnn_shared_pad_augm.py
class Network (line 17) | class Network(BaseNetwork):
method __init__ (line 19) | def __init__(self, train_list_raw, test_list_raw, png_folder, batch_si...
method say_name (line 183) | def say_name(self):
method read_batch (line 187) | def read_batch(self, data_raw, batch_index):
FILE: theano/plot.py
function movingAverage (line 32) | def movingAverage(loss, window):
function plotTrainVal (line 45) | def plotTrainVal(filename, index, plotLabel):
Condensed preview — 42 files, each showing path, character count, and a content snippet. Download the .json file, or copy it, for the full structured content (183K chars).
[
{
"path": ".gitignore",
"chars": 6,
"preview": "*.pyc\n"
},
{
"path": "LICENSE",
"chars": 1065,
"preview": "MIT License\n\nCopyright (c) 2016 YerevaNN\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\no"
},
{
"path": "README.md",
"chars": 1371,
"preview": "# Spoken language identification with deep learning\n\nRead more in the following blog posts:\n\n* [About TopCoder contest a"
},
{
"path": "augment_data.py",
"chars": 3852,
"preview": "import numpy as np\nfrom matplotlib import pyplot as plt\nimport scipy.io.wavfile as wav\nfrom numpy.lib import stride_tric"
},
{
"path": "choose_equal_split.py",
"chars": 929,
"preview": "\"\"\"split data into training and validation sets\"\"\"\nimport csv\n\nwith open('trainingData.csv', 'rb') as csvfile:\n next("
},
{
"path": "concatenate_csvs.py",
"chars": 537,
"preview": "\"\"\" Usage: python concatenate_csvs.py csv1path csv2path ..\n\"\"\"\nimport sys\nimport numpy as np\n\nn_csv = len(sys.argv) - 1\n"
},
{
"path": "create_spectrograms.py",
"chars": 4197,
"preview": "import numpy as np\nfrom matplotlib import pyplot as plt\nimport scipy.io.wavfile as wav\nfrom numpy.lib import stride_tric"
},
{
"path": "ensembling/ensemble.theano.py",
"chars": 4791,
"preview": "\"\"\" Usage: python ensemble.theano.py model1 [another_model]*\n \nfor GPU mode\n 1. export PATH=$PATH:/usr/local/cuda-"
},
{
"path": "ensembling/get_output_layers.py",
"chars": 3211,
"preview": "\"\"\" Usage: python get_output_layers.py test|val\n\"\"\"\nimport sys\nimport caffe\nimport numpy as np\n\ncaffe.set_mode_gpu()\n\nde"
},
{
"path": "get_score_from_probabilities.py",
"chars": 1951,
"preview": "\"\"\" USAGE: python get_score_from_probabilities.py --prediction= --anwser=\n prediction file may have less lines\n\"\"\"\nim"
},
{
"path": "get_score_from_top3_prediction.py",
"chars": 2124,
"preview": "\"\"\" USAGE: python get_score_fromcsv.py --prediction= --anwser=\n \n Prediction file may have less lines\n \n Each"
},
{
"path": "get_sum_of_csvs.py",
"chars": 563,
"preview": "\"\"\" Usage: python get_sum_csvs.py csv1path csv2path ..\n\"\"\"\nimport sys\nimport numpy as np\n\nn_csv = len(sys.argv) - 1\ncnt "
},
{
"path": "majority_vote_ensembling.py",
"chars": 929,
"preview": "\"\"\" Usage: python majority_vote_ensembling.py csv1path csv2path ..\n\"\"\"\nimport sys\nimport numpy as np\n\nn_csv = len(sys.ar"
},
{
"path": "make_submission.py",
"chars": 1049,
"preview": "\"\"\" Usage: python make_submission.py csvpath model_name\ncsv - must contain 12320 rows, 176 coloumns: the predictions for"
},
{
"path": "prototxt/augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024r-1024r_DLR_nolrcoef.prototxt",
"chars": 5088,
"preview": "name: \"LangNet\"\n# DATA LAYERS\nlayer {\n name: \"mnist\"\n type: \"Data\"\n top: \"data\"\n top: \"label\"\n include {\n phase:"
},
{
"path": "prototxt/augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt",
"chars": 5344,
"preview": "name: \"LangNet\"\n# DATA LAYERS\nlayer {\n name: \"mnist\"\n type: \"Data\"\n top: \"data\"\n top: \"label\"\n include {\n phase:"
},
{
"path": "prototxt/deploy.augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt",
"chars": 4772,
"preview": "name: \"LangNet\"\n# DATA LAYERS\ninput: \"data\"\ninput_dim: 1\ninput_dim: 1\ninput_dim: 256\ninput_dim: 768\n\n# CONV1-RELU1-POOL1"
},
{
"path": "prototxt/deploy.main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt",
"chars": 4726,
"preview": "name: \"LangNet\"\n# DATA LAYERS\ninput: \"data\"\ninput_dim: 1\ninput_dim: 1\ninput_dim: 256\ninput_dim: 858\n\n# CONV1-RELU1-POOL1"
},
{
"path": "prototxt/main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt",
"chars": 5282,
"preview": "name: \"LangNet\"\n# DATA LAYERS\nlayer {\n name: \"mnist\"\n type: \"Data\"\n top: \"data\"\n top: \"label\"\n include {\n phase:"
},
{
"path": "prototxt/solver.augm.nolrcoef.prototxt",
"chars": 588,
"preview": "net: \"prototxt/augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt\"\n\ntest_iter: 512\ntest_interval: "
},
{
"path": "prototxt/solver.main.adadelta.prototxt",
"chars": 574,
"preview": "net: \"prototxt/main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt\"\n\ntest_iter: 100\ntest_interv"
},
{
"path": "test_augm_network.py",
"chars": 3278,
"preview": "import sys\nimport caffe\nimport numpy as np\n\ncaffe.set_mode_gpu()\n\n# info about classes\nfile = open('trainingData.csv')\nd"
},
{
"path": "test_main_network.py",
"chars": 2955,
"preview": "import sys\nimport caffe\nimport numpy as np\n\ncaffe.set_mode_gpu()\n\n# info about classes\nfile = open('trainingData.csv')\nd"
},
{
"path": "theano/README.md",
"chars": 264,
"preview": "# Spoken language identification\n\n`networks` folder contains multiple CNN and/or RNN models implemented in Theano/Lasagn"
},
{
"path": "theano/main.py",
"chars": 5870,
"preview": "import sys\nimport numpy as np\nimport sklearn.metrics as metrics\nimport argparse\nimport time\nimport json\nimport importlib"
},
{
"path": "theano/networks/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "theano/networks/base_network.py",
"chars": 1573,
"preview": "import cPickle as pickle\n\n\nclass BaseNetwork:\n\t\n\tdef say_name(self):\n\t\treturn \"unknown\"\n\t\n\t\n\tdef save_params(self, file_"
},
{
"path": "theano/networks/rnn.py",
"chars": 3734,
"preview": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfro"
},
{
"path": "theano/networks/rnn_2layers.py",
"chars": 4288,
"preview": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfro"
},
{
"path": "theano/networks/rnn_2layers_5khz.py",
"chars": 4519,
"preview": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfro"
},
{
"path": "theano/networks/tc_net.py",
"chars": 7636,
"preview": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfro"
},
{
"path": "theano/networks/tc_net_deeprnn_shared_pad.py",
"chars": 10959,
"preview": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfro"
},
{
"path": "theano/networks/tc_net_mod.py",
"chars": 7311,
"preview": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfro"
},
{
"path": "theano/networks/tc_net_mod_5khz_small.py",
"chars": 7172,
"preview": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfro"
},
{
"path": "theano/networks/tc_net_rnn.py",
"chars": 7319,
"preview": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfro"
},
{
"path": "theano/networks/tc_net_rnn_nodense.py",
"chars": 7327,
"preview": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfro"
},
{
"path": "theano/networks/tc_net_rnn_onernn.py",
"chars": 7050,
"preview": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfro"
},
{
"path": "theano/networks/tc_net_rnn_onernn_notimepool.py",
"chars": 7087,
"preview": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfro"
},
{
"path": "theano/networks/tc_net_rnn_shared.py",
"chars": 8404,
"preview": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfro"
},
{
"path": "theano/networks/tc_net_rnn_shared_pad.py",
"chars": 9048,
"preview": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfro"
},
{
"path": "theano/networks/tc_net_rnn_shared_pad_augm.py",
"chars": 9099,
"preview": "import random\nimport numpy as np\n\nimport theano\nimport theano.tensor as T\n\nimport lasagne\nfrom lasagne import layers\nfro"
},
{
"path": "theano/plot.py",
"chars": 4855,
"preview": "import numpy as np\nimport matplotlib\nmatplotlib.use('Agg')\nimport matplotlib.pyplot as plt\nimport sys\nimport argparse\nim"
}
]
About this extraction
This page contains the full source code of the YerevaNN/Spoken-language-identification GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 42 files (168.6 KB), approximately 47.3k tokens, and a symbol index with 73 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.