Repository: YerevaNN/Spoken-language-identification Branch: master Commit: e947dee00f30 Files: 42 Total size: 168.6 KB Directory structure: gitextract_7gedhokp/ ├── .gitignore ├── LICENSE ├── README.md ├── augment_data.py ├── choose_equal_split.py ├── concatenate_csvs.py ├── create_spectrograms.py ├── ensembling/ │ ├── ensemble.theano.py │ └── get_output_layers.py ├── get_score_from_probabilities.py ├── get_score_from_top3_prediction.py ├── get_sum_of_csvs.py ├── majority_vote_ensembling.py ├── make_submission.py ├── prototxt/ │ ├── augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024r-1024r_DLR_nolrcoef.prototxt │ ├── augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt │ ├── deploy.augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt │ ├── deploy.main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt │ ├── main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt │ ├── solver.augm.nolrcoef.prototxt │ └── solver.main.adadelta.prototxt ├── test_augm_network.py ├── test_main_network.py └── theano/ ├── README.md ├── main.py ├── networks/ │ ├── __init__.py │ ├── base_network.py │ ├── rnn.py │ ├── rnn_2layers.py │ ├── rnn_2layers_5khz.py │ ├── tc_net.py │ ├── tc_net_deeprnn_shared_pad.py │ ├── tc_net_mod.py │ ├── tc_net_mod_5khz_small.py │ ├── tc_net_rnn.py │ ├── tc_net_rnn_nodense.py │ ├── tc_net_rnn_onernn.py │ ├── tc_net_rnn_onernn_notimepool.py │ ├── tc_net_rnn_shared.py │ ├── tc_net_rnn_shared_pad.py │ └── tc_net_rnn_shared_pad_augm.py └── plot.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ *.pyc ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2016 YerevaNN Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README.md ================================================
# Spoken language identification with deep learning

Read more in the following blog posts:
* [About TopCoder contest and our CNN-based solution implemented in Caffe](http://yerevann.github.io/2015/10/11/spoken-language-identification-with-deep-convolutional-networks/) (October 2015)
* [About combining CNN and RNN using Theano/Lasagne](http://yerevann.github.io/2016/06/26/combining-cnn-and-rnn-for-spoken-language-identification/) (June 2016)

Theano/Lasagne models are [here](/theano). The basic steps to run them are:
* Download the dataset from [here](https://community.topcoder.com/longcontest/?module=ViewProblemStatement&rd=16555&pm=13978) or use your own dataset.
* Create spectrograms for each recording using `create_spectrograms.py` or `augment_data.py`. The latter also augments the data by randomly perturbing the spectrograms and cropping a random 9s interval from each recording.
* Create listfiles for the training set and the validation set, where each row of a listfile describes one example and has two values separated by a comma. The first is the name of the example, the second is the label (counting starts from 0). A typical listfile looks like [this](https://gist.github.com/Harhro94/aa11fe6b454c614cdedea882fd00f8d7).
* Change the `png_folder` and listfile paths in [`theano/main.py`](/theano/main.py).
* Run `theano/main.py`.
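For reference, a listfile in the format described above can be generated from the contest's `trainingData.csv` with a few lines of Python (a minimal sketch; the output name `listfile.csv` is illustrative, and the label map simply sorts the language names, as the repository's own scripts do):

```python
import csv

# trainingData.csv has a header row followed by lines like "sample001.mp3,Armenian"
with open('trainingData.csv') as f:
    next(f)  # skip the header
    rows = list(csv.reader(f))

# map each language to an id, counting from 0, in sorted order
languages = sorted(set(lang.strip() for _, lang in rows))
label_of = {lang: i for i, lang in enumerate(languages)}

with open('listfile.csv', 'w') as out:
    for path, lang in rows:
        out.write(path[:-4] + ',' + str(label_of[lang.strip()]) + '\n')
```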
================================================ FILE: augment_data.py ================================================ import numpy as np from matplotlib import pyplot as plt import scipy.io.wavfile as wav from numpy.lib import stride_tricks import PIL.Image as Image import os """ short time fourier transform of audio signal """ def stft(sig, frameSize, overlapFac=0.5, window=np.hanning): win = window(frameSize) hopSize = int(frameSize - np.floor(overlapFac * frameSize)) # zeros at beginning (thus center of 1st window should be for sample nr. 0) samples = np.append(np.zeros(np.floor(frameSize/2.0)), sig) # cols for windowing cols = np.ceil( (len(samples) - frameSize) / float(hopSize)) + 1 # zeros at end (thus samples can be fully covered by frames) samples = np.append(samples, np.zeros(frameSize)) frames = stride_tricks.as_strided(samples, shape=(cols, frameSize), strides=(samples.strides[0]*hopSize, samples.strides[0])).copy() frames *= win return np.fft.rfft(frames) """ scale frequency axis logarithmically """ def logscale_spec(spec, sr=44100, factor=20., alpha=1.0, f0=0.9, fmax=1): spec = spec[:, 0:256] timebins, freqbins = np.shape(spec) scale = np.linspace(0, 1, freqbins) #** factor # http://ieeexplore.ieee.org/xpl/login.jsp?tp=&arnumber=650310&url=http%3A%2F%2Fieeexplore.ieee.org%2Fiel4%2F89%2F14168%2F00650310 scale = np.array(map(lambda x: x * alpha if x <= f0 else (fmax-alpha*f0)/(fmax-f0)*(x-f0)+alpha*f0, scale)) scale *= (freqbins-1)/max(scale) newspec = np.complex128(np.zeros([timebins, freqbins])) allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1]) freqs = [0.0 for i in range(freqbins)] totw = [0.0 for i in range(freqbins)] for i in range(0, freqbins): if (i < 1 or i + 1 >= freqbins): newspec[:, i] += spec[:, i] freqs[i] += allfreqs[i] totw[i] += 1.0 continue else: # scale[15] = 17.2 w_up = scale[i] - np.floor(scale[i]) w_down = 1 - w_up j = int(np.floor(scale[i])) newspec[:, j] += w_down * spec[:, i] freqs[j] += w_down * allfreqs[i] totw[j] += w_down newspec[:, j + 1] += w_up * spec[:, i] freqs[j + 1] += w_up * allfreqs[i] totw[j + 1] += w_up for i in range(len(freqs)): if (totw[i] > 1e-6): freqs[i] /= totw[i] return newspec, freqs """ plot spectrogram""" def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="gray", channel=0, name='tmp.png', alpha=1, offset=0): samplerate, samples = wav.read(audiopath) samples = samples[:, channel] s = stft(samples, binsize) sshow, freq = logscale_spec(s, factor=1, sr=samplerate, alpha=alpha) sshow = sshow[2:, :] ims = 20.*np.log10(np.abs(sshow)/10e-6) # amplitude to decibel timebins, freqbins = np.shape(ims) ims = np.transpose(ims) ims = ims[0:256, offset:offset+768] # 0-11khz, ~9s interval #print "ims.shape", ims.shape image = Image.fromarray(ims) image = image.convert('L') image.save(name) file = open('trainingData.csv', 'r') for iter, line in enumerate(file.readlines()[1:]): # first line of trainingData.csv is a header (only for trainingData.csv) filepath = line.split(',')[0] filename = filepath[:-4] wavfile = 'tmp.wav' os.system('mpg123 -w ' + wavfile + ' /home/brainstorm/caffe/Data/mnt/3/language/train/mp3/' + filepath) for augmentIdx in range(0, 20): alpha = np.random.uniform(0.9, 1.1) offset = np.random.randint(90) plotstft(wavfile, channel=0, name='/home/brainstorm/data/language/train/pngaugm/'+filename+'.'+str(augmentIdx)+'.png', alpha=alpha, offset=offset) os.remove(wavfile) print "processed %d files" % (iter + 1) ================================================ FILE: choose_equal_split.py ================================================ """split data into training and validation sets""" import csv with open('trainingData.csv', 'rb') as csvfile: next(csvfile) #skip headers data = list(csv.reader(csvfile, delimiter=',')) #Map every language to an ID langs = set([language.strip() for _,language in data]) ID = {lang: i for i,lang in enumerate(sorted(langs))} #Write the first 306 examples of each language to the training set and the rest to the validation set cnt = [0 for _ in range(len(langs))] with open('trainEqual.csv', 'w') as train: with open('valEqual.csv', 'w') as val: for line
in data: filepath, language = map(str.strip, line) id_lang = ID[language] if (cnt[id_lang] < 306): train.write(filepath[:-4] + ',' + str(id_lang) + '\n') else: val.write(filepath[:-4] + ',' + str(id_lang) + '\n') cnt[id_lang] += 1 ================================================ FILE: concatenate_csvs.py ================================================ """ Usage: python concatenate_csvs.py csv1path csv2path .. """ import sys import numpy as np n_csv = len(sys.argv) - 1 cnt = 12320 csv = [] for index in range(1, len(sys.argv)): csv.append(open(sys.argv[index], 'r')) outfile = open('concatenated.csv', 'w') for iter in range(12320): out = [] for index in range(n_csv): cur_out = csv[index].readline().split(',') cur_out = [float(x) for x in cur_out] out += cur_out out = [("%.6f" % x) for x in out] outfile.write(','.join(out) + '\n') ================================================ FILE: create_spectrograms.py ================================================ import numpy as np from matplotlib import pyplot as plt import scipy.io.wavfile as wav from numpy.lib import stride_tricks import PIL.Image as Image import os """ short time fourier transform of audio signal """ def stft(sig, frameSize, overlapFac=0.5, window=np.hanning): win = window(frameSize) hopSize = int(frameSize - np.floor(overlapFac * frameSize)) # zeros at beginning (thus center of 1st window should be for sample nr. 0) samples = np.append(np.zeros(np.floor(frameSize/2.0)), sig) # cols for windowing cols = np.ceil( (len(samples) - frameSize) / float(hopSize)) + 1 # zeros at end (thus samples can be fully covered by frames) samples = np.append(samples, np.zeros(frameSize)) frames = stride_tricks.as_strided(samples, shape=(cols, frameSize), strides=(samples.strides[0]*hopSize, samples.strides[0])).copy() frames *= win return np.fft.rfft(frames) """ scale frequency axis logarithmically """ def logscale_spec(spec, sr=44100, factor=20., alpha=1.0, f0=0.9, fmax=1): spec = spec[:, 0:256] timebins, freqbins = np.shape(spec) scale = np.linspace(0, 1, freqbins) #** factor # http://ieeexplore.ieee.org/xpl/login.jsp?tp=&arnumber=650310&url=http%3A%2F%2Fieeexplore.ieee.org%2Fiel4%2F89%2F14168%2F00650310 scale = np.array(map(lambda x: x * alpha if x <= f0 else (fmax-alpha*f0)/(fmax-f0)*(x-f0)+alpha*f0, scale)) scale *= (freqbins-1)/max(scale) newspec = np.complex128(np.zeros([timebins, freqbins])) allfreqs = np.abs(np.fft.fftfreq(freqbins*2, 1./sr)[:freqbins+1]) freqs = [0.0 for i in range(freqbins)] totw = [0.0 for i in range(freqbins)] for i in range(0, freqbins): if (i < 1 or i + 1 >= freqbins): newspec[:, i] += spec[:, i] freqs[i] += allfreqs[i] totw[i] += 1.0 continue else: # scale[15] = 17.2 w_up = scale[i] - np.floor(scale[i]) w_down = 1 - w_up j = int(np.floor(scale[i])) newspec[:, j] += w_down * spec[:, i] freqs[j] += w_down * allfreqs[i] totw[j] += w_down newspec[:, j + 1] += w_up * spec[:, i] freqs[j + 1] += w_up * allfreqs[i] totw[j + 1] += w_up for i in range(len(freqs)): if (totw[i] > 1e-6): freqs[i] /= totw[i] return newspec, freqs """ plot spectrogram""" def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="gray", channel=0, name='tmp.png', alpha=1, offset=0): samplerate, samples = wav.read(audiopath) samples = samples[:, channel] s = stft(samples, binsize) sshow, freq = logscale_spec(s, factor=1, sr=samplerate, alpha=alpha) sshow = sshow[2:, :] ims = 20.*np.log10(np.abs(sshow)/10e-6) # amplitude to decibel timebins, freqbins = np.shape(ims) ims = np.transpose(ims) # ims = ims[0:256, offset:offset+768] # 0-11khz, ~9s 
interval ims = ims[0:256, :] # 0-11khz, ~10s interval #print "ims.shape", ims.shape image = Image.fromarray(ims) image = image.convert('L') image.save(name) file = open('trainingData.csv', 'r') for iter, line in enumerate(file.readlines()[1:]): # first line of trainingData.csv is a header (only for trainingData.csv) filepath = line.split(',')[0] filename = filepath[:-4] wavfile = 'tmp.wav' os.system('mpg123 -w ' + wavfile + ' /home/brainstorm/caffe/Data/mnt/3/language/train/mp3/' + filepath) """ for augmentIdx in range(0, 20): alpha = np.random.uniform(0.9, 1.1) offset = np.random.randint(90) plotstft(wavfile, channel=0, name='/home/brainstorm/data/language/train/pngaugm/'+filename+'.'+str(augmentIdx)+'.png', alpha=alpha, offset=offset) """ # we create only one spectrogram for each speech sample # we don't do vocal tract length perturbation (alpha=1.0) # also we don't crop a 9s part from the speech plotstft(wavfile, channel=0, name='/home/brainstorm/data/language/train/pngaugm/'+filename+'.png', alpha=1.0) os.remove(wavfile) print "processed %d files" % (iter + 1) ================================================ FILE: ensembling/ensemble.theano.py ================================================ """ Usage: python ensemble.theano.py model1 [another_model]* for GPU mode 1. export PATH=$PATH:/usr/local/cuda-6.5/bin 2. THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32,nvcc.flags='-arch=sm_30' python ensemble.theano.py model1 [another_model]* """ import cPickle as pickle import sys import caffe import numpy as np caffe.set_mode_gpu() def get_score(probs, label): pred = sorted([(x, it) for it, x in enumerate(probs)], reverse=True) if (pred[0][1] == label): return 1000 if (pred[1][1] == label): return 400 if (pred[2][1] == label): return 160 return 0 def get_full_score(preds, labels): topCoderScore = 0.0 for i in range(len(labels)): topCoderScore += get_score(preds[i], labels[i]) return topCoderScore / len(labels) * 3520 ####################### COLLECTING INFO ABOUT LANGS ############################ file = open('../trainingData.csv') data = file.readlines()[1:] langs = set() for line in data: filepath, language = line.split(',') language = language.strip() langs.add(language) langs = sorted(langs) file.close() n_models = len(sys.argv) - 1 X = np.zeros((12320, n_models * 176), dtype=np.float32) for iter in range(n_models): csvpath = 'probs/val/' + sys.argv[iter + 1] csv = open(csvpath, 'r') for row_id, line in enumerate(csv.readlines()): mas = line.split(',') mas = np.array([float(x) for x in mas], dtype=np.float32) X[row_id, 176*iter:176*(iter+1)] = mas csv.close() Y = [] label_file = open('../valEqual.csv') for line in label_file.readlines(): Y.append(int(line.split(',')[1])) label_file.close() print "X.shape =", X.shape print "len(Y) =", len(Y) for iter in range(n_models): print "score of model %d = %f" % (iter+1, get_full_score(X[:, 176*iter:176*(iter+1)], Y)) ######################### TRAINING ENSEMBLING MODEL ############################ import theano import theano.tensor as T import lasagne import lasagne.layers as layers n_train_examples = 10000 X = X.astype(theano.config.floatX) trainX = X[:n_train_examples] trainY = Y[:n_train_examples] valX = X[n_train_examples:] valY = Y[n_train_examples:] input_var = T.matrix('X') target_var = T.ivector('y') from lasagne.nonlinearities import softmax, sigmoid, rectify network = lasagne.layers.InputLayer((None, X.shape[1]), input_var) network = lasagne.layers.DenseLayer(network, 4000, nonlinearity=rectify) network =
lasagne.layers.DenseLayer(lasagne.layers.dropout(network, 0.5), 176, nonlinearity=softmax) prediction = lasagne.layers.get_output(network) loss = lasagne.objectives.categorical_crossentropy(prediction, target_var) loss = loss.mean() + 0 * lasagne.regularization.regularize_network_params( network, lasagne.regularization.l2) params = lasagne.layers.get_all_params(network, trainable=True) learning_rate = theano.shared(np.float32(0.2)) updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=learning_rate, momentum=0.9) train_fn = theano.function([input_var, target_var], loss, updates=updates) validation_fn = theano.function([input_var, target_var], loss) for epoch in range(1000): train_loss = train_fn(trainX, trainY) val_loss = validation_fn(valX, valY) print "Epoch %d: train_loss = %f, val_loss = %f, lr = %f" % (epoch + 1, train_loss, val_loss, learning_rate.get_value()) if (epoch > 0 and epoch % 200 == 0): learning_rate.set_value(np.float32(learning_rate.get_value() * 0.7)) test_prediction = lasagne.layers.get_output(network, deterministic=True) predict_fn = theano.function([input_var], test_prediction) all_predictions = predict_fn(valX) score = 0.0 for probs, label in zip(all_predictions, valY): score += get_score(probs, label) print "Final score on ensembling validation = %f" % score print "Expected score = %f" % (score / len(valY) * 3520) print "\n\n==> creating submission..." X = np.zeros((12320, n_models * 176), dtype=np.float32) for iter in range(n_models): csvpath = 'probs/test/' + sys.argv[iter + 1] csv = open(csvpath, 'r') for row_id, line in enumerate(csv.readlines()): mas = line.split(',') mas = np.array([float(x) for x in mas], dtype=np.float32) X[row_id, 176*iter:176*(iter+1)] = mas csv.close() prediction = predict_fn(X) print "prediction.shape =", prediction.shape ensembled = open('ensembled.csv', 'w') for probs in prediction: out = [str(x) for x in probs] ensembled.write(','.join(out) + '\n') """ ######################### SAVING MODEL TO BE ABLE TO REPRODUCE ################# print "==> Saving model..." with open("model.pickle", 'w') as save_file: pickle.dump(obj = {'params' : layers.get_all_param_values(network)}, file = save_file, protocol = -1) """
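`ensemble.theano.py` above is a stacking ensemble: each base model contributes its 176 class probabilities, the vectors are concatenated into one feature row per example, and a small dense network is trained on top. A minimal sketch of that feature layout, assuming two probability CSVs with one 176-column row per example (`model_a.csv` and `model_b.csv` are placeholder names):

```python
import numpy as np

def load_probs(path):
    # one row per example, 176 comma-separated class probabilities
    return np.loadtxt(path, delimiter=',', dtype=np.float32)

# concatenate the models' outputs feature-wise, as ensemble.theano.py does
features = np.hstack([load_probs('model_a.csv'), load_probs('model_b.csv')])
print(features.shape)  # (n_examples, 2 * 176) -- input to the dense ensembling net
```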
================================================ FILE: ensembling/get_output_layers.py ================================================ """ Usage: python get_output_layers.py test|val """ import sys import caffe import numpy as np caffe.set_mode_gpu() deploy = '../prototxt/deploy.augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt' model = 'augm_dropout0.3_on_augm84K-lr0.01_30K_iter_75000' model_path = '../models/' + model + '.caffemodel' """ ####################### networks with no augmentation ########################## net = caffe.Classifier(deploy, model_path) transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) transformer.set_transpose('data', (2, 0, 1)) net.blobs['data'].reshape(1, 1, 256, 858) folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/png/' cnt = 12320 file = open('../valEqual.csv', 'r') prob_file = open('probs/val/' + model + '.csv', 'w') for iter in range(cnt): name = file.readline().split(',')[0] net.blobs['data'].data[...] = transformer.preprocess('data', caffe.io.load_image(folder + name + '.png', color=False)) probs = net.forward()['loss'][0] probs = [str(x) for x in probs] prob_file.write(','.join(probs) + '\n') if (iter % 100 == 0): print "processed %d images" % (iter + 1) """ ######################### networks with augmentation ########################### assert sys.argv[1] in ('test', 'val') dataset = sys.argv[1] augm_cnt = 20 cnt = 12320 if (dataset == 'val'): folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/pngaugm/' file = open('../valEqual.csv', 'r') else: folder = '../test/pngaugm/' file = open('../testingData.csv', 'r') # sum - mean of augm_cnt versions of speech # log - mean of logs of augm_cnt versions of speech # dense - last dense layer, 1024 outputs prob_file_sum = open('probs/' + dataset + '/' + model + '.sum' + str(augm_cnt) + '.csv', 'w') prob_file_log = open('probs/' + dataset + '/' + model + '.log' + str(augm_cnt) + '.csv', 'w') dense_file = open('probs/' + dataset + '/'+ model + '.dense' + str(augm_cnt) + '.csv', 'w') net = caffe.Classifier(deploy, model_path) transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) transformer.set_transpose('data', (2, 0, 1)) net.blobs['data'].reshape(augm_cnt, 1, 256, 768) for iter in range(cnt): if (dataset == 'val'): name = file.readline().split(',')[0] else: name = file.readline().strip()[:-4] X = np.zeros((augm_cnt, 1, 256, 768), dtype=np.float32) for index in range(augm_cnt): augm_path = folder + name + '.' + str(index) + '.png' X[index] = transformer.preprocess('data', caffe.io.load_image(augm_path, color=False)) net.blobs['data'].data[...] = X out = net.forward()['loss'] probs_sum = out.mean(axis=0) probs_log = np.log(out + 1e-7).mean(axis=0) dense = net.blobs['ip2new'].data probs_sum = [str(x) for x in probs_sum] prob_file_sum.write(','.join(probs_sum) + '\n') probs_log = ["%f" % x for x in probs_log] prob_file_log.write(','.join(probs_log) + '\n') for index in range(augm_cnt): tmp = [str(x) for x in dense[index]] dense_file.write(','.join(tmp) + '\n') if (iter % 10 == 0): print "processed %d images" % (iter + 1)
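The two scoring scripts that follow implement the TopCoder metric used throughout this repository: a clip earns 1000, 400, or 160 points when the true language appears first, second, or third in the prediction, and 0 otherwise. Restated as a self-contained function (`clip_score` is a hypothetical name, not part of the repo):

```python
def clip_score(top3, true_label):
    # 1000 / 400 / 160 points for the true language at rank 1 / 2 / 3
    points = (1000, 400, 160)
    for rank, label in enumerate(top3):
        if label == true_label:
            return points[rank]
    return 0

print(clip_score([12, 5, 90], 5))  # 400: true language ranked second
print(clip_score([12, 5, 90], 7))  # 0: true language not in the top 3
```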
================================================ FILE: get_score_from_probabilities.py ================================================ """ USAGE: python get_score_from_probabilities.py --prediction= --answer= The prediction file may have fewer lines """ import sys import numpy as np import argparse parser = argparse.ArgumentParser() parser.add_argument('--prediction', type=str) parser.add_argument('--answer', type=str, default='valDataNew.csv') args = parser.parse_args() print args # info about classes file = open('trainingData.csv') data = file.readlines()[1:] langs = set() for line in data: filepath, language = line.split(',') language = language.strip() langs.add(language) langs = sorted(langs) prediction_file = open(args.prediction, 'r') prediction_lines = prediction_file.readlines() answer_file = open(args.answer, 'r') answer_lines = answer_file.readlines() cnt = len(prediction_lines) top_coder_score = 0.0 correct = 0 wrong_answers = open('wrong_answers.txt', 'w') for iter in range(cnt): st = answer_lines[iter] (name, label) = st.split(',') label = int(label) out = prediction_lines[iter].split(',') out = [float(x) for x in out] pred = [(x, it) for it, x in enumerate(out)] pred = sorted(pred, reverse=True) if (pred[0][1] == label): correct += 1 top_coder_score = top_coder_score + 1000 elif (pred[1][1] == label): #correct += 1 top_coder_score = top_coder_score + 400 elif (pred[2][1] == label): #correct += 1 top_coder_score = top_coder_score + 160 if (pred[0][1] != label): print >> wrong_answers, answer_lines[iter] + prediction_lines[iter] if ((iter + 1) % 100 == 0): print >> sys.stderr, "processed %d / %d images" % (iter + 1, cnt) print >> sys.stderr, "expected score:", top_coder_score / (iter + 1) * 35200 print >> sys.stderr, "Final score: ", top_coder_score, " / ", cnt, "000" print >> sys.stderr, "expected score:", top_coder_score / cnt * 35200 print >> sys.stderr, "Accuracy: ", 100.0 * correct / cnt ================================================ FILE: get_score_from_top3_prediction.py ================================================ """ USAGE: python get_score_from_top3_prediction.py --prediction= --answer= The prediction file may have fewer lines Each line of the prediction file must contain at least 3 integers: the labels of the top-3 predictions; they may be followed by additional information """ import sys import numpy as np import argparse parser = argparse.ArgumentParser() parser.add_argument('--prediction', type=str) parser.add_argument('--answer', type=str, default='valDataNew.csv') args = parser.parse_args() print args # info about classes file = open('trainingData.csv') data = file.readlines()[1:] langs = set() for line in data: filepath, language = line.split(',') language = language.strip() langs.add(language) langs = sorted(langs) prediction_file = open(args.prediction, 'r') prediction_lines = prediction_file.readlines() answer_file = open(args.answer, 'r') answer_lines = answer_file.readlines() cnt = len(prediction_lines) top_coder_score = 0.0 correct = 0 wrong_answers = open('wrong_answers.txt', 'w') for iter in range(cnt): st = answer_lines[iter] (name, label) = st.split(',') label = int(label) pred = prediction_lines[iter].split(',') pred = [int(x) for x in pred] if (pred[0] == label): correct += 1 top_coder_score = top_coder_score + 1000 elif (pred[1] == label): #correct += 1 top_coder_score = top_coder_score + 400 elif (pred[2] == label): #correct += 1 top_coder_score = top_coder_score + 160 if (pred[0] != label): print >> wrong_answers, (answer_lines[iter] + str(pred[3 + pred[0]]) + ',' + str(pred[3 + pred[1]]) + ',' + str(pred[3 + pred[2]]) + ', votes for correct answer: ' + str(pred[3 + label])) if ((iter + 1) % 100 == 0): print >> sys.stderr, "processed %d / %d images" % (iter + 1, cnt) print >> sys.stderr, "expected score:", top_coder_score / (iter + 1) * 35200 print >> sys.stderr, "Final score: ", top_coder_score, " / ", cnt, "000" print >> sys.stderr, "expected score:", top_coder_score / cnt * 35200 print >> sys.stderr, "Accuracy: ", 100.0 * correct / cnt ================================================ FILE: get_sum_of_csvs.py ================================================ """ Usage: python get_sum_of_csvs.py csv1path csv2path .. """ import sys import numpy as np n_csv = len(sys.argv) - 1 cnt = 12320 csv = [] for index in range(1, len(sys.argv)): csv.append(open(sys.argv[index], 'r')) outfile = open('summed.csv', 'w') for iter in range(12320): out = np.zeros((176,), dtype=np.float32) for index in range(n_csv): cur_out = csv[index].readline().split(',') cur_out = [float(x) for x in cur_out] out += cur_out out = [("%.6f" % x) for x in out] outfile.write(','.join(out) + '\n') ================================================ FILE: majority_vote_ensembling.py ================================================ """ Usage: python majority_vote_ensembling.py csv1path csv2path ..
""" import sys import numpy as np n_csv = len(sys.argv) - 1 train_cnt = 12320 csv = [] for index in range(1, len(sys.argv)): csv.append(open(sys.argv[index], 'r')) ensembled = open('top3_prediction_ensembled.csv', 'w') for iter in range(train_cnt): cnt = [0 for i in range(176)] avg_prob = np.array([0.0 for i in range(176)]) for index in range(n_csv): cur_prob = csv[index].readline().split(',') cur_prob = np.array([float(x) for x in cur_prob]) avg_prob += cur_prob prediction = cur_prob.argmax() cnt[prediction] += 1 mas = [(cnt[index], avg_prob[index], index) for index in range(176)] mas = sorted(mas, reverse=True) ensembled.write(str(mas[0][2]) + ',' + str(mas[1][2]) + ',' + str(mas[2][2]) + ',') ensembled.write(','.join([str(x) for x in cnt]) + '\n') ================================================ FILE: make_submission.py ================================================ """ Usage: python make_submission.py csvpath model_name csv - must contain 12320 rows, 176 coloumns: the predictions for test set """ import sys import numpy as np # info about classes file = open('trainingData.csv') data = file.readlines()[1:] langs = set() for line in data: filepath, language = line.split(',') language = language.strip() langs.add(language) langs = sorted(langs) path = sys.argv[1] name = sys.argv[2] read_file = open(path, 'r') f = open('testingData.csv') cnt = 12320 print_file = open('predictions/test_' + name + '.csv', 'w') for iter in range(cnt): st = f.readline() name = st.strip()[:-4] out = read_file.readline().split(',') out = [float(x) for x in out] pred = sorted([(x, it) for it, x in enumerate(out)], reverse=True) for i in range(3): lang_id = pred[i][1] lang = langs[lang_id] print_file.write(name + '.mp3,' + lang + ',' + str(i + 1) + '\n') if (iter % 100 == 0): print >> sys.stderr, "processed %d / %d images" % (iter + 1, cnt) ================================================ FILE: prototxt/augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024r-1024r_DLR_nolrcoef.prototxt ================================================ name: "LangNet" # DATA LAYERS layer { name: "mnist" type: "Data" top: "data" top: "label" include { phase: TRAIN } transform_param { scale: 0.00390625 } data_param { source: "train/train_augm_db" batch_size: 24 backend: LEVELDB } } layer { name: "mnist" type: "Data" top: "data" top: "label" include { phase: TEST } transform_param { scale: 0.00390625 } data_param { source: "train/val_augm_db" batch_size: 24 backend: LEVELDB } } # CONV1-RELU1-POOL1 layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 32 kernel_size: 7 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1" } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } # CONV2-RELU2-POOL2_ layer { name: "conv2" type: "Convolution" bottom: "pool1" top: "conv2" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 64 kernel_size: 5 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2" } layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } # CONV3-RELU3-POOL3 layer { name: "conv3" type: "Convolution" bottom: "pool2" top: "conv3" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 64 kernel_size: 
================================================ FILE: prototxt/augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024r-1024r_DLR_nolrcoef.prototxt ================================================ name: "LangNet" # DATA LAYERS layer { name: "mnist" type: "Data" top: "data" top: "label" include { phase: TRAIN } transform_param { scale: 0.00390625 } data_param { source: "train/train_augm_db" batch_size: 24 backend: LEVELDB } } layer { name: "mnist" type: "Data" top: "data" top: "label" include { phase: TEST } transform_param { scale: 0.00390625 } data_param { source: "train/val_augm_db" batch_size: 24 backend: LEVELDB } } # CONV1-RELU1-POOL1 layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 32 kernel_size: 7 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1" } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } # CONV2-RELU2-POOL2 layer { name: "conv2" type: "Convolution" bottom: "pool1" top: "conv2" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 64 kernel_size: 5 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2" } layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } # CONV3-RELU3-POOL3 layer { name: "conv3" type: "Convolution" bottom: "pool2" top: "conv3" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 64 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu3" type: "ReLU" bottom: "conv3" top: "conv3" } layer { name: "pool3" type: "Pooling" bottom: "conv3" top: "pool3" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } # CONV4-RELU4-POOL4 layer { name: "conv4" type: "Convolution" bottom: "pool3" top: "conv4" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 128 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu4" type: "ReLU" bottom: "conv4" top: "conv4" } layer { name: "pool4" type: "Pooling" bottom: "conv4" top: "pool4" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } # CONV5-RELU5-POOL5 layer { name: "conv5" type: "Convolution" bottom: "pool4" top: "conv5" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 128 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu5" type: "ReLU" bottom: "conv5" top: "conv5" } layer { name: "pool5" type: "Pooling" bottom: "conv5" top: "pool5" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } # CONV6-RELU6-POOL6 layer { name: "conv6" type: "Convolution" bottom: "pool5" top: "conv6" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 256 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu6" type: "ReLU" bottom: "conv6" top: "conv6" } layer { name: "pool6" type: "Pooling" bottom: "conv6" top: "pool6" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } # IP layers layer { name: "ip1new" type: "InnerProduct" bottom: "pool6" top: "ip1new" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 1024 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "reluOnIp1" type: "ReLU" bottom: "ip1new" top: "ip1new" } layer { name: "ip2new" type: "InnerProduct" bottom: "ip1new" top: "ip2new" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 1024 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "reluOnIp2" type: "ReLU" bottom: "ip2new" top: "ip2new" } layer { name: "ip3new" type: "InnerProduct" bottom: "ip2new" top: "ip3new" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 176 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "accuracy" type: "Accuracy" bottom: "ip3new" bottom: "label" top: "accuracy" include { phase: TEST } } layer { name: "loss" type: "SoftmaxWithLoss" bottom: "ip3new" bottom: "label" top: "loss" } ================================================ FILE: prototxt/augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt ================================================ name: "LangNet" # DATA LAYERS layer { name: "mnist" type: "Data" top: "data" top: "label" include { phase: TRAIN } transform_param { scale: 0.00390625 } data_param { source: "train/train_augm_db" batch_size: 23 backend: LEVELDB } } layer { name: "mnist" type: "Data" top: "data" top: "label" include { phase: TEST } transform_param { scale: 0.00390625 } data_param { source: "train/val_augm_db" batch_size: 24 backend: LEVELDB } } # CONV1-RELU1-POOL1 layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 32 kernel_size: 7 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer {
name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1" } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } # CONV2-RELU2-POOL2_ layer { name: "conv2" type: "Convolution" bottom: "pool1" top: "conv2" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 64 kernel_size: 5 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2" } layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } # CONV3-RELU3-POOL3 layer { name: "conv3" type: "Convolution" bottom: "pool2" top: "conv3" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 64 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu3" type: "ReLU" bottom: "conv3" top: "conv3" } layer { name: "pool3" type: "Pooling" bottom: "conv3" top: "pool3" pooling_param { pool: MAX kernel_size: 3 stride:2 } } # CONV4-RELU4-POOL4 layer { name: "conv4" type: "Convolution" bottom: "pool3" top: "conv4" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 128 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu4" type: "ReLU" bottom: "conv4" top: "conv4" } layer { name: "pool4" type: "Pooling" bottom: "conv4" top: "pool4" pooling_param { pool: MAX kernel_size: 3 stride:2 } } # CONV5-RELU5-POOL5 layer { name: "conv5" type: "Convolution" bottom: "pool4" top: "conv5" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 128 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu5" type: "ReLU" bottom: "conv5" top: "conv5" } layer { name: "pool5" type: "Pooling" bottom: "conv5" top: "pool5" pooling_param { pool: MAX kernel_size: 3 stride:2 } } # CONV6-RELU6-POOL6 layer { name: "conv6" type: "Convolution" bottom: "pool5" top: "conv6" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 256 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu6" type: "ReLU" bottom: "conv6" top: "conv6" } layer { name: "pool6" type: "Pooling" bottom: "conv6" top: "pool6" pooling_param { pool: MAX kernel_size: 3 stride:2 } } # IP layers layer { name: "ip1new" type: "InnerProduct" bottom: "pool6" top: "ip1new" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 1024 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "reluOnIp1" type: "ReLU" bottom: "ip1new" top: "ip1new" } layer { name: "dropOnIp1" type: "Dropout" dropout_param { dropout_ratio: 0.3 } bottom: "ip1new" top: "ip1new" } layer { name: "ip2new" type: "InnerProduct" bottom: "ip1new" top: "ip2new" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 1024 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "reluOnIp2" type: "ReLU" bottom: "ip2new" top: "ip2new" } layer { name: "dropOnIp2" type: "Dropout" dropout_param { dropout_ratio: 0.3 } bottom: "ip2new" top: "ip2new" } layer { name: "ip3new" type: "InnerProduct" bottom: "ip2new" top: "ip3new" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 176 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "accuracy" type: "Accuracy" bottom: "ip3new" 
bottom: "label" top: "accuracy" include { phase: TEST } } layer { name: "loss" type: "SoftmaxWithLoss" bottom: "ip3new" bottom: "label" top: "loss" } ================================================ FILE: prototxt/deploy.augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt ================================================ name: "LangNet" # DATA LAYERS input: "data" input_dim: 1 input_dim: 1 input_dim: 256 input_dim: 768 # CONV1-RELU1-POOL1 layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 32 kernel_size: 7 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1" } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } # CONV2-RELU2-POOL2_ layer { name: "conv2" type: "Convolution" bottom: "pool1" top: "conv2" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 64 kernel_size: 5 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2" } layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } # CONV3-RELU3-POOL3 layer { name: "conv3" type: "Convolution" bottom: "pool2" top: "conv3" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 64 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu3" type: "ReLU" bottom: "conv3" top: "conv3" } layer { name: "pool3" type: "Pooling" bottom: "conv3" top: "pool3" pooling_param { pool: MAX kernel_size: 3 stride:2 } } # CONV4-RELU4-POOL4 layer { name: "conv4" type: "Convolution" bottom: "pool3" top: "conv4" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 128 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu4" type: "ReLU" bottom: "conv4" top: "conv4" } layer { name: "pool4" type: "Pooling" bottom: "conv4" top: "pool4" pooling_param { pool: MAX kernel_size: 3 stride:2 } } # CONV5-RELU5-POOL5 layer { name: "conv5" type: "Convolution" bottom: "pool4" top: "conv5" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 128 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu5" type: "ReLU" bottom: "conv5" top: "conv5" } layer { name: "pool5" type: "Pooling" bottom: "conv5" top: "pool5" pooling_param { pool: MAX kernel_size: 3 stride:2 } } # CONV6-RELU6-POOL6 layer { name: "conv6" type: "Convolution" bottom: "pool5" top: "conv6" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 256 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu6" type: "ReLU" bottom: "conv6" top: "conv6" } layer { name: "pool6" type: "Pooling" bottom: "conv6" top: "pool6" pooling_param { pool: MAX kernel_size: 3 stride:2 } } # IP layers layer { name: "ip1new" type: "InnerProduct" bottom: "pool6" top: "ip1new" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 1024 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "reluOnIp1" type: "ReLU" bottom: "ip1new" top: "ip1new" } layer { name: "dropOnIp1" type: "Dropout" dropout_param { dropout_ratio: 0.3 } bottom: "ip1new" 
top: "ip1new" } layer { name: "ip2new" type: "InnerProduct" bottom: "ip1new" top: "ip2new" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 1024 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "reluOnIp2" type: "ReLU" bottom: "ip2new" top: "ip2new" } layer { name: "dropOnIp2" type: "Dropout" dropout_param { dropout_ratio: 0.3 } bottom: "ip2new" top: "ip2new" } layer { name: "ip3new" type: "InnerProduct" bottom: "ip2new" top: "ip3new" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 176 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "loss" type: "Softmax" bottom: "ip3new" top: "loss" } ================================================ FILE: prototxt/deploy.main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt ================================================ name: "LangNet" # DATA LAYERS input: "data" input_dim: 1 input_dim: 1 input_dim: 256 input_dim: 858 # CONV1-RELU1-POOL1 layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 15 } param { lr_mult: 30 } convolution_param { num_output: 32 kernel_size: 7 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1" } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } # CONV2-RELU2-POOL2_ layer { name: "conv2" type: "Convolution" bottom: "pool1" top: "conv2" param { lr_mult: 12 } param { lr_mult: 24 } convolution_param { num_output: 64 kernel_size: 5 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2" } layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } # CONV3-RELU3-POOL3 layer { name: "conv3" type: "Convolution" bottom: "pool2" top: "conv3" param { lr_mult: 9 } param { lr_mult: 18 } convolution_param { num_output: 64 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu3" type: "ReLU" bottom: "conv3" top: "conv3" } layer { name: "pool3" type: "Pooling" bottom: "conv3" top: "pool3" pooling_param { pool: MAX kernel_size: 3 stride:2 } } # CONV4-RELU4-POOL4 layer { name: "conv4" type: "Convolution" bottom: "pool3" top: "conv4" param { lr_mult: 4 } param { lr_mult: 8 } convolution_param { num_output: 128 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu4" type: "ReLU" bottom: "conv4" top: "conv4" } layer { name: "pool4" type: "Pooling" bottom: "conv4" top: "pool4" pooling_param { pool: MAX kernel_size: 3 stride:2 } } # CONV5-RELU5-POOL5 layer { name: "conv5" type: "Convolution" bottom: "pool4" top: "conv5" param { lr_mult: 2 } param { lr_mult: 4 } convolution_param { num_output: 128 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu5" type: "ReLU" bottom: "conv5" top: "conv5" } layer { name: "pool5" type: "Pooling" bottom: "conv5" top: "pool5" pooling_param { pool: MAX kernel_size: 3 stride:2 } } # CONV6-RELU6-POOL6 layer { name: "conv6" type: "Convolution" bottom: "pool5" top: "conv6" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 256 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: 
"relu6" type: "ReLU" bottom: "conv6" top: "conv6" } layer { name: "pool6" type: "Pooling" bottom: "conv6" top: "pool6" pooling_param { pool: MAX kernel_size: 3 stride:2 } } # IP layers layer { name: "ip1" type: "InnerProduct" bottom: "pool6" top: "ip1" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 1024 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "reluOnIp1" type: "ReLU" bottom: "ip1" top: "ip1" } layer { name: "dropOnIp1" type: "Dropout" dropout_param { dropout_ratio: 0.5 } bottom: "ip1" top: "ip1" } layer { name: "ip2" type: "InnerProduct" bottom: "ip1" top: "ip2" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 1024 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "reluOnIp2" type: "ReLU" bottom: "ip2" top: "ip2" } layer { name: "dropOnIp2" type: "Dropout" dropout_param { dropout_ratio: 0.5 } bottom: "ip2" top: "ip2" } layer { name: "ip3" type: "InnerProduct" bottom: "ip2" top: "ip3" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 176 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "loss" type: "Softmax" bottom: "ip3" top: "loss" } ================================================ FILE: prototxt/main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt ================================================ name: "LangNet" # DATA LAYERS layer { name: "mnist" type: "Data" top: "data" top: "label" include { phase: TRAIN } transform_param { scale: 0.00390625 } data_param { source: "train/traindb" batch_size: 32 backend: LEVELDB } } layer { name: "mnist" type: "Data" top: "data" top: "label" include { phase: TEST } transform_param { scale: 0.00390625 } data_param { source: "train/valdb" batch_size: 1 backend: LEVELDB } } # CONV1-RELU1-POOL1 layer { name: "conv1" type: "Convolution" bottom: "data" top: "conv1" param { lr_mult: 15 } param { lr_mult: 30 } convolution_param { num_output: 32 kernel_size: 7 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu1" type: "ReLU" bottom: "conv1" top: "conv1" } layer { name: "pool1" type: "Pooling" bottom: "conv1" top: "pool1" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } # CONV2-RELU2-POOL2_ layer { name: "conv2" type: "Convolution" bottom: "pool1" top: "conv2" param { lr_mult: 12 } param { lr_mult: 24 } convolution_param { num_output: 64 kernel_size: 5 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu2" type: "ReLU" bottom: "conv2" top: "conv2" } layer { name: "pool2" type: "Pooling" bottom: "conv2" top: "pool2" pooling_param { pool: MAX kernel_size: 3 stride: 2 } } # CONV3-RELU3-POOL3 layer { name: "conv3" type: "Convolution" bottom: "pool2" top: "conv3" param { lr_mult: 9 } param { lr_mult: 18 } convolution_param { num_output: 64 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu3" type: "ReLU" bottom: "conv3" top: "conv3" } layer { name: "pool3" type: "Pooling" bottom: "conv3" top: "pool3" pooling_param { pool: MAX kernel_size: 3 stride:2 } } # CONV4-RELU4-POOL4 layer { name: "conv4" type: "Convolution" bottom: "pool3" top: "conv4" param { lr_mult: 4 } param { lr_mult: 8 } convolution_param { num_output: 128 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu4" type: "ReLU" bottom: "conv4" top: "conv4" } layer { name: 
"pool4" type: "Pooling" bottom: "conv4" top: "pool4" pooling_param { pool: MAX kernel_size: 3 stride:2 } } # CONV5-RELU5-POOL5 layer { name: "conv5" type: "Convolution" bottom: "pool4" top: "conv5" param { lr_mult: 2 } param { lr_mult: 4 } convolution_param { num_output: 128 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu5" type: "ReLU" bottom: "conv5" top: "conv5" } layer { name: "pool5" type: "Pooling" bottom: "conv5" top: "pool5" pooling_param { pool: MAX kernel_size: 3 stride:2 } } # CONV6-RELU6-POOL6 layer { name: "conv6" type: "Convolution" bottom: "pool5" top: "conv6" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 256 kernel_size: 3 stride: 1 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "relu6" type: "ReLU" bottom: "conv6" top: "conv6" } layer { name: "pool6" type: "Pooling" bottom: "conv6" top: "pool6" pooling_param { pool: MAX kernel_size: 3 stride:2 } } # IP layers layer { name: "ip1" type: "InnerProduct" bottom: "pool6" top: "ip1" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 1024 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "reluOnIp1" type: "ReLU" bottom: "ip1" top: "ip1" } layer { name: "dropOnIp1" type: "Dropout" dropout_param { dropout_ratio: 0.5 } bottom: "ip1" top: "ip1" } layer { name: "ip2" type: "InnerProduct" bottom: "ip1" top: "ip2" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 1024 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "reluOnIp2" type: "ReLU" bottom: "ip2" top: "ip2" } layer { name: "dropOnIp2" type: "Dropout" dropout_param { dropout_ratio: 0.5 } bottom: "ip2" top: "ip2" } layer { name: "ip3" type: "InnerProduct" bottom: "ip2" top: "ip3" param { lr_mult: 1 } param { lr_mult: 2 } inner_product_param { num_output: 176 weight_filler { type: "xavier" } bias_filler { type: "constant" } } } layer { name: "accuracy" type: "Accuracy" bottom: "ip3" bottom: "label" top: "accuracy" include { phase: TEST } } layer { name: "loss" type: "SoftmaxWithLoss" bottom: "ip3" bottom: "label" top: "loss" } ================================================ FILE: prototxt/solver.augm.nolrcoef.prototxt ================================================ net: "prototxt/augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3.prototxt" test_iter: 512 test_interval: 1500 # The base learning rate, momentum and the weight decay of the network. base_lr: 0.01 weight_decay: 0.0000 # The learning rate policy # lr_policy: "fixed" # solver_type: ADADELTA lr_policy: "inv" gamma: 0.0003 power: 0.9 #lr_policy: "step" #gamma: 0.9 #stepsize: 6000 display: 1 max_iter: 800000 snapshot: 3000 snapshot_prefix: "models/augm_dropout0.3_on_augm84K-lr0.01_30K_90K" #log: "logs/augm_dropout0.3_on_augm84K-lr0.01_30K_90K.txt" solver_mode: GPU ================================================ FILE: prototxt/solver.main.adadelta.prototxt ================================================ net: "prototxt/main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR.prototxt" test_iter: 100 test_interval: 100 # The base learning rate, momentum and the weight decay of the network. 
weight_decay: 0.0000 # The learning rate policy base_lr: 0.01 lr_policy: "fixed" solver_type: ADADELTA display: 1 max_iter: 800000 snapshot: 3000 snapshot_prefix: "models/main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR_adadelta0.01" #log: "logs/main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR_adadelta0.01.txt" solver_mode: GPU ================================================ FILE: test_augm_network.py ================================================ import sys import caffe import numpy as np caffe.set_mode_gpu() # info about classes file = open('trainingData.csv') data = file.readlines()[1:] langs = set() for line in data: filepath, language = line.split(',') language = language.strip() langs.add(language) langs = sorted(langs) # network parameters: deploy_name = 'augm_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.3-1024rd0.3' network_name = 'augm_dropout0.3_on_augm84K-lr0.01_30K' iterations = '90000' aveSamples = 20 # average over this many samples net = caffe.Classifier(model_file='prototxt/deploy.' + deploy_name + '.prototxt', pretrained_file='models/' + network_name + '_iter_' + iterations + '.caffemodel') net.blobs['data'].reshape(1, 1, 256, 768) predict_set = sys.argv[1] if (predict_set == "test"): folder = 'test/png/' f = open('testingData.csv') cnt = 12320 print_file = open('predictions/test_' + network_name + '_iter_' + iterations + '_' + str(aveSamples) + '.csv', 'w') elif (predict_set == "val"): folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/pngaugm/' f = open('valEqual.csv') cnt = 12320 print_file = open('predictions/validation_' + network_name + '_iter_' + iterations + '_' + str(aveSamples) + '.csv', 'w') else: # train folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/pngaugm/' f = open('trainEqual.csv') cnt = 10000 print_file = open('predictions/train_' + network_name + '_iter_' + iterations + '_' + str(aveSamples) + '.csv', 'w') preds = [] labels = [] topcoder_score = 0.0 processed = 0 for iter in range(cnt): st = f.readline() if (predict_set == "val" or predict_set == "train"): (name, label) = st.split(',') label = int(label) else: name = st.strip()[:-4] processed += 1 out = np.zeros((176, )) for randomIndex in range(aveSamples): image = caffe.io.load_image(folder + name + '.' + str(randomIndex) + '.png', color=False) image = np.transpose(image, (2, 0, 1)) #image = np.concatenate([image, np.zeros((1, 256, 858 - 768), dtype=np.float32)], axis=2) net.blobs['data'].data[...] 
= image out += net.forward()['loss'][0] pred = sorted([(x, it) for it, x in enumerate(out)], reverse=True) if (predict_set == "val" or predict_set == "train"): if (pred[0][1] == label): topcoder_score = topcoder_score + 1000 elif (pred[1][1] == label): topcoder_score = topcoder_score + 400 elif (pred[2][1] == label): topcoder_score = topcoder_score + 160 for i in range(3): lang_id = pred[i][1] lang = langs[lang_id] print_file.write(name + '.mp3,' + lang + ',' + str(i + 1) + '\n') if (iter % 100 == 0): print >> sys.stderr, network_name + '_iter_' + iterations + '_' + str(aveSamples) print >> sys.stderr, "processed %d / %d images (%d samples/mp3)" % (iter, cnt, aveSamples) print >> sys.stderr, "score: ", topcoder_score print >> sys.stderr, "expected score:", topcoder_score / processed * 35200 print >> sys.stderr, "Final score: ", topcoder_score, " / ", cnt, "000" print >> sys.stderr, "expected score:", topcoder_score / processed * 35200 ================================================ FILE: test_main_network.py ================================================ import sys import caffe import numpy as np caffe.set_mode_gpu() # info about classes file = open('trainingData.csv') data = file.readlines()[1:] langs = set() for line in data: filepath, language = line.split(',') language = language.strip() langs.add(language) langs = sorted(langs) # network parameters: deploy_name = 'main_32r-2-64r-2-64r-2-128r-2-128r-2-256r-2-1024rd0.5-1024rd0.5_DLR' network_name = deploy_name + '_150K-momentum' iterations = '51000' net = caffe.Classifier(model_file='prototxt/deploy.' + deploy_name + '.prototxt', pretrained_file='models/' + network_name + '_iter_' + iterations + '.caffemodel') transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) transformer.set_transpose('data', (2, 0, 1)) net.blobs['data'].reshape(1, 1, 256, 858) predict_set = sys.argv[1] if (predict_set == "test"): folder = 'test/png/' f = open('testingData.csv') cnt = 12320 print_file = open('predictions/test_' + network_name + '_iter_' + iterations + '.csv', 'w') elif (predict_set == "val"): folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/pngaugm/' ## note: points at the augmented (pngaugm) folder here f = open('valDataNew.csv') cnt = 16176 print_file = open('predictions/validation_' + network_name + '_iter_' + iterations + '.csv', 'w') else: # train folder = '/home/brainstorm/caffe/Data/mnt/3/language/train/png/' f = open('trainingDataNew.csv') cnt = 10000 print_file = open('predictions/train_' + network_name + '_iter_' + iterations + '.csv', 'w') preds = [] labels = [] topcoder_score = 0 processed = 0 for iter in range(cnt): st = f.readline() if (predict_set == "val" or predict_set == "train"): (name, label) = st.split(',') label = int(label) else: name = st.strip()[:-4] processed += 1 net.blobs['data'].data[...] = transformer.preprocess('data', caffe.io.load_image(folder + name + '.png', color=False)) out = net.forward()['loss'][0] pred = sorted([(x, it) for it, x in enumerate(out)], reverse=True) if (predict_set == "val" or predict_set == "train"): if (pred[0][1] == label): topcoder_score = topcoder_score + 1000 elif (pred[1][1] == label): topcoder_score = topcoder_score + 400 elif (pred[2][1] == label): topcoder_score = topcoder_score + 160 for i in range(3): lang_id = pred[i][1] lang = langs[lang_id] print_file.write(name + '.mp3,' + lang + ',' + str(i + 1) + '\n') if (iter % 100 == 0): print >> sys.stderr, "processed %d / %d images" % (iter, cnt) print >> sys.stderr, "score: ", topcoder_score print >> sys.stderr, "expected score:", topcoder_score / processed * 35200 print >> sys.stderr, "Final score: ", topcoder_score, " / ", cnt, "000" print >> sys.stderr, "expected score:", topcoder_score / processed * 35200
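Both test scripts above rank the 176 output probabilities and keep the top 3; `test_augm_network.py` additionally averages the softmax output over `aveSamples` augmented crops of the same clip before ranking. That test-time averaging step in isolation (`predict_fn` stands in for one forward pass returning a length-176 probability vector):

```python
import numpy as np

def averaged_top3(predict_fn, crops):
    # average the class probabilities over all augmented crops of one clip
    probs = np.zeros(176)
    for crop in crops:
        probs += predict_fn(crop)
    probs /= len(crops)
    return np.argsort(probs)[::-1][:3]  # top-3 language ids, best first
```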
================================================ FILE: theano/README.md ================================================ # Spoken language identification The `networks` folder contains multiple CNN and/or RNN models implemented in Theano/Lasagne. Read more in the corresponding [blog post](http://yerevann.github.io/2016/06/26/combining-cnn-and-rnn-for-spoken-language-identification/). ================================================ FILE: theano/main.py ================================================ import sys import numpy as np import sklearn.metrics as metrics import argparse import time import json import importlib print "==> parsing input arguments" parser = argparse.ArgumentParser() # TODO: add argument to choose training set parser.add_argument('--network', type=str, default="network_batch", help='name of the network module inside theano/networks (e.g. tc_net_rnn)') parser.add_argument('--epochs', type=int, default=500, help='number of epochs to train') parser.add_argument('--load_state', type=str, default="", help='state file path') parser.add_argument('--mode', type=str, default="train", help='mode: train/test/test_on_train') parser.add_argument('--batch_size', type=int, default=32, help='batch size') parser.add_argument('--l2', type=float, default=0, help='L2 regularization') parser.add_argument('--log_every', type=int, default=100, help='print information every x iterations') parser.add_argument('--save_every', type=int, default=50000, help='save state every x iterations') parser.add_argument('--prefix', type=str, default="", help='optional prefix of network name') parser.add_argument('--dropout', type=float, default=0.0, help='dropout rate (between 0 and 1)') parser.add_argument('--no-batch_norm', dest="batch_norm", action='store_false', help='disable batch normalization') parser.add_argument('--rnn_num_units', type=int, default=500, help='number of hidden units if the network is RNN') parser.add_argument('--equal_split', type=bool, default=False, help='use trainEqual.csv and valEqual.csv') parser.add_argument('--forward_cnt', type=int, default=1, help='if forward pass is nondeterministic, then how many forward passes are averaged') parser.set_defaults(batch_norm=True) args = parser.parse_args() print args if (args.equal_split): train_listfile = open("/mnt/hdd615/Hrayr/Spoken-language-identification/trainEqual.csv", "r") test_listfile = open("/mnt/hdd615/Hrayr/Spoken-language-identification/valEqual.csv", "r") else: train_listfile = open("/mnt/hdd615/Hrayr/Spoken-language-identification/trainingDataNew.csv", "r") test_listfile = open("/mnt/hdd615/Hrayr/Spoken-language-identification/valDataNew.csv", "r")
train_list_raw = train_listfile.readlines() test_list_raw = test_listfile.readlines() print "==> %d training examples" % len(train_list_raw) print "==> %d validation examples" % len(test_list_raw) train_listfile.close() test_listfile.close() args_dict = dict(vars(args)) args_dict['train_list_raw'] = train_list_raw args_dict['test_list_raw'] = test_list_raw args_dict['png_folder'] = "/mnt/hdd615/Hrayr/Spoken-language-identification/train/png/" print "==> using network %s" % args.network network_module = importlib.import_module("networks." + args.network) network = network_module.Network(**args_dict) network_name = args.prefix + '%s.bs%d%s%s' % ( network.say_name(), args.batch_size, ".bn" if args.batch_norm else "", (".d" + str(args.dropout)) if args.dropout > 0 else "") print "==> network_name:", network_name start_epoch = 0 if args.load_state != "": start_epoch = network.load_state(args.load_state) + 1 def do_epoch(mode, epoch): # mode is 'train' or 'test' or 'predict' y_true = [] y_pred = [] avg_loss = 0.0 prev_time = time.time() batches_per_epoch = network.get_batches_per_epoch(mode) all_prediction = [] for i in range(0, batches_per_epoch): step_data = network.step(i, mode) prediction = step_data["prediction"] answers = step_data["answers"] current_loss = step_data["current_loss"] log = step_data["log"] avg_loss += current_loss if (mode == "predict" or mode == "predict_on_train"): all_prediction.append(prediction) for pass_id in range(args.forward_cnt - 1): step_data = network.step(i, mode) prediction += step_data["prediction"] current_loss += step_data["current_loss"] prediction /= args.forward_cnt current_loss /= args.forward_cnt for x in answers: y_true.append(x) for x in prediction.argmax(axis=1): y_pred.append(x) if ((i + 1) % args.log_every == 0): cur_time = time.time() print (" %sing: %d.%d / %d \t loss: %.3f \t avg_loss: %.5f \t %s \t time: %.2fs" % (mode, epoch, (i + 1) * args.batch_size, batches_per_epoch * args.batch_size, current_loss, avg_loss / (i + 1), log, cur_time - prev_time)) prev_time = cur_time #print "confusion matrix:" #print metrics.confusion_matrix(y_true, y_pred) accuracy = sum([1 if t == p else 0 for t, p in zip(y_true, y_pred)]) print "accuracy: %.2f percent" % (accuracy * 100.0 / batches_per_epoch / args.batch_size) if (mode == "predict"): all_prediction = np.vstack(all_prediction) pred_filename = "predictions/" + ("equal_split." if args.equal_split else "") + \ args.load_state[args.load_state.rfind('/')+1:] + ".csv" with open(pred_filename, 'w') as pred_csv: for x in all_prediction: print >> pred_csv, ",".join([("%.6f" % prob) for prob in x]) return avg_loss / batches_per_epoch if args.mode == 'train': print "==> training" for epoch in range(start_epoch, args.epochs): do_epoch('train', epoch) test_loss = do_epoch('test', epoch) state_name = 'states/%s.epoch%d.test%.5f.state' % (network_name, epoch, test_loss) print "==> saving ... %s" % state_name
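# NOTE: a .state file is a pickle holding every trainable parameter value
# plus the epoch number (see BaseNetwork.save_params below); passing it back
# via --load_state resumes training from the following epoch.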
%s" % state_name network.save_params(state_name, epoch) elif args.mode == 'test': do_epoch('predict', 0) elif args.mode == 'test_on_train': do_epoch('predict_on_train', 0) else: raise Exception("unknown mode") ================================================ FILE: theano/networks/__init__.py ================================================ ================================================ FILE: theano/networks/base_network.py ================================================ import cPickle as pickle class BaseNetwork: def say_name(self): return "unknown" def save_params(self, file_name, epoch, **kwargs): with open(file_name, 'w') as save_file: pickle.dump( obj = { 'params' : [x.get_value() for x in self.params], 'epoch' : epoch, }, file = save_file, protocol = -1 ) def load_state(self, file_name): print "==> loading state %s" % file_name epoch = 0 with open(file_name, 'r') as load_file: dict = pickle.load(load_file) loaded_params = dict['params'] for (x, y) in zip(self.params, loaded_params): x.set_value(y) epoch = dict['epoch'] return epoch def get_batches_per_epoch(self, mode): if (mode == 'train' or mode == 'predict_on_train'): return len(self.train_list_raw) / self.batch_size elif (mode == 'test' or mode == 'predict'): return len(self.test_list_raw) / self.batch_size else: raise Exception("unknown mode") def step(self, batch_index, mode): if (mode == "train"): data, answers = self.read_batch(self.train_list_raw, batch_index) theano_fn = self.train_fn elif (mode == "test" or mode == "predict"): data, answers = self.read_batch(self.test_list_raw, batch_index) theano_fn = self.test_fn elif (mode == "predict_on_train"): data, answers = self.read_batch(self.train_list_raw, batch_index) theano_fn = self.test_fn else: raise Exception("unrecognized mode") ret = theano_fn(data, answers) return {"prediction": ret[0], "answers": answers, "current_loss": ret[1], "log": "", } ================================================ FILE: theano/networks/rnn.py ================================================ import random import numpy as np import theano import theano.tensor as T import lasagne from lasagne import layers from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh import PIL.Image as Image from base_network import BaseNetwork floatX = theano.config.floatX class Network(BaseNetwork): def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, l2, mode, rnn_num_units, **kwargs): print "==> not used params in DMN class:", kwargs.keys() self.train_list_raw = train_list_raw self.test_list_raw = test_list_raw self.png_folder = png_folder self.batch_size = batch_size self.l2 = l2 self.mode = mode self.num_units = rnn_num_units self.input_var = T.tensor3('input_var') self.answer_var = T.ivector('answer_var') print "==> building network" example = np.random.uniform(size=(self.batch_size, 858, 256), low=0.0, high=1.0).astype(np.float32) ######### answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) ######### # InputLayer network = layers.InputLayer(shape=(None, 858, 256), input_var=self.input_var) print layers.get_output(network).eval({self.input_var:example}).shape # GRULayer network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True) print layers.get_output(network).eval({self.input_var:example}).shape # Last layer: classification network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax) print layers.get_output(network).eval({self.input_var:example}).shape self.params = layers.get_all_params(network, 
self.prediction = layers.get_output(network) self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean() if (self.l2 > 0): self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network, lasagne.regularization.l2) else: self.loss_l2 = 0 self.loss = self.loss_ce + self.loss_l2 #updates = lasagne.updates.adadelta(self.loss, self.params) updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.0005) if self.mode == 'train': print "==> compiling train_fn" self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss], updates=updates) print "==> compiling test_fn" self.test_fn = theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss]) def say_name(self): return "rnn.GRU.num_units%d" % self.num_units def read_batch(self, data_raw, batch_index): start_index = batch_index * self.batch_size end_index = start_index + self.batch_size data = np.zeros((self.batch_size, 858, 256), dtype=np.float32) answers = [] for i in range(start_index, end_index): answers.append(int(data_raw[i].split(',')[1])) name = data_raw[i].split(',')[0] path = self.png_folder + name + ".png" im = Image.open(path) data[i - start_index, :, :] = np.transpose(np.array(im).astype(np.float32) / 256.0) answers = np.array(answers, dtype=np.int32) return data, answers ================================================ FILE: theano/networks/rnn_2layers.py ================================================ import random import numpy as np import theano import theano.tensor as T import lasagne from lasagne import layers from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh import PIL.Image as Image from base_network import BaseNetwork floatX = theano.config.floatX class Network(BaseNetwork): def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, l2, mode, rnn_num_units, batch_norm, **kwargs): print "==> not used params in Network class:", kwargs.keys() self.train_list_raw = train_list_raw self.test_list_raw = test_list_raw self.png_folder = png_folder self.batch_size = batch_size self.l2 = l2 self.mode = mode self.num_units = rnn_num_units self.batch_norm = batch_norm self.input_var = T.tensor3('input_var') self.answer_var = T.ivector('answer_var') # scale inputs to be in [-1, 1] input_var_norm = 2 * self.input_var - 1 print "==> building network" example = np.random.uniform(size=(self.batch_size, 858, 256), low=0.0, high=1.0).astype(np.float32) ######### answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) ######### # InputLayer network = layers.InputLayer(shape=(None, 858, 256), input_var=input_var_norm) print layers.get_output(network).eval({self.input_var:example}).shape # GRULayer network = layers.GRULayer(incoming=network, num_units=self.num_units) print layers.get_output(network).eval({self.input_var:example}).shape # BatchNormalization Layer if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) print layers.get_output(network).eval({self.input_var:example}).shape # GRULayer network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True) print layers.get_output(network).eval({self.input_var:example}).shape # Last layer: classification network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax) print layers.get_output(network).eval({self.input_var:example}).shape self.params = layers.get_all_params(network, trainable=True) self.prediction =
layers.get_output(network) self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean() if (self.l2 > 0): self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network, lasagne.regularization.l2) else: self.loss_l2 = 0 self.loss = self.loss_ce + self.loss_l2 #updates = lasagne.updates.adadelta(self.loss, self.params) updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003) if self.mode == 'train': print "==> compiling train_fn" self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss], updates=updates) print "==> compiling test_fn" self.test_fn = theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss]) def say_name(self): return "rnn_2layers.GRU.num_units%d" % self.num_units def read_batch(self, data_raw, batch_index): start_index = batch_index * self.batch_size end_index = start_index + self.batch_size data = np.zeros((self.batch_size, 858, 256), dtype=np.float32) answers = [] for i in range(start_index, end_index): answers.append(int(data_raw[i].split(',')[1])) name = data_raw[i].split(',')[0] path = self.png_folder + name + ".png" im = Image.open(path) data[i - start_index, :, :] = np.transpose(np.array(im).astype(np.float32) / 256.0) answers = np.array(answers, dtype=np.int32) return data, answers ================================================ FILE: theano/networks/rnn_2layers_5khz.py ================================================ import random import numpy as np import theano import theano.tensor as T import lasagne from lasagne import layers from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh import PIL.Image as Image from base_network import BaseNetwork floatX = theano.config.floatX class Network(BaseNetwork): def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, l2, mode, rnn_num_units, batch_norm, **kwargs): print "==> not used params in network class:", kwargs.keys() self.train_list_raw = train_list_raw self.test_list_raw = test_list_raw self.png_folder = png_folder self.batch_size = batch_size self.l2 = l2 self.mode = mode self.num_units = rnn_num_units self.batch_norm = batch_norm self.input_var = T.tensor3('input_var') self.answer_var = T.ivector('answer_var') # scale inputs to be in [-1, 1] input_var_norm = 2 * self.input_var - 1 print "==> building network" example = np.random.uniform(size=(self.batch_size, 858, 128), low=0.0, high=1.0).astype(np.float32) ######### answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) ######### # InputLayer network = layers.InputLayer(shape=(None, 858, 128), input_var=input_var_norm) print layers.get_output(network).eval({self.input_var:example}).shape # GRULayer network = layers.GRULayer(incoming=network, num_units=self.num_units) print layers.get_output(network).eval({self.input_var:example}).shape # BatchNormalization Layer if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) print layers.get_output(network).eval({self.input_var:example}).shape # GRULayer network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True) print layers.get_output(network).eval({self.input_var:example}).shape # BatchNormalization Layer # There are some states, where this layer was disabled if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) print layers.get_output(network).eval({self.input_var:example}).shape # Last layer: classification network = 
layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax) print layers.get_output(network).eval({self.input_var:example}).shape self.params = layers.get_all_params(network, trainable=True) self.prediction = layers.get_output(network) self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean() if (self.l2 > 0): self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network, lasagne.regularization.l2) else: self.loss_l2 = 0 self.loss = self.loss_ce + self.loss_l2 updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003) if self.mode == 'train': print "==> compiling train_fn" self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss], updates=updates) print "==> compiling test_fn" self.test_fn = theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss]) def say_name(self): return "rnn_2layers_5khz.GRU.num_units%d" % self.num_units def read_batch(self, data_raw, batch_index): start_index = batch_index * self.batch_size end_index = start_index + self.batch_size data = np.zeros((self.batch_size, 858, 128), dtype=np.float32) answers = [] for i in range(start_index, end_index): answers.append(int(data_raw[i].split(',')[1])) name = data_raw[i].split(',')[0] path = self.png_folder + name + ".png" im = Image.open(path) data[i - start_index, :, :] = np.transpose(np.array(im).astype(np.float32) / 256.0)[:, :128] answers = np.array(answers, dtype=np.int32) return data, answers ================================================ FILE: theano/networks/tc_net.py ================================================ import random import numpy as np import theano import theano.tensor as T import lasagne from lasagne import layers from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh import PIL.Image as Image from base_network import BaseNetwork floatX = theano.config.floatX class Network(BaseNetwork): def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, **kwargs): print "==> not used params in DMN class:", kwargs.keys() self.train_list_raw = train_list_raw self.test_list_raw = test_list_raw self.png_folder = png_folder self.batch_size = batch_size self.dropout = dropout self.l2 = l2 self.mode = mode self.batch_norm = batch_norm self.input_var = T.tensor4('input_var') self.answer_var = T.ivector('answer_var') print "==> building network" example = np.random.uniform(size=(self.batch_size, 1, 256, 858), low=0.0, high=1.0).astype(np.float32) ######### answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) ######### network = layers.InputLayer(shape=(None, 1, 256, 858), input_var=self.input_var) print layers.get_output(network).eval({self.input_var:example}).shape # CONV-RELU-POOL 1 network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 2 network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, 
pool_size=(3, 3), stride=2, ignore_border=False) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 3 network = layers.Conv2DLayer(incoming=network, num_filters=64, filter_size=(3, 3), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 4 network = layers.Conv2DLayer(incoming=network, num_filters=128, filter_size=(3, 3), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 5 network = layers.Conv2DLayer(incoming=network, num_filters=128, filter_size=(3, 3), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 6 network = layers.Conv2DLayer(incoming=network, num_filters=256, filter_size=(3, 3), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(3, 2), ignore_border=False) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # DENSE 1 network = layers.DenseLayer(incoming=network, num_units=1024, nonlinearity=rectify) if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) if (self.dropout > 0): network = layers.dropout(network, self.dropout) print layers.get_output(network).eval({self.input_var:example}).shape """ # DENSE 2 network = layers.DenseLayer(incoming=network, num_units=1024, nonlinearity=rectify) if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) if (self.dropout > 0): network = layers.dropout(network, self.dropout) print layers.get_output(network).eval({self.input_var:example}).shape """ # Last layer: classification network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax) print layers.get_output(network).eval({self.input_var:example}).shape self.params = layers.get_all_params(network, trainable=True) self.prediction = layers.get_output(network) self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean() if (self.l2 > 0): self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network, lasagne.regularization.l2) else: self.loss_l2 = 0 self.loss = self.loss_ce + self.loss_l2 #updates = lasagne.updates.adadelta(self.loss, self.params) updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003) if self.mode == 'train': print "==> compiling train_fn" self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss], updates=updates) print "==> compiling test_fn" self.test_fn = 
theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss]) def say_name(self): return "tc_net" def read_batch(self, data_raw, batch_index): start_index = batch_index * self.batch_size end_index = start_index + self.batch_size data = np.zeros((self.batch_size, 1, 256, 858), dtype=np.float32) answers = [] for i in range(start_index, end_index): answers.append(int(data_raw[i].split(',')[1])) name = data_raw[i].split(',')[0] path = self.png_folder + name + ".png" im = Image.open(path) data[i - start_index, 0, :, :] = np.array(im).astype(np.float32) / 256.0 answers = np.array(answers, dtype=np.int32) return data, answers ================================================ FILE: theano/networks/tc_net_deeprnn_shared_pad.py ================================================ import random import numpy as np import theano import theano.tensor as T import lasagne from lasagne import layers from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh import PIL.Image as Image from base_network import BaseNetwork floatX = theano.config.floatX class Network(BaseNetwork): def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs): print "==> not used params in DMN class:", kwargs.keys() self.train_list_raw = train_list_raw self.test_list_raw = test_list_raw self.png_folder = png_folder self.batch_size = batch_size self.dropout = dropout self.l2 = l2 self.mode = mode self.batch_norm = batch_norm self.num_units = rnn_num_units self.input_var = T.tensor4('input_var') self.answer_var = T.ivector('answer_var') print "==> building network" example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) ######### answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) ######### network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var) print layers.get_output(network).eval({self.input_var:example}).shape # CONV-RELU-POOL 1 network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 2 network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 3 network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 4 network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, 
pool_size=(3, 3), stride=2, pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) self.params = layers.get_all_params(network, trainable=True) output = layers.get_output(network) num_channels = 32 filter_W = 54 filter_H = 8 # NOTE: these constants are shapes of last pool layer, it can be symbolic # explicit values are better for optimizations channels = [] for channel_index in range(num_channels): channels.append(output[:, channel_index, :, :].transpose((0, 2, 1))) rnn_network_outputs = [] W_in_to_updategate = None W_hid_to_updategate = None b_updategate = None W_in_to_resetgate = None W_hid_to_resetgate = None b_resetgate = None W_in_to_hidden_update = None W_hid_to_hidden_update = None b_hidden_update = None W_in_to_updategate1 = None W_hid_to_updategate1 = None b_updategate1 = None W_in_to_resetgate1 = None W_hid_to_resetgate1 = None b_resetgate1 = None W_in_to_hidden_update1 = None W_hid_to_hidden_update1 = None b_hidden_update1 = None for channel_index in range(num_channels): rnn_input_var = channels[channel_index] # InputLayer network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var) if (channel_index == 0): # GRULayer network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=False) W_in_to_updategate = network.W_in_to_updategate W_hid_to_updategate = network.W_hid_to_updategate b_updategate = network.b_updategate W_in_to_resetgate = network.W_in_to_resetgate W_hid_to_resetgate = network.W_hid_to_resetgate b_resetgate = network.b_resetgate W_in_to_hidden_update = network.W_in_to_hidden_update W_hid_to_hidden_update = network.W_hid_to_hidden_update b_hidden_update = network.b_hidden_update # BatchNormalization Layer if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # GRULayer network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True) W_in_to_updategate1 = network.W_in_to_updategate W_hid_to_updategate1 = network.W_hid_to_updategate b_updategate1 = network.b_updategate W_in_to_resetgate1 = network.W_in_to_resetgate W_hid_to_resetgate1 = network.W_hid_to_resetgate b_resetgate1 = network.b_resetgate W_in_to_hidden_update1 = network.W_in_to_hidden_update W_hid_to_hidden_update1 = network.W_hid_to_hidden_update b_hidden_update1 = network.b_hidden_update # add params self.params += layers.get_all_params(network, trainable=True) else: # GRULayer, but shared network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=False, resetgate=layers.Gate(W_in=W_in_to_resetgate, W_hid=W_hid_to_resetgate, b=b_resetgate), updategate=layers.Gate(W_in=W_in_to_updategate, W_hid=W_hid_to_updategate, b=b_updategate), hidden_update=layers.Gate(W_in=W_in_to_hidden_update, W_hid=W_hid_to_hidden_update, b=b_hidden_update)) # BatchNormalization Layer if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # GRULayer, but shared network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True, resetgate=layers.Gate(W_in=W_in_to_resetgate1, W_hid=W_hid_to_resetgate1, b=b_resetgate1), updategate=layers.Gate(W_in=W_in_to_updategate1, W_hid=W_hid_to_updategate1, b=b_updategate1), hidden_update=layers.Gate(W_in=W_in_to_hidden_update1, W_hid=W_hid_to_hidden_update1, b=b_hidden_update1)) rnn_network_outputs.append(layers.get_output(network)) all_output_var = T.concatenate(rnn_network_outputs, axis=1) print 
all_output_var.eval({self.input_var:example}).shape # InputLayer network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var) # Dropout Layer if (self.dropout > 0): network = layers.dropout(network, self.dropout) # BatchNormalization Layer if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # Last layer: classification network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax) print layers.get_output(network).eval({self.input_var:example}).shape self.params += layers.get_all_params(network, trainable=True) self.prediction = layers.get_output(network) #print "==> param shapes", [x.eval().shape for x in self.params] self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean() if (self.l2 > 0): self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, lasagne.regularization.l2) else: self.loss_l2 = 0 self.loss = self.loss_ce + self.loss_l2 #updates = lasagne.updates.adadelta(self.loss, self.params) updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003) if self.mode == 'train': print "==> compiling train_fn" self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss], updates=updates) print "==> compiling test_fn" self.test_fn = theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss]) def say_name(self): return "tc_net_deeprnn.4conv.pad.GRU.shared.num_units%d.5khz" % self.num_units def read_batch(self, data_raw, batch_index): start_index = batch_index * self.batch_size end_index = start_index + self.batch_size data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32) answers = [] for i in range(start_index, end_index): answers.append(int(data_raw[i].split(',')[1])) name = data_raw[i].split(',')[0] path = self.png_folder + name + ".png" im = Image.open(path) data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0 answers = np.array(answers, dtype=np.int32) return data, answers ================================================ FILE: theano/networks/tc_net_mod.py ================================================ import random import numpy as np import theano import theano.tensor as T import lasagne from lasagne import layers from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh import PIL.Image as Image from base_network import BaseNetwork floatX = theano.config.floatX class Network(BaseNetwork): def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, **kwargs): print "==> not used params in DMN class:", kwargs.keys() self.train_list_raw = train_list_raw self.test_list_raw = test_list_raw self.png_folder = png_folder self.batch_size = batch_size self.dropout = dropout self.l2 = l2 self.mode = mode self.batch_norm = batch_norm self.input_var = T.tensor4('input_var') self.answer_var = T.ivector('answer_var') print "==> building network" example = np.random.uniform(size=(self.batch_size, 1, 256, 858), low=0.0, high=1.0).astype(np.float32) ######### answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) ######### network = layers.InputLayer(shape=(None, 1, 256, 858), input_var=self.input_var) print layers.get_output(network).eval({self.input_var:example}).shape # NOTE: replace pad=2 with ignore_border=False # CONV-RELU-POOL 1 network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), stride=1, nonlinearity=rectify) print 
layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 2 network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 3 network = layers.Conv2DLayer(incoming=network, num_filters=64, filter_size=(3, 3), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 4 network = layers.Conv2DLayer(incoming=network, num_filters=128, filter_size=(3, 3), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 5 network = layers.Conv2DLayer(incoming=network, num_filters=128, filter_size=(3, 3), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 6 network = layers.Conv2DLayer(incoming=network, num_filters=256, filter_size=(3, 3), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(3, 2), pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # DENSE 1 network = layers.DenseLayer(incoming=network, num_units=1024, nonlinearity=rectify) if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) if (self.dropout > 0): network = layers.dropout(network, self.dropout) print layers.get_output(network).eval({self.input_var:example}).shape # Last layer: classification network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax) print layers.get_output(network).eval({self.input_var:example}).shape self.params = layers.get_all_params(network, trainable=True) self.prediction = layers.get_output(network) print "==> param shapes", [x.eval().shape for x in self.params] self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean() if (self.l2 > 0): self.loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network, lasagne.regularization.l2) else: self.loss_l2 = 0 self.loss = self.loss_ce + self.loss_l2 #updates = lasagne.updates.adadelta(self.loss, self.params) updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003) if 
self.mode == 'train': print "==> compiling train_fn" self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss], updates=updates) print "==> compiling test_fn" self.test_fn = theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss]) def say_name(self): return "tc_net_mod" def read_batch(self, data_raw, batch_index): start_index = batch_index * self.batch_size end_index = start_index + self.batch_size data = np.zeros((self.batch_size, 1, 256, 858), dtype=np.float32) answers = [] for i in range(start_index, end_index): answers.append(int(data_raw[i].split(',')[1])) name = data_raw[i].split(',')[0] path = self.png_folder + name + ".png" im = Image.open(path) data[i - start_index, 0, :, :] = np.array(im).astype(np.float32) / 256.0 answers = np.array(answers, dtype=np.int32) return data, answers ================================================ FILE: theano/networks/tc_net_mod_5khz_small.py ================================================ import random import numpy as np import theano import theano.tensor as T import lasagne from lasagne import layers from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh import PIL.Image as Image from base_network import BaseNetwork floatX = theano.config.floatX class Network(BaseNetwork): def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, **kwargs): print "==> not used params in DMN class:", kwargs.keys() self.train_list_raw = train_list_raw self.test_list_raw = test_list_raw self.png_folder = png_folder self.batch_size = batch_size self.dropout = dropout self.l2 = l2 self.mode = mode self.batch_norm = batch_norm self.input_var = T.tensor4('input_var') self.answer_var = T.ivector('answer_var') print "==> building network" example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) ######### answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) ######### network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var) print layers.get_output(network).eval({self.input_var:example}).shape # CONV-RELU-POOL 1 network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 2 network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 3 network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 4 network = layers.Conv2DLayer(incoming=network, num_filters=64, 
filter_size=(3, 3), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 5 network = layers.Conv2DLayer(incoming=network, num_filters=64, filter_size=(3, 3), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # DENSE 1 network = layers.DenseLayer(incoming=network, num_units=256, nonlinearity=rectify) if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) if (self.dropout > 0): network = layers.dropout(network, self.dropout) print layers.get_output(network).eval({self.input_var:example}).shape # Last layer: classification network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax) print layers.get_output(network).eval({self.input_var:example}).shape self.params = layers.get_all_params(network, trainable=True) self.prediction = layers.get_output(network) self.test_prediction = layers.get_output(network, deterministic=True) print "==> param shapes", [x.eval().shape for x in self.params] def get_loss(prediction): loss_ce = lasagne.objectives.categorical_crossentropy(prediction, self.answer_var).mean() if (self.l2 > 0): loss_l2 = self.l2 * lasagne.regularization.regularize_network_params(network, lasagne.regularization.l2) else: loss_l2 = 0 return loss_ce + loss_l2 self.loss = get_loss(self.prediction) self.test_loss = get_loss(self.test_prediction) #updates = lasagne.updates.adadelta(self.loss, self.params) updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003) if self.mode == 'train': print "==> compiling train_fn" self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss], updates=updates) print "==> compiling test_fn" # deterministic version #self.test_fn = theano.function(inputs=[self.input_var, self.answer_var], # outputs=[self.test_prediction, self.test_loss]) # non deterministic version, as train_fn self.test_fn = theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss]) def say_name(self): return "tc_net_mod_5khz_small" def read_batch(self, data_raw, batch_index): start_index = batch_index * self.batch_size end_index = start_index + self.batch_size data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32) answers = [] for i in range(start_index, end_index): answers.append(int(data_raw[i].split(',')[1])) name = data_raw[i].split(',')[0] path = self.png_folder + name + ".png" im = Image.open(path) data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0 answers = np.array(answers, dtype=np.int32) return data, answers ================================================ FILE: theano/networks/tc_net_rnn.py ================================================ import random import numpy as np import theano import theano.tensor as T import lasagne from lasagne import layers from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh import PIL.Image as Image from base_network import BaseNetwork floatX = theano.config.floatX class 
Network(BaseNetwork): def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs): print "==> not used params in DMN class:", kwargs.keys() self.train_list_raw = train_list_raw self.test_list_raw = test_list_raw self.png_folder = png_folder self.batch_size = batch_size self.dropout = dropout self.l2 = l2 self.mode = mode self.batch_norm = batch_norm self.num_units = rnn_num_units self.input_var = T.tensor4('input_var') self.answer_var = T.ivector('answer_var') print "==> building network" example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) ######### answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) ######### network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var) print layers.get_output(network).eval({self.input_var:example}).shape # CONV-RELU-POOL 1 network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 2 network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 3 network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) self.params = layers.get_all_params(network, trainable=True) output = layers.get_output(network) num_channels = 32 filter_W = 104 filter_H = 13 # NOTE: these constants are shapes of last pool layer, it can be symbolic # explicit values are better for optimizations channels = [] for channel_index in range(num_channels): channels.append(output[:, channel_index, :, :].transpose((0, 2, 1))) rnn_network_outputs = [] for channel_index in range(num_channels): rnn_input_var = channels[channel_index] # InputLayer network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var) # GRULayer network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True) # BatchNormalization Layer if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # add params self.params += layers.get_all_params(network, trainable=True) rnn_network_outputs.append(layers.get_output(network)) all_output_var = T.concatenate(rnn_network_outputs, axis=1) print all_output_var.eval({self.input_var:example}).shape # InputLayer network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var) # DENSE 1 network = layers.DenseLayer(incoming=network, num_units=512, nonlinearity=rectify) if (self.batch_norm): network = 
layers.BatchNormLayer(incoming=network) if (self.dropout > 0): network = layers.dropout(network, self.dropout) print layers.get_output(network).eval({self.input_var:example}).shape # Last layer: classification network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax) print layers.get_output(network).eval({self.input_var:example}).shape self.params += layers.get_all_params(network, trainable=True) self.prediction = layers.get_output(network) #print "==> param shapes", [x.eval().shape for x in self.params] self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean() if (self.l2 > 0): self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, lasagne.regularization.l2) else: self.loss_l2 = 0 self.loss = self.loss_ce + self.loss_l2 #updates = lasagne.updates.adadelta(self.loss, self.params) updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003) if self.mode == 'train': print "==> compiling train_fn" self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss], updates=updates) print "==> compiling test_fn" self.test_fn = theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss]) def say_name(self): return "tc_net_rnn.GRU.3conv.num_units%d.5khz" % self.num_units def read_batch(self, data_raw, batch_index): start_index = batch_index * self.batch_size end_index = start_index + self.batch_size data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32) answers = [] for i in range(start_index, end_index): answers.append(int(data_raw[i].split(',')[1])) name = data_raw[i].split(',')[0] path = self.png_folder + name + ".png" im = Image.open(path) data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0 answers = np.array(answers, dtype=np.int32) return data, answers ================================================ FILE: theano/networks/tc_net_rnn_nodense.py ================================================ import random import numpy as np import theano import theano.tensor as T import lasagne from lasagne import layers from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh import PIL.Image as Image from base_network import BaseNetwork floatX = theano.config.floatX class Network(BaseNetwork): def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs): print "==> not used params in DMN class:", kwargs.keys() self.train_list_raw = train_list_raw self.test_list_raw = test_list_raw self.png_folder = png_folder self.batch_size = batch_size self.dropout = dropout self.l2 = l2 self.mode = mode self.batch_norm = batch_norm self.num_units = rnn_num_units self.input_var = T.tensor4('input_var') self.answer_var = T.ivector('answer_var') print "==> building network" example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) ######### answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) ######### network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var) print layers.get_output(network).eval({self.input_var:example}).shape # CONV-RELU-POOL 1 network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False) print 
layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 2 network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 3 network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, ignore_border=False) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) self.params = layers.get_all_params(network, trainable=True) output = layers.get_output(network) num_channels = 32 filter_W = 104 filter_H = 13 # NOTE: these constants are shapes of last pool layer, it can be symbolic # explicit values are better for optimizations channels = [] for channel_index in range(num_channels): channels.append(output[:, channel_index, :, :].transpose((0, 2, 1))) rnn_network_outputs = [] for channel_index in range(num_channels): rnn_input_var = channels[channel_index] # InputLayer network = layers.InputLayer(shape=(None, filter_W, filter_H), input_var=rnn_input_var) # GRULayer network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True) # BatchNormalization Layer if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # add params self.params += layers.get_all_params(network, trainable=True) rnn_network_outputs.append(layers.get_output(network)) all_output_var = T.concatenate(rnn_network_outputs, axis=1) print all_output_var.eval({self.input_var:example}).shape # InputLayer network = layers.InputLayer(shape=(None, self.num_units * num_channels), input_var=all_output_var) """ # DENSE 1 network = layers.DenseLayer(incoming=network, num_units=512, nonlinearity=rectify) if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) if (self.dropout > 0): network = layers.dropout(network, self.dropout) print layers.get_output(network).eval({self.input_var:example}).shape """ # Last layer: classification network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax) print layers.get_output(network).eval({self.input_var:example}).shape self.params += layers.get_all_params(network, trainable=True) self.prediction = layers.get_output(network) #print "==> param shapes", [x.eval().shape for x in self.params] self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean() if (self.l2 > 0): self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, lasagne.regularization.l2) else: self.loss_l2 = 0 self.loss = self.loss_ce + self.loss_l2 #updates = lasagne.updates.adadelta(self.loss, self.params) updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003) if self.mode == 'train': print "==> compiling train_fn" self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss], updates=updates) print "==> compiling test_fn" self.test_fn = theano.function(inputs=[self.input_var, 
self.answer_var], outputs=[self.prediction, self.loss]) def say_name(self): return "tc_net_rnn.3conv.GRU.num_units%d.nodense.5khz" % self.num_units def read_batch(self, data_raw, batch_index): start_index = batch_index * self.batch_size end_index = start_index + self.batch_size data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32) answers = [] for i in range(start_index, end_index): answers.append(int(data_raw[i].split(',')[1])) name = data_raw[i].split(',')[0] path = self.png_folder + name + ".png" im = Image.open(path) data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0 answers = np.array(answers, dtype=np.int32) return data, answers ================================================ FILE: theano/networks/tc_net_rnn_onernn.py ================================================ import random import numpy as np import theano import theano.tensor as T import lasagne from lasagne import layers from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh import PIL.Image as Image from base_network import BaseNetwork floatX = theano.config.floatX class Network(BaseNetwork): def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs): print "==> not used params in DMN class:", kwargs.keys() self.train_list_raw = train_list_raw self.test_list_raw = test_list_raw self.png_folder = png_folder self.batch_size = batch_size self.dropout = dropout self.l2 = l2 self.mode = mode self.batch_norm = batch_norm self.num_units = rnn_num_units self.input_var = T.tensor4('input_var') self.answer_var = T.ivector('answer_var') print "==> building network" example = np.random.uniform(size=(self.batch_size, 1, 128, 858), low=0.0, high=1.0).astype(np.float32) ######### answer = np.random.randint(low=0, high=176, size=(self.batch_size,)) ######### network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var) print layers.get_output(network).eval({self.input_var:example}).shape # CONV-RELU-POOL 1 network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 2 network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 3 network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) # CONV-RELU-POOL 4 network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3), stride=1, nonlinearity=rectify) print layers.get_output(network).eval({self.input_var:example}).shape network = 
layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) self.params = layers.get_all_params(network, trainable=True) output = layers.get_output(network) output = output.transpose((0, 3, 1, 2)) output = output.flatten(ndim=3) # NOTE: these constants are shapes of last pool layer, it can be symbolic # explicit values are better for optimizations num_channels = 32 filter_W = 54 filter_H = 8 # InputLayer network = layers.InputLayer(shape=(None, filter_W, num_channels * filter_H), input_var=output) print layers.get_output(network).eval({self.input_var:example}).shape # GRULayer network = layers.GRULayer(incoming=network, num_units=self.num_units, only_return_final=True) print layers.get_output(network).eval({self.input_var:example}).shape if (self.batch_norm): network = layers.BatchNormLayer(incoming=network) if (self.dropout > 0): network = layers.dropout(network, self.dropout) # Last layer: classification network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax) print layers.get_output(network).eval({self.input_var:example}).shape self.params += layers.get_all_params(network, trainable=True) self.prediction = layers.get_output(network) #print "==> param shapes", [x.eval().shape for x in self.params] self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction, self.answer_var).mean() if (self.l2 > 0): self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(self.params, lasagne.regularization.l2) else: self.loss_l2 = 0 self.loss = self.loss_ce + self.loss_l2 #updates = lasagne.updates.adadelta(self.loss, self.params) #updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003) # good one updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.0003) if self.mode == 'train': print "==> compiling train_fn" self.train_fn = theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss], updates=updates) print "==> compiling test_fn" self.test_fn = theano.function(inputs=[self.input_var, self.answer_var], outputs=[self.prediction, self.loss]) def say_name(self): return "tc_net_rnn.4conv.pad.GRU.onernn.num_units%d.5khz" % self.num_units def read_batch(self, data_raw, batch_index): start_index = batch_index * self.batch_size end_index = start_index + self.batch_size data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32) answers = [] for i in range(start_index, end_index): answers.append(int(data_raw[i].split(',')[1])) name = data_raw[i].split(',')[0] path = self.png_folder + name + ".png" im = Image.open(path) data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0 answers = np.array(answers, dtype=np.int32) return data, answers ================================================ FILE: theano/networks/tc_net_rnn_onernn_notimepool.py ================================================ import random import numpy as np import theano import theano.tensor as T import lasagne from lasagne import layers from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh import PIL.Image as Image from base_network import BaseNetwork floatX = theano.config.floatX class Network(BaseNetwork): def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size, dropout, l2, mode, batch_norm, rnn_num_units, **kwargs): print "==> not used params in DMN class:", kwargs.keys() self.train_list_raw = 
        print "==> building network"
        example = np.random.uniform(size=(self.batch_size, 1, 128, 858),
                                    low=0.0, high=1.0).astype(np.float32)  #########
        answer = np.random.randint(low=0, high=176, size=(self.batch_size,))  #########

        network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
        print layers.get_output(network).eval({self.input_var: example}).shape

        # CONV-RELU-POOL 1
        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var: example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(2, 1), pad=2)
        print layers.get_output(network).eval({self.input_var: example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 2
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var: example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(2, 1), pad=2)
        print layers.get_output(network).eval({self.input_var: example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 3
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var: example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(2, 1), pad=2)
        print layers.get_output(network).eval({self.input_var: example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 4
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var: example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=(2, 1), pad=2)
        print layers.get_output(network).eval({self.input_var: example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        self.params = layers.get_all_params(network, trainable=True)

        output = layers.get_output(network)
        output = output.transpose((0, 3, 1, 2))
        output = output.flatten(ndim=3)

        # NOTE: these constants are the shapes of the last pool layer; they could be
        # computed symbolically, but explicit values enable better optimization
        num_channels = 32
        filter_W = 852
        filter_H = 8

        # InputLayer
        network = layers.InputLayer(shape=(None, filter_W, num_channels * filter_H),
                                    input_var=output)
        print layers.get_output(network).eval({self.input_var: example}).shape

        # GRULayer
        network = layers.GRULayer(incoming=network, num_units=self.num_units,
                                  only_return_final=True)
        print layers.get_output(network).eval({self.input_var: example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)
        if (self.dropout > 0):
            network = layers.dropout(network, self.dropout)

        # Last layer: classification
        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
        print layers.get_output(network).eval({self.input_var: example}).shape

        self.params += layers.get_all_params(network, trainable=True)
        self.prediction = layers.get_output(network)

        #print "==> param shapes", [x.eval().shape for x in self.params]
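        # Loss (editor's note): mean categorical cross-entropy over the batch,
        # plus an optional L2 penalty when self.l2 > 0; note that self.params
        # holds every trainable parameter, so the penalty covers biases too.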
        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction,
                                                                   self.answer_var).mean()
        if (self.l2 > 0):
            self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(
                self.params, lasagne.regularization.l2)
        else:
            self.loss_l2 = 0
        self.loss = self.loss_ce + self.loss_l2

        #updates = lasagne.updates.adadelta(self.loss, self.params)
        #updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003) # good one
        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.001)

        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)

        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                       outputs=[self.prediction, self.loss])

    def say_name(self):
        return "tc_net_rnn.4conv.pad.GRU.onernn.notimepool.num_units%d.5khz" % self.num_units

    def read_batch(self, data_raw, batch_index):
        start_index = batch_index * self.batch_size
        end_index = start_index + self.batch_size
        data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
        answers = []

        for i in range(start_index, end_index):
            answers.append(int(data_raw[i].split(',')[1]))
            name = data_raw[i].split(',')[0]
            path = self.png_folder + name + ".png"
            im = Image.open(path)
            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0

        answers = np.array(answers, dtype=np.int32)
        return data, answers


================================================
FILE: theano/networks/tc_net_rnn_shared.py
================================================
import random
import numpy as np

import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh

import PIL.Image as Image

from base_network import BaseNetwork

floatX = theano.config.floatX


class Network(BaseNetwork):

    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size,
                 dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):

        print "==> unused params in Network class:", kwargs.keys()
        self.train_list_raw = train_list_raw
        self.test_list_raw = test_list_raw
        self.png_folder = png_folder
        self.batch_size = batch_size
        self.dropout = dropout
        self.l2 = l2
        self.mode = mode
        self.batch_norm = batch_norm
        self.num_units = rnn_num_units

        self.input_var = T.tensor4('input_var')
        self.answer_var = T.ivector('answer_var')

        print "==> building network"
        example = np.random.uniform(size=(self.batch_size, 1, 128, 858),
                                    low=0.0, high=1.0).astype(np.float32)  #########
        answer = np.random.randint(low=0, high=176, size=(self.batch_size,))  #########

        network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
        print layers.get_output(network).eval({self.input_var: example}).shape

        # CONV-RELU-POOL 1
        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var: example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2,
                                        ignore_border=False)
        print layers.get_output(network).eval({self.input_var: example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 2
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var: example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2,
                                        ignore_border=False)
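        # Shape note (editor): with ignore_border=False the pools keep the
        # partial border windows, so the three conv/pool stages map the
        # 128x858 input to a 32-channel 13x104 map -- the filter_H/filter_W
        # constants used below.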
        print layers.get_output(network).eval({self.input_var: example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 3
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var: example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2,
                                        ignore_border=False)
        print layers.get_output(network).eval({self.input_var: example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        self.params = layers.get_all_params(network, trainable=True)
        output = layers.get_output(network)

        num_channels = 32
        filter_W = 104
        filter_H = 13
        # NOTE: these constants are the shapes of the last pool layer; they could be
        # computed symbolically, but explicit values enable better optimization

        channels = []
        for channel_index in range(num_channels):
            channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))

        rnn_network_outputs = []
        W_in_to_updategate = None
        W_hid_to_updategate = None
        b_updategate = None
        W_in_to_resetgate = None
        W_hid_to_resetgate = None
        b_resetgate = None
        W_in_to_hidden_update = None
        W_hid_to_hidden_update = None
        b_hidden_update = None
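        # Weight sharing across channels (editor's note): each of the 32 conv
        # channels is fed to its own GRU, but all 32 GRUs use one set of
        # weights. The first iteration creates a fresh GRULayer and records
        # its gate parameters; every later iteration passes those same shared
        # variables back in through lasagne.layers.Gate, so only one GRU's
        # worth of parameters exists and is added to self.params exactly once.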
        for channel_index in range(num_channels):
            rnn_input_var = channels[channel_index]

            # InputLayer
            network = layers.InputLayer(shape=(None, filter_W, filter_H),
                                        input_var=rnn_input_var)

            if (channel_index == 0):
                # GRULayer
                network = layers.GRULayer(incoming=network, num_units=self.num_units,
                                          only_return_final=True)
                W_in_to_updategate = network.W_in_to_updategate
                W_hid_to_updategate = network.W_hid_to_updategate
                b_updategate = network.b_updategate
                W_in_to_resetgate = network.W_in_to_resetgate
                W_hid_to_resetgate = network.W_hid_to_resetgate
                b_resetgate = network.b_resetgate
                W_in_to_hidden_update = network.W_in_to_hidden_update
                W_hid_to_hidden_update = network.W_hid_to_hidden_update
                b_hidden_update = network.b_hidden_update

                # add params
                self.params += layers.get_all_params(network, trainable=True)
            else:
                # GRULayer, but shared
                network = layers.GRULayer(incoming=network, num_units=self.num_units,
                                          only_return_final=True,
                                          resetgate=layers.Gate(W_in=W_in_to_resetgate,
                                                                W_hid=W_hid_to_resetgate,
                                                                b=b_resetgate),
                                          updategate=layers.Gate(W_in=W_in_to_updategate,
                                                                 W_hid=W_hid_to_updategate,
                                                                 b=b_updategate),
                                          hidden_update=layers.Gate(W_in=W_in_to_hidden_update,
                                                                    W_hid=W_hid_to_hidden_update,
                                                                    b=b_hidden_update))

            rnn_network_outputs.append(layers.get_output(network))

        all_output_var = T.concatenate(rnn_network_outputs, axis=1)
        print all_output_var.eval({self.input_var: example}).shape

        # InputLayer
        network = layers.InputLayer(shape=(None, self.num_units * num_channels),
                                    input_var=all_output_var)

        # BatchNormalization Layer
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # Last layer: classification
        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
        print layers.get_output(network).eval({self.input_var: example}).shape

        self.params += layers.get_all_params(network, trainable=True)
        self.prediction = layers.get_output(network)

        #print "==> param shapes", [x.eval().shape for x in self.params]

        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction,
                                                                   self.answer_var).mean()
        if (self.l2 > 0):
            self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(
                self.params, lasagne.regularization.l2)
        else:
            self.loss_l2 = 0
        self.loss = self.loss_ce + self.loss_l2

        #updates = lasagne.updates.adadelta(self.loss, self.params)
        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)

        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)

        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                       outputs=[self.prediction, self.loss])

    def say_name(self):
        return "tc_net_rnn.3conv.GRU.shared.num_units%d.5khz" % self.num_units

    def read_batch(self, data_raw, batch_index):
        start_index = batch_index * self.batch_size
        end_index = start_index + self.batch_size
        data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
        answers = []

        for i in range(start_index, end_index):
            answers.append(int(data_raw[i].split(',')[1]))
            name = data_raw[i].split(',')[0]
            path = self.png_folder + name + ".png"
            im = Image.open(path)
            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0

        answers = np.array(answers, dtype=np.int32)
        return data, answers


================================================
FILE: theano/networks/tc_net_rnn_shared_pad.py
================================================
import random
import numpy as np

import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh

import PIL.Image as Image

from base_network import BaseNetwork

floatX = theano.config.floatX


class Network(BaseNetwork):

    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size,
                 dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):

        print "==> unused params in Network class:", kwargs.keys()
        self.train_list_raw = train_list_raw
        self.test_list_raw = test_list_raw
        self.png_folder = png_folder
        self.batch_size = batch_size
        self.dropout = dropout
        self.l2 = l2
        self.mode = mode
        self.batch_norm = batch_norm
        self.num_units = rnn_num_units

        self.input_var = T.tensor4('input_var')
        self.answer_var = T.ivector('answer_var')
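        # Design note (editor): same channel-wise shared-GRU design as
        # tc_net_rnn_shared, but the pooling uses pad=2 instead of
        # ignore_border=False and a fourth conv/pool stage is added,
        # shrinking the map each GRU reads to 8x54.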
        print "==> building network"
        example = np.random.uniform(size=(self.batch_size, 1, 128, 858),
                                    low=0.0, high=1.0).astype(np.float32)  #########
        answer = np.random.randint(low=0, high=176, size=(self.batch_size,))  #########

        network = layers.InputLayer(shape=(None, 1, 128, 858), input_var=self.input_var)
        print layers.get_output(network).eval({self.input_var: example}).shape

        # CONV-RELU-POOL 1
        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var: example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var: example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 2
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var: example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var: example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 3
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var: example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var: example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 4
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var: example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var: example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        self.params = layers.get_all_params(network, trainable=True)
        output = layers.get_output(network)

        num_channels = 32
        filter_W = 54
        filter_H = 8
        # NOTE: these constants are the shapes of the last pool layer; they could be
        # computed symbolically, but explicit values enable better optimization

        channels = []
        for channel_index in range(num_channels):
            channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))

        rnn_network_outputs = []
        W_in_to_updategate = None
        W_hid_to_updategate = None
        b_updategate = None
        W_in_to_resetgate = None
        W_hid_to_resetgate = None
        b_resetgate = None
        W_in_to_hidden_update = None
        W_hid_to_hidden_update = None
        b_hidden_update = None

        for channel_index in range(num_channels):
            rnn_input_var = channels[channel_index]

            # InputLayer
            network = layers.InputLayer(shape=(None, filter_W, filter_H),
                                        input_var=rnn_input_var)

            if (channel_index == 0):
                # GRULayer
                network = layers.GRULayer(incoming=network, num_units=self.num_units,
                                          only_return_final=True)
                W_in_to_updategate = network.W_in_to_updategate
                W_hid_to_updategate = network.W_hid_to_updategate
                b_updategate = network.b_updategate
                W_in_to_resetgate = network.W_in_to_resetgate
                W_hid_to_resetgate = network.W_hid_to_resetgate
                b_resetgate = network.b_resetgate
                W_in_to_hidden_update = network.W_in_to_hidden_update
                W_hid_to_hidden_update = network.W_hid_to_hidden_update
                b_hidden_update = network.b_hidden_update

                # add params
                self.params += layers.get_all_params(network, trainable=True)
            else:
                # GRULayer, but shared
                network = layers.GRULayer(incoming=network, num_units=self.num_units,
                                          only_return_final=True,
                                          resetgate=layers.Gate(W_in=W_in_to_resetgate,
                                                                W_hid=W_hid_to_resetgate,
                                                                b=b_resetgate),
                                          updategate=layers.Gate(W_in=W_in_to_updategate,
                                                                 W_hid=W_hid_to_updategate,
                                                                 b=b_updategate),
                                          hidden_update=layers.Gate(W_in=W_in_to_hidden_update,
                                                                    W_hid=W_hid_to_hidden_update,
                                                                    b=b_hidden_update))

            rnn_network_outputs.append(layers.get_output(network))

        all_output_var = T.concatenate(rnn_network_outputs, axis=1)
        print all_output_var.eval({self.input_var: example}).shape

        # InputLayer
        network = layers.InputLayer(shape=(None, self.num_units * num_channels),
                                    input_var=all_output_var)

        # Dropout Layer
        if (self.dropout > 0):
            network = layers.dropout(network, self.dropout)

        # BatchNormalization Layer
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # Last layer: classification
        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
        print layers.get_output(network).eval({self.input_var: example}).shape

        self.params += layers.get_all_params(network, trainable=True)
        self.prediction = layers.get_output(network)

        #print "==> param shapes", [x.eval().shape for x in self.params]

        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction,
                                                                   self.answer_var).mean()
        if (self.l2 > 0):
            self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(
                self.params, lasagne.regularization.l2)
        else:
            self.loss_l2 = 0
        self.loss = self.loss_ce + self.loss_l2
        #updates = lasagne.updates.adadelta(self.loss, self.params)
        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)

        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)

        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                       outputs=[self.prediction, self.loss])

    def say_name(self):
        return "tc_net_rnn.4conv.pad.GRU.shared.num_units%d.5khz" % self.num_units

    def read_batch(self, data_raw, batch_index):
        start_index = batch_index * self.batch_size
        end_index = start_index + self.batch_size
        data = np.zeros((self.batch_size, 1, 128, 858), dtype=np.float32)
        answers = []

        for i in range(start_index, end_index):
            answers.append(int(data_raw[i].split(',')[1]))
            name = data_raw[i].split(',')[0]
            path = self.png_folder + name + ".png"
            im = Image.open(path)
            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, :] / 256.0

        answers = np.array(answers, dtype=np.int32)
        return data, answers


================================================
FILE: theano/networks/tc_net_rnn_shared_pad_augm.py
================================================
import random
import numpy as np

import theano
import theano.tensor as T
import lasagne
from lasagne import layers
from lasagne.nonlinearities import rectify, softmax, sigmoid, tanh

import PIL.Image as Image

from base_network import BaseNetwork

floatX = theano.config.floatX


class Network(BaseNetwork):

    def __init__(self, train_list_raw, test_list_raw, png_folder, batch_size,
                 dropout, l2, mode, batch_norm, rnn_num_units, **kwargs):

        print "==> unused params in Network class:", kwargs.keys()
        self.train_list_raw = train_list_raw
        self.test_list_raw = test_list_raw
        self.png_folder = png_folder
        self.batch_size = batch_size
        self.dropout = dropout
        self.l2 = l2
        self.mode = mode
        self.batch_norm = batch_norm
        self.num_units = rnn_num_units

        self.input_var = T.tensor4('input_var')
        self.answer_var = T.ivector('answer_var')
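        # Design note (editor): augmented-training variant of
        # tc_net_rnn_shared_pad. The network takes 128x768 inputs, and
        # read_batch() below crops a random 768-column window out of each
        # 858-column spectrogram, so every epoch sees a slightly different
        # time slice of each recording.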
        print "==> building network"
        example = np.random.uniform(size=(self.batch_size, 1, 128, 768),
                                    low=0.0, high=1.0).astype(np.float32)  #########
        answer = np.random.randint(low=0, high=176, size=(self.batch_size,))  #########

        network = layers.InputLayer(shape=(None, 1, 128, 768), input_var=self.input_var)
        print layers.get_output(network).eval({self.input_var: example}).shape

        # CONV-RELU-POOL 1
        network = layers.Conv2DLayer(incoming=network, num_filters=16, filter_size=(7, 7),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var: example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var: example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 2
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(5, 5),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var: example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var: example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 3
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var: example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var: example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # CONV-RELU-POOL 4
        network = layers.Conv2DLayer(incoming=network, num_filters=32, filter_size=(3, 3),
                                     stride=1, nonlinearity=rectify)
        print layers.get_output(network).eval({self.input_var: example}).shape
        network = layers.MaxPool2DLayer(incoming=network, pool_size=(3, 3), stride=2, pad=2)
        print layers.get_output(network).eval({self.input_var: example}).shape
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        self.params = layers.get_all_params(network, trainable=True)
        output = layers.get_output(network)

        num_channels = 32
        filter_W = 48
        filter_H = 8
        # NOTE: these constants are the shapes of the last pool layer; they could be
        # computed symbolically, but explicit values enable better optimization

        channels = []
        for channel_index in range(num_channels):
            channels.append(output[:, channel_index, :, :].transpose((0, 2, 1)))

        rnn_network_outputs = []
        W_in_to_updategate = None
        W_hid_to_updategate = None
        b_updategate = None
        W_in_to_resetgate = None
        W_hid_to_resetgate = None
        b_resetgate = None
        W_in_to_hidden_update = None
        W_hid_to_hidden_update = None
        b_hidden_update = None

        for channel_index in range(num_channels):
            rnn_input_var = channels[channel_index]

            # InputLayer
            network = layers.InputLayer(shape=(None, filter_W, filter_H),
                                        input_var=rnn_input_var)

            if (channel_index == 0):
                # GRULayer
                network = layers.GRULayer(incoming=network, num_units=self.num_units,
                                          only_return_final=True)
                W_in_to_updategate = network.W_in_to_updategate
                W_hid_to_updategate = network.W_hid_to_updategate
                b_updategate = network.b_updategate
                W_in_to_resetgate = network.W_in_to_resetgate
                W_hid_to_resetgate = network.W_hid_to_resetgate
                b_resetgate = network.b_resetgate
                W_in_to_hidden_update = network.W_in_to_hidden_update
                W_hid_to_hidden_update = network.W_hid_to_hidden_update
                b_hidden_update = network.b_hidden_update

                # add params
                self.params += layers.get_all_params(network, trainable=True)
            else:
                # GRULayer, but shared
                network = layers.GRULayer(incoming=network, num_units=self.num_units,
                                          only_return_final=True,
                                          resetgate=layers.Gate(W_in=W_in_to_resetgate,
                                                                W_hid=W_hid_to_resetgate,
                                                                b=b_resetgate),
                                          updategate=layers.Gate(W_in=W_in_to_updategate,
                                                                 W_hid=W_hid_to_updategate,
                                                                 b=b_updategate),
                                          hidden_update=layers.Gate(W_in=W_in_to_hidden_update,
                                                                    W_hid=W_hid_to_hidden_update,
                                                                    b=b_hidden_update))

            rnn_network_outputs.append(layers.get_output(network))

        all_output_var = T.concatenate(rnn_network_outputs, axis=1)
        print all_output_var.eval({self.input_var: example}).shape

        # InputLayer
        network = layers.InputLayer(shape=(None, self.num_units * num_channels),
                                    input_var=all_output_var)

        # Dropout Layer
        if (self.dropout > 0):
            network = layers.dropout(network, self.dropout)

        # BatchNormalization Layer
        if (self.batch_norm):
            network = layers.BatchNormLayer(incoming=network)

        # Last layer: classification
        network = layers.DenseLayer(incoming=network, num_units=176, nonlinearity=softmax)
        print layers.get_output(network).eval({self.input_var: example}).shape

        self.params += layers.get_all_params(network, trainable=True)
        self.prediction = layers.get_output(network)

        #print "==> param shapes", [x.eval().shape for x in self.params]

        self.loss_ce = lasagne.objectives.categorical_crossentropy(self.prediction,
                                                                   self.answer_var).mean()
        if (self.l2 > 0):
            self.loss_l2 = self.l2 * lasagne.regularization.apply_penalty(
                self.params, lasagne.regularization.l2)
        else:
            self.loss_l2 = 0
        self.loss = self.loss_ce + self.loss_l2

        #updates = lasagne.updates.adadelta(self.loss, self.params)
        updates = lasagne.updates.momentum(self.loss, self.params, learning_rate=0.003)

        if self.mode == 'train':
            print "==> compiling train_fn"
            self.train_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                            outputs=[self.prediction, self.loss],
                                            updates=updates)

        print "==> compiling test_fn"
        self.test_fn = theano.function(inputs=[self.input_var, self.answer_var],
                                       outputs=[self.prediction, self.loss])

    def say_name(self):
        return "tc_net_rnn.4conv.pad.GRU.shared.num_units%d.5khz.augm" % self.num_units
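    # Note (editor): unlike the other networks in this folder, read_batch()
    # does not take a fixed full-width slice of the spectrogram; it crops a
    # random 768-wide window (offset 0..90) as train-time data augmentation.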
    def read_batch(self, data_raw, batch_index):
        start_index = batch_index * self.batch_size
        end_index = start_index + self.batch_size
        data = np.zeros((self.batch_size, 1, 128, 768), dtype=np.float32)
        answers = []

        for i in range(start_index, end_index):
            answers.append(int(data_raw[i].split(',')[1]))
            name = data_raw[i].split(',')[0]
            path = self.png_folder + name + ".png"
            im = Image.open(path)
            offset = random.randint(0, 90)
            data[i - start_index, 0, :, :] = np.array(im).astype(np.float32)[:128, offset:offset+768] / 256.0

        answers = np.array(answers, dtype=np.int32)
        return data, answers


================================================
FILE: theano/plot.py
================================================
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import sys
import argparse
import os

# parsing arguments
parser = argparse.ArgumentParser()
parser.add_argument('--plot', type=str, default='plot.png', help='output plot file name (.png)')
parser.add_argument('--log', type=str, default='log.txt', help='log file name')
parser.add_argument('--winVal', type=int, default=200, help='moving-average window for validation loss')
parser.add_argument('--winTrain', type=int, default=200, help='moving-average window for training loss')
parser.add_argument('--no-legend', dest='legend', action='store_false')
parser.add_argument('--no-accuracy', dest='accuracy', action='store_false')
parser.add_argument('--no-loss', dest='loss', action='store_false')
parser.add_argument('--start_epoch', type=float, default=-1.0, help='start plotting from that epoch')
parser.set_defaults(loss=True)
parser.set_defaults(legend=True)
parser.set_defaults(accuracy=True)
args = parser.parse_args()

plotname = args.plot
windowVal = args.winVal
windowTrain = args.winTrain
accuracy = []


def movingAverage(loss, window):
    mas = []
    for i in range(len(loss)):
        j = i - window + 1
        if (j < 0):
            j = 0
        total = 0.0
        for k in range(window):
            total += loss[j + k]
        mas.append(total / window)
    return mas
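# Expected log format (editor's note, inferred from the parsing below):
# loss lines start with "training:E.I/N" or "testing:E.I/N" in the first
# tab-separated field, where E is the epoch, I the iteration and N the number
# of iterations per epoch, and also contain an "avg_loss:<float>" field;
# accuracy lines look like "accuracy:<float> percent".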
def plotTrainVal(filename, index, plotLabel):
    valx = []
    valy = []
    trainx = []
    trainy = []
    train_accuracyx = []
    train_accuracyy = []
    val_accuracyx = []
    val_accuracyy = []

    with open(filename, 'r') as logfile:
        for st in logfile.readlines():
            head = st.split('\t')[0].strip()
            if (head[:7] == 'testing' or head[:8] == 'training'):
                iteration_expr = head[head.find(':')+1:]
                divpos = iteration_expr.find('/')
                first = iteration_expr[:divpos]
                iterations_per_epoch = float(iteration_expr[divpos+1:])
                dotpos = first.find('.')
                epoch = float(first[:dotpos])
                iteration = float(first[dotpos+1:])
                x = epoch + iteration / iterations_per_epoch

                st_loss = st[st.find("avg_loss"):]
                cur_loss = float(st_loss[st_loss.find(':')+1:st_loss.find('\t')])

                if (head[:7] == 'testing'):
                    valx.append(x)
                    valy.append(cur_loss)
                else:
                    trainx.append(x)
                    trainy.append(cur_loss)

            if st.strip()[:8] == "accuracy":
                cur_accuracy = float(st[st.find(':')+1:st.find("percent")]) / 100.0
                if (len(train_accuracyx) > len(val_accuracyx)):
                    val_accuracyx.append(valx[-1])
                    val_accuracyy.append(cur_accuracy)
                else:
                    train_accuracyx.append(trainx[-1])
                    train_accuracyy.append(cur_accuracy)

    while (len(valx) > 0 and valx[0] < args.start_epoch):
        valx = valx[1:]
        valy = valy[1:]
    while (len(trainx) > 0 and trainx[0] < args.start_epoch):
        trainx = trainx[1:]
        trainy = trainy[1:]

    # window config
    wndVal = min(windowVal, int(0.8 * len(valy)))
    wndTrain = min(windowTrain, int(0.8 * len(trainy)))
    print "Train length: ", len(trainy), " \t\t window: ", wndTrain
    print "Val length: ", len(valy), " \t\t window: ", wndVal

    # movAvg and correcting length
    #valy = movingAverage(valy, wndVal)
    #trainy = movingAverage(trainy, wndTrain)
    #valx = valx[:len(valy)]
    #trainx = trainx[:len(trainy)]

    # plotting
    greenDiff = 50
    redBlueDiff = 50
    if (args.loss):
        plt.plot(trainx, trainy,
                 '#00' + hex(index * greenDiff)[2:] + hex(256 - index * redBlueDiff)[2:],
                 label=plotLabel + " train")
        plt.hold(True)
        plt.plot(valx, valy,
                 '#' + hex(256 - index * redBlueDiff)[2:] + hex(index * greenDiff)[2:] + '00',
                 label=plotLabel + " validation")
        plt.hold(True)

    if (args.accuracy):
        plt.plot(train_accuracyx, train_accuracyy, '#000000', label=plotLabel + " train_accuracy")
        plt.hold(True)
        plt.plot(val_accuracyx, val_accuracyy, '#00FF00', label=plotLabel + " val_accuracy")
        plt.hold(True)

    print "plot index =", index
    for (x, y) in zip(val_accuracyx, val_accuracyy):
        print "\tepoch = %.0f, accuracy = %f" % (x - 1, y)
    print '\tMax: %f // Epoch: %d' % (max(val_accuracyy),
                                      val_accuracyx[val_accuracyy.index(max(val_accuracyy))])


plotTrainVal(args.log, 1, args.log)
if (args.legend):
    plt.legend(loc='upper right', fontsize='x-small')
plt.gcf().savefig(plotname)
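# Example invocation (editor's note; file names are placeholders):
#   python theano/plot.py --log log.txt --plot curves.png --winVal 100 --winTrain 100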